diff options
Diffstat (limited to 'third_party/aom/av1/common')
140 files changed, 0 insertions, 88562 deletions
diff --git a/third_party/aom/av1/common/alloccommon.c b/third_party/aom/av1/common/alloccommon.c deleted file mode 100644 index 1bf81c91d..000000000 --- a/third_party/aom/av1/common/alloccommon.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "config/aom_config.h" - -#include "aom_mem/aom_mem.h" - -#include "av1/common/alloccommon.h" -#include "av1/common/blockd.h" -#include "av1/common/entropymode.h" -#include "av1/common/entropymv.h" -#include "av1/common/onyxc_int.h" - -int av1_get_MBs(int width, int height) { - const int aligned_width = ALIGN_POWER_OF_TWO(width, 3); - const int aligned_height = ALIGN_POWER_OF_TWO(height, 3); - const int mi_cols = aligned_width >> MI_SIZE_LOG2; - const int mi_rows = aligned_height >> MI_SIZE_LOG2; - - const int mb_cols = (mi_cols + 2) >> 2; - const int mb_rows = (mi_rows + 2) >> 2; - return mb_rows * mb_cols; -} - -#if LOOP_FILTER_BITMASK -static int alloc_loop_filter_mask(AV1_COMMON *cm) { - aom_free(cm->lf.lfm); - cm->lf.lfm = NULL; - - // Each lfm holds bit masks for all the 4x4 blocks in a max - // 64x64 (128x128 for ext_partitions) region. The stride - // and rows are rounded up / truncated to a multiple of 16 - // (32 for ext_partition). 
- cm->lf.lfm_stride = (cm->mi_cols + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2; - cm->lf.lfm_num = ((cm->mi_rows + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2) * - cm->lf.lfm_stride; - cm->lf.lfm = - (LoopFilterMask *)aom_calloc(cm->lf.lfm_num, sizeof(*cm->lf.lfm)); - if (!cm->lf.lfm) return 1; - - unsigned int i; - for (i = 0; i < cm->lf.lfm_num; ++i) av1_zero(cm->lf.lfm[i]); - - return 0; -} - -static void free_loop_filter_mask(AV1_COMMON *cm) { - if (cm->lf.lfm == NULL) return; - - aom_free(cm->lf.lfm); - cm->lf.lfm = NULL; - cm->lf.lfm_num = 0; - cm->lf.lfm_stride = 0; -} -#endif - -void av1_set_mb_mi(AV1_COMMON *cm, int width, int height) { - // Ensure that the decoded width and height are both multiples of - // 8 luma pixels (note: this may only be a multiple of 4 chroma pixels if - // subsampling is used). - // This simplifies the implementation of various experiments, - // eg. cdef, which operates on units of 8x8 luma pixels. - const int aligned_width = ALIGN_POWER_OF_TWO(width, 3); - const int aligned_height = ALIGN_POWER_OF_TWO(height, 3); - - cm->mi_cols = aligned_width >> MI_SIZE_LOG2; - cm->mi_rows = aligned_height >> MI_SIZE_LOG2; - cm->mi_stride = calc_mi_size(cm->mi_cols); - - cm->mb_cols = (cm->mi_cols + 2) >> 2; - cm->mb_rows = (cm->mi_rows + 2) >> 2; - cm->MBs = cm->mb_rows * cm->mb_cols; - -#if LOOP_FILTER_BITMASK - alloc_loop_filter_mask(cm); -#endif -} - -void av1_free_ref_frame_buffers(BufferPool *pool) { - int i; - - for (i = 0; i < FRAME_BUFFERS; ++i) { - if (pool->frame_bufs[i].ref_count > 0 && - pool->frame_bufs[i].raw_frame_buffer.data != NULL) { - pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer); - pool->frame_bufs[i].ref_count = 0; - } - aom_free(pool->frame_bufs[i].mvs); - pool->frame_bufs[i].mvs = NULL; - aom_free(pool->frame_bufs[i].seg_map); - pool->frame_bufs[i].seg_map = NULL; - aom_free_frame_buffer(&pool->frame_bufs[i].buf); - } -} - -// Assumes cm->rst_info[p].restoration_unit_size is already initialized 
-void av1_alloc_restoration_buffers(AV1_COMMON *cm) { - const int num_planes = av1_num_planes(cm); - for (int p = 0; p < num_planes; ++p) - av1_alloc_restoration_struct(cm, &cm->rst_info[p], p > 0); - - if (cm->rst_tmpbuf == NULL) { - CHECK_MEM_ERROR(cm, cm->rst_tmpbuf, - (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE)); - } - - if (cm->rlbs == NULL) { - CHECK_MEM_ERROR(cm, cm->rlbs, aom_malloc(sizeof(RestorationLineBuffers))); - } - - // For striped loop restoration, we divide each row of tiles into "stripes", - // of height 64 luma pixels but with an offset by RESTORATION_UNIT_OFFSET - // luma pixels to match the output from CDEF. We will need to store 2 * - // RESTORATION_CTX_VERT lines of data for each stripe, and also need to be - // able to quickly answer the question "Where is the <n>'th stripe for tile - // row <m>?" To make that efficient, we generate the rst_last_stripe array. - int num_stripes = 0; - for (int i = 0; i < cm->tile_rows; ++i) { - TileInfo tile_info; - av1_tile_set_row(&tile_info, cm, i); - const int mi_h = tile_info.mi_row_end - tile_info.mi_row_start; - const int ext_h = RESTORATION_UNIT_OFFSET + (mi_h << MI_SIZE_LOG2); - const int tile_stripes = (ext_h + 63) / 64; - num_stripes += tile_stripes; - cm->rst_end_stripe[i] = num_stripes; - } - - // Now we need to allocate enough space to store the line buffers for the - // stripes - const int frame_w = cm->superres_upscaled_width; - const int use_highbd = cm->seq_params.use_highbitdepth ? 
1 : 0; - - for (int p = 0; p < num_planes; ++p) { - const int is_uv = p > 0; - const int ss_x = is_uv && cm->seq_params.subsampling_x; - const int plane_w = ((frame_w + ss_x) >> ss_x) + 2 * RESTORATION_EXTRA_HORZ; - const int stride = ALIGN_POWER_OF_TWO(plane_w, 5); - const int buf_size = num_stripes * stride * RESTORATION_CTX_VERT - << use_highbd; - RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries; - - if (buf_size != boundaries->stripe_boundary_size || - boundaries->stripe_boundary_above == NULL || - boundaries->stripe_boundary_below == NULL) { - aom_free(boundaries->stripe_boundary_above); - aom_free(boundaries->stripe_boundary_below); - - CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_above, - (uint8_t *)aom_memalign(32, buf_size)); - CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_below, - (uint8_t *)aom_memalign(32, buf_size)); - - boundaries->stripe_boundary_size = buf_size; - } - boundaries->stripe_boundary_stride = stride; - } -} - -void av1_free_restoration_buffers(AV1_COMMON *cm) { - int p; - for (p = 0; p < MAX_MB_PLANE; ++p) - av1_free_restoration_struct(&cm->rst_info[p]); - aom_free(cm->rst_tmpbuf); - cm->rst_tmpbuf = NULL; - aom_free(cm->rlbs); - cm->rlbs = NULL; - for (p = 0; p < MAX_MB_PLANE; ++p) { - RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries; - aom_free(boundaries->stripe_boundary_above); - aom_free(boundaries->stripe_boundary_below); - boundaries->stripe_boundary_above = NULL; - boundaries->stripe_boundary_below = NULL; - } - - aom_free_frame_buffer(&cm->rst_frame); -} - -void av1_free_above_context_buffers(AV1_COMMON *cm, - int num_free_above_contexts) { - int i; - const int num_planes = cm->num_allocated_above_context_planes; - - for (int tile_row = 0; tile_row < num_free_above_contexts; tile_row++) { - for (i = 0; i < num_planes; i++) { - aom_free(cm->above_context[i][tile_row]); - cm->above_context[i][tile_row] = NULL; - } - aom_free(cm->above_seg_context[tile_row]); - 
cm->above_seg_context[tile_row] = NULL; - - aom_free(cm->above_txfm_context[tile_row]); - cm->above_txfm_context[tile_row] = NULL; - } - for (i = 0; i < num_planes; i++) { - aom_free(cm->above_context[i]); - cm->above_context[i] = NULL; - } - aom_free(cm->above_seg_context); - cm->above_seg_context = NULL; - - aom_free(cm->above_txfm_context); - cm->above_txfm_context = NULL; - - cm->num_allocated_above_contexts = 0; - cm->num_allocated_above_context_mi_col = 0; - cm->num_allocated_above_context_planes = 0; -} - -void av1_free_context_buffers(AV1_COMMON *cm) { - cm->free_mi(cm); - - av1_free_above_context_buffers(cm, cm->num_allocated_above_contexts); - -#if LOOP_FILTER_BITMASK - free_loop_filter_mask(cm); -#endif -} - -int av1_alloc_above_context_buffers(AV1_COMMON *cm, - int num_alloc_above_contexts) { - const int num_planes = av1_num_planes(cm); - int plane_idx; - const int aligned_mi_cols = - ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2); - - // Allocate above context buffers - cm->num_allocated_above_contexts = num_alloc_above_contexts; - cm->num_allocated_above_context_mi_col = aligned_mi_cols; - cm->num_allocated_above_context_planes = num_planes; - for (plane_idx = 0; plane_idx < num_planes; plane_idx++) { - cm->above_context[plane_idx] = (ENTROPY_CONTEXT **)aom_calloc( - num_alloc_above_contexts, sizeof(cm->above_context[0])); - if (!cm->above_context[plane_idx]) return 1; - } - - cm->above_seg_context = (PARTITION_CONTEXT **)aom_calloc( - num_alloc_above_contexts, sizeof(cm->above_seg_context)); - if (!cm->above_seg_context) return 1; - - cm->above_txfm_context = (TXFM_CONTEXT **)aom_calloc( - num_alloc_above_contexts, sizeof(cm->above_txfm_context)); - if (!cm->above_txfm_context) return 1; - - for (int tile_row = 0; tile_row < num_alloc_above_contexts; tile_row++) { - for (plane_idx = 0; plane_idx < num_planes; plane_idx++) { - cm->above_context[plane_idx][tile_row] = (ENTROPY_CONTEXT *)aom_calloc( - aligned_mi_cols, 
sizeof(*cm->above_context[0][tile_row])); - if (!cm->above_context[plane_idx][tile_row]) return 1; - } - - cm->above_seg_context[tile_row] = (PARTITION_CONTEXT *)aom_calloc( - aligned_mi_cols, sizeof(*cm->above_seg_context[tile_row])); - if (!cm->above_seg_context[tile_row]) return 1; - - cm->above_txfm_context[tile_row] = (TXFM_CONTEXT *)aom_calloc( - aligned_mi_cols, sizeof(*cm->above_txfm_context[tile_row])); - if (!cm->above_txfm_context[tile_row]) return 1; - } - - return 0; -} - -int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) { - int new_mi_size; - - av1_set_mb_mi(cm, width, height); - new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); - if (cm->mi_alloc_size < new_mi_size) { - cm->free_mi(cm); - if (cm->alloc_mi(cm, new_mi_size)) goto fail; - } - - return 0; - -fail: - // clear the mi_* values to force a realloc on resync - av1_set_mb_mi(cm, 0, 0); - av1_free_context_buffers(cm); - return 1; -} - -void av1_remove_common(AV1_COMMON *cm) { - av1_free_context_buffers(cm); - - aom_free(cm->fc); - cm->fc = NULL; - aom_free(cm->frame_contexts); - cm->frame_contexts = NULL; -} - -void av1_init_context_buffers(AV1_COMMON *cm) { cm->setup_mi(cm); } diff --git a/third_party/aom/av1/common/alloccommon.h b/third_party/aom/av1/common/alloccommon.h deleted file mode 100644 index 8e5896981..000000000 --- a/third_party/aom/av1/common/alloccommon.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_ALLOCCOMMON_H_ -#define AOM_AV1_COMMON_ALLOCCOMMON_H_ - -#define INVALID_IDX -1 // Invalid buffer index. - -#ifdef __cplusplus -extern "C" { -#endif - -struct AV1Common; -struct BufferPool; - -void av1_remove_common(struct AV1Common *cm); - -int av1_alloc_above_context_buffers(struct AV1Common *cm, - int num_alloc_above_contexts); -void av1_free_above_context_buffers(struct AV1Common *cm, - int num_free_above_contexts); -int av1_alloc_context_buffers(struct AV1Common *cm, int width, int height); -void av1_init_context_buffers(struct AV1Common *cm); -void av1_free_context_buffers(struct AV1Common *cm); - -void av1_free_ref_frame_buffers(struct BufferPool *pool); -void av1_alloc_restoration_buffers(struct AV1Common *cm); -void av1_free_restoration_buffers(struct AV1Common *cm); - -int av1_alloc_state_buffers(struct AV1Common *cm, int width, int height); -void av1_free_state_buffers(struct AV1Common *cm); - -void av1_set_mb_mi(struct AV1Common *cm, int width, int height); -int av1_get_MBs(int width, int height); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_ALLOCCOMMON_H_ diff --git a/third_party/aom/av1/common/arm/av1_inv_txfm_neon.c b/third_party/aom/av1/common/arm/av1_inv_txfm_neon.c deleted file mode 100644 index bad411743..000000000 --- a/third_party/aom/av1/common/arm/av1_inv_txfm_neon.c +++ /dev/null @@ -1,3231 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <arm_neon.h> - -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "av1/common/av1_inv_txfm1d.h" -#include "av1/common/av1_inv_txfm1d_cfg.h" -#include "av1/common/av1_txfm.h" -#include "av1/common/enums.h" -#include "av1/common/idct.h" -#include "av1/common/arm/av1_inv_txfm_neon.h" -#include "av1/common/arm/transpose_neon.h" - -// 1D itx types -typedef enum ATTRIBUTE_PACKED { - IDCT_1D, - IADST_1D, - IFLIPADST_1D = IADST_1D, - IIDENTITY_1D, - ITX_TYPES_1D, -} ITX_TYPE_1D; - -static const ITX_TYPE_1D vitx_1d_tab[TX_TYPES] = { - IDCT_1D, IADST_1D, IDCT_1D, IADST_1D, - IFLIPADST_1D, IDCT_1D, IFLIPADST_1D, IADST_1D, - IFLIPADST_1D, IIDENTITY_1D, IDCT_1D, IIDENTITY_1D, - IADST_1D, IIDENTITY_1D, IFLIPADST_1D, IIDENTITY_1D, -}; - -static const ITX_TYPE_1D hitx_1d_tab[TX_TYPES] = { - IDCT_1D, IDCT_1D, IADST_1D, IADST_1D, - IDCT_1D, IFLIPADST_1D, IFLIPADST_1D, IFLIPADST_1D, - IADST_1D, IIDENTITY_1D, IIDENTITY_1D, IDCT_1D, - IIDENTITY_1D, IADST_1D, IIDENTITY_1D, IFLIPADST_1D, -}; - -// 1D functions -static const transform_1d_neon lowbd_txfm_all_1d_arr[TX_SIZES][ITX_TYPES_1D] = { - { av1_idct4_new, av1_iadst4_new, av1_iidentity4_c }, - { av1_idct8_new, av1_iadst8_new, av1_iidentity8_c }, - { av1_idct16_new, av1_iadst16_new, av1_iidentity16_c }, - { av1_idct32_new, NULL, NULL }, - { av1_idct64_new, NULL, NULL }, -}; - -static INLINE void lowbd_add_flip_buffer_8xn_neon(int16x8_t *in, - uint8_t *output, int stride, - int flipud, - const int height) { - int j = flipud ? (height - 1) : 0; - const int step = flipud ? 
-1 : 1; - int16x8_t temp_output; - for (int i = 0; i < height; ++i, j += step) { - temp_output = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(output))); - temp_output = vaddq_s16(temp_output, in[j]); - vst1_u8(output, vqmovun_s16(temp_output)); - output += stride; - } -} - -static INLINE uint8x16_t lowbd_get_recon_16x16_neon(const uint8x16_t pred, - int16x8_t res0, - int16x8_t res1) { - int16x8_t temp_output[2]; - uint8x16_t temp_output_8q; - temp_output[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pred))); - temp_output[0] = vaddq_s16(temp_output[0], res0); - temp_output[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pred))); - temp_output[1] = vaddq_s16(temp_output[1], res1); - temp_output_8q = - vcombine_u8(vqmovun_s16(temp_output[0]), vqmovun_s16(temp_output[1])); - return temp_output_8q; -} - -static INLINE void lowbd_add_flip_buffer_16xn_neon(int16x8_t *in, - uint8_t *output, int stride, - int flipud, int height) { - uint8x16_t temp_output_8q; - int j = flipud ? (height - 1) : 0; - const int step = flipud ? 
-1 : 1; - for (int i = 0; i < height; ++i, j += step) { - temp_output_8q = vld1q_u8(output + i * stride); - temp_output_8q = - lowbd_get_recon_16x16_neon(temp_output_8q, in[j], in[j + height]); - vst1q_u8((output + i * stride), temp_output_8q); - } -} - -static INLINE void lowbd_inv_txfm2d_memset_neon(int16x8_t *a, int size, - int value) { - for (int i = 0; i < size; i++) { - a[i] = vdupq_n_s16((int16_t)value); - } -} - -static INLINE void btf_16_lane_0_1_neon(const int16x8_t in0, - const int16x8_t in1, const int16x4_t c, - int16x8_t *t0, int16x8_t *t1) { - int32x4_t s0[2], s1[2]; - int16x4_t v0[2], v1[2]; - - s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0); - s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 0); - s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1); - s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 1); - - s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 1); - s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 1); - s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 0); - s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 0); - - v0[0] = vrshrn_n_s32(s0[0], INV_COS_BIT); - v0[1] = vrshrn_n_s32(s0[1], INV_COS_BIT); - v1[0] = vrshrn_n_s32(s1[0], INV_COS_BIT); - v1[1] = vrshrn_n_s32(s1[1], INV_COS_BIT); - - *t0 = vcombine_s16(v0[0], v0[1]); - *t1 = vcombine_s16(v1[0], v1[1]); -} - -static INLINE void btf_16_lane_1_0_neon(const int16x8_t in0, - const int16x8_t in1, const int16x4_t c, - int16x8_t *t0, int16x8_t *t1) { - int32x4_t s0[2], s1[2]; - int16x4_t v0[2], v1[2]; - - s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 1); - s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 1); - s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 0); - s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 0); - - s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 0); - s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 0); - s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 1); - s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 1); - - v0[0] = vrshrn_n_s32(s0[0], INV_COS_BIT); - 
v0[1] = vrshrn_n_s32(s0[1], INV_COS_BIT); - v1[0] = vrshrn_n_s32(s1[0], INV_COS_BIT); - v1[1] = vrshrn_n_s32(s1[1], INV_COS_BIT); - - *t0 = vcombine_s16(v0[0], v0[1]); - *t1 = vcombine_s16(v1[0], v1[1]); -} - -static INLINE void btf_16_lane_2_3_neon(const int16x8_t in0, - const int16x8_t in1, const int16x4_t c, - int16x8_t *t0, int16x8_t *t1) { - int32x4_t s0[2], s1[2]; - int16x4_t v0[2], v1[2]; - - s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 2); - s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 2); - s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 3); - s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 3); - - s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 3); - s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 3); - s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 2); - s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 2); - - v0[0] = vrshrn_n_s32(s0[0], INV_COS_BIT); - v0[1] = vrshrn_n_s32(s0[1], INV_COS_BIT); - v1[0] = vrshrn_n_s32(s1[0], INV_COS_BIT); - v1[1] = vrshrn_n_s32(s1[1], INV_COS_BIT); - - *t0 = vcombine_s16(v0[0], v0[1]); - *t1 = vcombine_s16(v1[0], v1[1]); -} - -static INLINE void btf_16_neon(const int16x8_t in0, int16_t coef1, - int16_t coef2, int16x8_t *t0, int16x8_t *t1) { - int32x4_t s0_l, s0_h, s1_l, s1_h; - int16x4_t v0[2], v1[2]; - - s0_l = vmull_n_s16(vget_low_s16(in0), coef1); - s0_h = vmull_n_s16(vget_high_s16(in0), coef1); - s1_l = vmull_n_s16(vget_low_s16(in0), coef2); - s1_h = vmull_n_s16(vget_high_s16(in0), coef2); - - v0[0] = vrshrn_n_s32(s0_l, INV_COS_BIT); - v0[1] = vrshrn_n_s32(s0_h, INV_COS_BIT); - v1[0] = vrshrn_n_s32(s1_l, INV_COS_BIT); - v1[1] = vrshrn_n_s32(s1_h, INV_COS_BIT); - - *t0 = vcombine_s16(v0[0], v0[1]); - *t1 = vcombine_s16(v1[0], v1[1]); -} - -static INLINE void btf_16_lane_3_2_neon(const int16x8_t in0, - const int16x8_t in1, const int16x4_t c, - int16x8_t *t0, int16x8_t *t1) { - int32x4_t s0[2], s1[2]; - int16x4_t v0[2], v1[2]; - - s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 3); - s0[1] = 
vmull_lane_s16(vget_high_s16(in0), c, 3); - s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 2); - s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 2); - - s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 2); - s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 2); - s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 3); - s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 3); - - v0[0] = vrshrn_n_s32(s0[0], INV_COS_BIT); - v0[1] = vrshrn_n_s32(s0[1], INV_COS_BIT); - v1[0] = vrshrn_n_s32(s1[0], INV_COS_BIT); - v1[1] = vrshrn_n_s32(s1[1], INV_COS_BIT); - - *t0 = vcombine_s16(v0[0], v0[1]); - *t1 = vcombine_s16(v1[0], v1[1]); -} - -static INLINE void btf_16_half_neon(int16x8_t *const x, const int16x4_t c) { - int32x4_t t0[2], t1[2]; - int16x4_t v0[2], v1[2]; - - // Don't add/sub before multiply, which will overflow in iadst8. - const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0); - const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(x[0]), c, 0); - const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0); - const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(x[1]), c, 0); - - t0[0] = vaddq_s32(x0_lo, x1_lo); - t0[1] = vaddq_s32(x0_hi, x1_hi); - t1[0] = vsubq_s32(x0_lo, x1_lo); - t1[1] = vsubq_s32(x0_hi, x1_hi); - - v0[0] = vrshrn_n_s32(t0[0], INV_COS_BIT); - v0[1] = vrshrn_n_s32(t0[1], INV_COS_BIT); - v1[0] = vrshrn_n_s32(t1[0], INV_COS_BIT); - v1[1] = vrshrn_n_s32(t1[1], INV_COS_BIT); - - x[0] = vcombine_s16(v0[0], v0[1]); - x[1] = vcombine_s16(v1[0], v1[1]); -} - -static INLINE int16x4_t create_s16x4_neon(int16_t *const c0, int16_t *const c1, - int16_t *const c2, - int16_t *const c3) { - int16x4_t val = vdup_n_s16((int16_t)0); - val = vld1_lane_s16(c0, val, 0); - val = vld1_lane_s16(c1, val, 1); - val = vld1_lane_s16(c2, val, 2); - val = vld1_lane_s16(c3, val, 3); - return val; -} - -static INLINE void iadst8_new_neon(int16x8_t *const in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - 
- const int16x4_t c0 = - create_s16x4_neon((int16_t *)(cospi + 4), (int16_t *)(cospi + 60), - (int16_t *)(cospi + 20), (int16_t *)(cospi + 44)); - const int16x4_t c1 = - create_s16x4_neon((int16_t *)(cospi + 36), (int16_t *)(cospi + 28), - (int16_t *)(cospi + 52), (int16_t *)(cospi + 12)); - const int16x4_t c2 = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - int16x8_t x[8]; - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; - - // Stage 1 - x[0] = in[7]; - x[1] = in[0]; - x[2] = in[5]; - x[3] = in[2]; - x[4] = in[3]; - x[5] = in[4]; - x[6] = in[1]; - x[7] = in[6]; - - // Stage 2 - btf_16_lane_0_1_neon(x[0], x[1], c0, &s0, &s1); - btf_16_lane_2_3_neon(x[2], x[3], c0, &s2, &s3); - btf_16_lane_0_1_neon(x[4], x[5], c1, &s4, &s5); - btf_16_lane_2_3_neon(x[6], x[7], c1, &s6, &s7); - - // Stage 3 - x[0] = vqaddq_s16(s0, s4); - x[1] = vqaddq_s16(s1, s5); - x[2] = vqaddq_s16(s2, s6); - x[3] = vqaddq_s16(s3, s7); - x[4] = vqsubq_s16(s0, s4); - x[5] = vqsubq_s16(s1, s5); - x[6] = vqsubq_s16(s2, s6); - x[7] = vqsubq_s16(s3, s7); - - // Stage 4 - s0 = x[0]; - s1 = x[1]; - s2 = x[2]; - s3 = x[3]; - btf_16_lane_2_3_neon(x[4], x[5], c2, &s4, &s5); - btf_16_lane_3_2_neon(x[7], x[6], c2, &s7, &s6); - - // Stage 5 - x[0] = vqaddq_s16(s0, s2); - x[1] = vqaddq_s16(s1, s3); - x[2] = vqsubq_s16(s0, s2); - x[3] = vqsubq_s16(s1, s3); - x[4] = vqaddq_s16(s4, s6); - x[5] = vqaddq_s16(s5, s7); - x[6] = vqsubq_s16(s4, s6); - x[7] = vqsubq_s16(s5, s7); - - // stage 6 - btf_16_half_neon(x + 2, c2); - btf_16_half_neon(x + 6, c2); - - // Stage 7 - out[0] = x[0]; - out[1] = vnegq_s16(x[4]); - out[2] = x[6]; - out[3] = vnegq_s16(x[2]); - out[4] = x[3]; - out[5] = vnegq_s16(x[7]); - out[6] = x[5]; - out[7] = vnegq_s16(x[1]); -} - -static INLINE void iadst8_low1_new_neon(int16x8_t *const in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - const int16x4_t c2 = - 
create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - int16x8_t x[8]; - int16x8_t s0, s1, s4, s5; - - // Stage 1 - x[1] = in[0]; - - // Stage 2 - - btf_16_neon(x[1], cospi[60], -cospi[4], &s0, &s1); - - // Stage 3 - x[0] = s0; - x[1] = s1; - x[4] = s0; - x[5] = s1; - - // Stage 4 - s0 = x[0]; - s1 = x[1]; - btf_16_lane_2_3_neon(x[4], x[5], c2, &s4, &s5); - - // Stage 5 - x[0] = s0; - x[1] = s1; - x[2] = s0; - x[3] = s1; - x[4] = s4; - x[5] = s5; - x[6] = s4; - x[7] = s5; - - // stage 6 - btf_16_half_neon(x + 2, c2); - btf_16_half_neon(x + 6, c2); - - // Stage 7 - out[0] = x[0]; - out[1] = vnegq_s16(x[4]); - out[2] = x[6]; - out[3] = vnegq_s16(x[2]); - out[4] = x[3]; - out[5] = vnegq_s16(x[7]); - out[6] = x[5]; - out[7] = vnegq_s16(x[1]); -} - -static INLINE void idct8_new_neon(int16x8_t *in, int16x8_t *out, int8_t cos_bit, - int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - int16x8_t step1[8], step2[8]; - const int16x4_t c0 = - create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56), - (int16_t *)(cospi + 40), (int16_t *)(cospi + 24)); - const int16x4_t c2 = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - // stage 2 - btf_16_lane_0_1_neon(in[1], in[7], c0, &step1[7], &step1[4]); - btf_16_lane_2_3_neon(in[5], in[3], c0, &step1[6], &step1[5]); - - // stage 3 - btf_16_lane_0_1_neon(in[0], in[4], c2, &step2[0], &step2[1]); - btf_16_lane_2_3_neon(in[2], in[6], c2, &step2[3], &step2[2]); - step2[4] = vqaddq_s16(step1[4], step1[5]); - step2[5] = vqsubq_s16(step1[4], step1[5]); - step2[6] = vqsubq_s16(step1[7], step1[6]); - step2[7] = vqaddq_s16(step1[7], step1[6]); - - // stage 4 - step1[0] = vqaddq_s16(step2[0], step2[3]); - step1[1] = vqaddq_s16(step2[1], step2[2]); - step1[2] = vqsubq_s16(step2[1], step2[2]); - step1[3] = vqsubq_s16(step2[0], step2[3]); - btf_16_lane_0_1_neon(step2[6], 
step2[5], c2, &step1[6], &step1[5]); - - // stage 5 - out[0] = vqaddq_s16(step1[0], step2[7]); - out[1] = vqaddq_s16(step1[1], step1[6]); - out[2] = vqaddq_s16(step1[2], step1[5]); - out[3] = vqaddq_s16(step1[3], step2[4]); - out[4] = vqsubq_s16(step1[3], step2[4]); - out[5] = vqsubq_s16(step1[2], step1[5]); - out[6] = vqsubq_s16(step1[1], step1[6]); - out[7] = vqsubq_s16(step1[0], step2[7]); -} - -static INLINE void idct8_low1_new_neon(int16x8_t *in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - int16x8_t step1; - int32x4_t t32[2]; - - // stage 1 - // stage 2 - // stage 3 - t32[0] = vmull_n_s16(vget_low_s16(in[0]), (int16_t)cospi[32]); - t32[1] = vmull_n_s16(vget_high_s16(in[0]), (int16_t)cospi[32]); - - step1 = vcombine_s16(vrshrn_n_s32(t32[0], INV_COS_BIT), - vrshrn_n_s32(t32[1], INV_COS_BIT)); - - // stage 4 - // stage 5 - out[0] = step1; - out[1] = step1; - out[2] = step1; - out[3] = step1; - out[4] = step1; - out[5] = step1; - out[6] = step1; - out[7] = step1; -} - -void av1_round_shift_array_16_neon(int16x8_t *arr, int size, int bit) { - assert(!(size % 4)); - if (!bit) return; - const int16x8_t dup_bits_n_16x8 = vdupq_n_s16((int16_t)(-bit)); - for (int i = 0; i < size; i++) { - arr[i] = vrshlq_s16(arr[i], dup_bits_n_16x8); - } -} - -static INLINE void flip_buf_ud_neon(int16x8_t *input, int size) { - int16x8_t temp[8]; - for (int i = 0; i < size; ++i) { - temp[i] = input[size - 1 - i]; - } - for (int i = 0; i < size; ++i) { - input[i] = temp[i]; - } -} - -static INLINE void load_buffer_32bit_to_16bit_neon(const int32_t *input, - int16x8_t *const a, - int out_size) { - for (int i = 0; i < 8; ++i) { - a[i] = vcombine_s16(vmovn_s32(vld1q_s32(input)), - vmovn_s32(vld1q_s32(input + 4))); - input += out_size; - } -} - -static INLINE void identity8_new_neon(int16x8_t *input, int16x8_t *output, - int8_t cos_bit, int bit) { - (void)bit; - (void)cos_bit; - - output[0] = vmulq_n_s16(input[0], (int16_t)2); - 
output[1] = vmulq_n_s16(input[1], (int16_t)2); - output[2] = vmulq_n_s16(input[2], (int16_t)2); - output[3] = vmulq_n_s16(input[3], (int16_t)2); - output[4] = vmulq_n_s16(input[4], (int16_t)2); - output[5] = vmulq_n_s16(input[5], (int16_t)2); - output[6] = vmulq_n_s16(input[6], (int16_t)2); - output[7] = vmulq_n_s16(input[7], (int16_t)2); -} - -static INLINE void round_shift_for_rect(int16x8_t *input, int16x8_t *output, - int size) { - int32x4_t out_low, out_high; - int16x4_t low, high; - - for (int z = 0; z < size; ++z) { - out_low = vmull_n_s16(vget_low_s16(input[z]), (int16_t)NewInvSqrt2); - out_high = vmull_n_s16(vget_high_s16(input[z]), (int16_t)NewInvSqrt2); - - low = vqrshrn_n_s32(out_low, (int32_t)NewSqrt2Bits); - high = vqrshrn_n_s32(out_high, (int32_t)NewSqrt2Bits); - - output[z] = vcombine_s16(low, high); - } -} - -static INLINE void identity16_new_neon(int16x8_t *input, int16x8_t *output, - int8_t cos_bit, int bit) { - (void)bit; - (void)cos_bit; - - int32x4_t out_low, out_high; - int16x4_t low, high; - int16_t scale = (int16_t)(2 * NewSqrt2); - - for (int z = 0; z < 16; ++z) { - out_low = vmull_n_s16(vget_low_s16(input[z]), scale); - out_high = vmull_n_s16(vget_high_s16(input[z]), scale); - - low = vqrshrn_n_s32(out_low, (int32_t)NewSqrt2Bits); - high = vqrshrn_n_s32(out_high, (int32_t)NewSqrt2Bits); - - output[z] = vcombine_s16(low, high); - } -} - -static INLINE void identity32_new_neon(int16x8_t *input, int16x8_t *output, - int8_t cos_bit, int bit) { - (void)bit; - (void)cos_bit; - - for (int z = 0; z < 32; ++z) { - output[z] = vmulq_n_s16(input[z], (int16_t)4); - } -} - -static INLINE void idct16_low1_new_neon(int16x8_t *in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - int16x8_t step1; - int32x4_t t32[2]; - - // stage 4 - - t32[0] = vmull_n_s16(vget_low_s16(in[0]), cospi[32]); - t32[1] = vmull_n_s16(vget_high_s16(in[0]), cospi[32]); - step1 = vcombine_s16(vrshrn_n_s32(t32[0], INV_COS_BIT), 
- vrshrn_n_s32(t32[1], INV_COS_BIT)); - - // stage 6 - // stage 7 - out[0] = step1; - out[1] = step1; - out[2] = step1; - out[3] = step1; - out[4] = step1; - out[5] = step1; - out[6] = step1; - out[7] = step1; - out[8] = step1; - out[9] = step1; - out[10] = step1; - out[11] = step1; - out[12] = step1; - out[13] = step1; - out[14] = step1; - out[15] = step1; -} - -static INLINE void idct16_new_neon(int16x8_t *in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - int16x8_t step1[16], step2[16]; - - const int16x4_t c0 = - create_s16x4_neon((int16_t *)(cospi + 4), (int16_t *)(cospi + 60), - (int16_t *)(cospi + 36), (int16_t *)(cospi + 28)); - const int16x4_t c1 = - create_s16x4_neon((int16_t *)(cospi + 20), (int16_t *)(cospi + 44), - (int16_t *)(cospi + 52), (int16_t *)(cospi + 12)); - const int16x4_t c2 = - create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56), - (int16_t *)(cospi + 40), (int16_t *)(cospi + 24)); - const int16x4_t c3 = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - // stage 2 - - btf_16_lane_0_1_neon(in[1], in[15], c0, &step2[15], &step2[8]); - btf_16_lane_2_3_neon(in[9], in[7], c0, &step2[14], &step2[9]); - btf_16_lane_0_1_neon(in[5], in[11], c1, &step2[13], &step2[10]); - btf_16_lane_2_3_neon(in[13], in[3], c1, &step2[12], &step2[11]); - - step2[0] = in[0]; - step2[1] = in[8]; - step2[2] = in[4]; - step2[3] = in[12]; - step2[4] = in[2]; - step2[5] = in[10]; - step2[6] = in[6]; - step2[7] = in[14]; - - // stage 3 - - btf_16_lane_0_1_neon(step2[4], step2[7], c2, &step1[7], &step1[4]); - btf_16_lane_2_3_neon(step2[5], step2[6], c2, &step1[6], &step1[5]); - - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - step1[8] = vqaddq_s16(step2[8], step2[9]); - step1[9] = vqsubq_s16(step2[8], step2[9]); - step1[10] = vqsubq_s16(step2[11], step2[10]); - step1[11] = 
vqaddq_s16(step2[11], step2[10]); - step1[12] = vqaddq_s16(step2[12], step2[13]); - step1[13] = vqsubq_s16(step2[12], step2[13]); - step1[14] = vqsubq_s16(step2[15], step2[14]); - step1[15] = vqaddq_s16(step2[15], step2[14]); - - // stage 4 - - btf_16_lane_0_1_neon(step1[0], step1[1], c3, &step2[0], &step2[1]); - btf_16_lane_2_3_neon(step1[2], step1[3], c3, &step2[3], &step2[2]); - btf_16_lane_2_3_neon(step1[14], step1[9], c3, &step2[14], &step2[9]); - btf_16_lane_3_2_neon(vnegq_s16(step1[10]), vnegq_s16(step1[13]), c3, - &step2[10], &step2[13]); - - step2[4] = vqaddq_s16(step1[4], step1[5]); - step2[5] = vqsubq_s16(step1[4], step1[5]); - step2[6] = vqsubq_s16(step1[7], step1[6]); - step2[7] = vqaddq_s16(step1[7], step1[6]); - step2[8] = step1[8]; - step2[11] = step1[11]; - step2[12] = step1[12]; - step2[15] = step1[15]; - - // stage 5 - - btf_16_lane_0_1_neon(step2[6], step2[5], c3, &step1[6], &step1[5]); - - step1[0] = vqaddq_s16(step2[0], step2[3]); - step1[1] = vqaddq_s16(step2[1], step2[2]); - step1[2] = vqsubq_s16(step2[1], step2[2]); - step1[3] = vqsubq_s16(step2[0], step2[3]); - step1[4] = step2[4]; - step1[7] = step2[7]; - step1[8] = vqaddq_s16(step2[8], step2[11]); - step1[9] = vqaddq_s16(step2[9], step2[10]); - step1[10] = vqsubq_s16(step2[9], step2[10]); - step1[11] = vqsubq_s16(step2[8], step2[11]); - step1[12] = vqsubq_s16(step2[15], step2[12]); - step1[13] = vqsubq_s16(step2[14], step2[13]); - step1[14] = vqaddq_s16(step2[14], step2[13]); - step1[15] = vqaddq_s16(step2[15], step2[12]); - - // stage 6 - - btf_16_lane_0_1_neon(step1[13], step1[10], c3, &step2[13], &step2[10]); - btf_16_lane_0_1_neon(step1[12], step1[11], c3, &step2[12], &step2[11]); - - step2[0] = vqaddq_s16(step1[0], step1[7]); - step2[1] = vqaddq_s16(step1[1], step1[6]); - step2[2] = vqaddq_s16(step1[2], step1[5]); - step2[3] = vqaddq_s16(step1[3], step1[4]); - step2[4] = vqsubq_s16(step1[3], step1[4]); - step2[5] = vqsubq_s16(step1[2], step1[5]); - step2[6] = vqsubq_s16(step1[1], 
step1[6]); - step2[7] = vqsubq_s16(step1[0], step1[7]); - step2[8] = step1[8]; - step2[9] = step1[9]; - step2[14] = step1[14]; - step2[15] = step1[15]; - - // stage 7 - out[0] = vqaddq_s16(step2[0], step2[15]); - out[1] = vqaddq_s16(step2[1], step2[14]); - out[2] = vqaddq_s16(step2[2], step2[13]); - out[3] = vqaddq_s16(step2[3], step2[12]); - out[4] = vqaddq_s16(step2[4], step2[11]); - out[5] = vqaddq_s16(step2[5], step2[10]); - out[6] = vqaddq_s16(step2[6], step2[9]); - out[7] = vqaddq_s16(step2[7], step2[8]); - out[8] = vqsubq_s16(step2[7], step2[8]); - out[9] = vqsubq_s16(step2[6], step2[9]); - out[10] = vqsubq_s16(step2[5], step2[10]); - out[11] = vqsubq_s16(step2[4], step2[11]); - out[12] = vqsubq_s16(step2[3], step2[12]); - out[13] = vqsubq_s16(step2[2], step2[13]); - out[14] = vqsubq_s16(step2[1], step2[14]); - out[15] = vqsubq_s16(step2[0], step2[15]); -} - -static INLINE void idct16_low8_new_neon(int16x8_t *in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - int16x8_t step1[16], step2[16]; - const int16x4_t c0 = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - // stage 1 - // stage 2 - - step2[0] = in[0]; - step2[2] = in[4]; - step2[4] = in[2]; - step2[6] = in[6]; - - btf_16_neon(in[1], cospi[60], cospi[4], &step2[8], &step2[15]); - btf_16_neon(in[7], -cospi[36], cospi[28], &step2[9], &step2[14]); - btf_16_neon(in[5], cospi[44], cospi[20], &step2[10], &step2[13]); - btf_16_neon(in[3], -cospi[52], cospi[12], &step2[11], &step2[12]); - - // stage 3 - - btf_16_neon(step2[4], cospi[56], cospi[8], &step1[4], &step1[7]); - btf_16_neon(step2[6], -cospi[40], cospi[24], &step1[5], &step1[6]); - - step1[0] = step2[0]; - step1[2] = step2[2]; - step1[8] = vqaddq_s16(step2[8], step2[9]); - step1[9] = vqsubq_s16(step2[8], step2[9]); - step1[10] = vqsubq_s16(step2[11], step2[10]); - step1[11] = vqaddq_s16(step2[11], step2[10]); - 
step1[12] = vqaddq_s16(step2[12], step2[13]); - step1[13] = vqsubq_s16(step2[12], step2[13]); - step1[14] = vqsubq_s16(step2[15], step2[14]); - step1[15] = vqaddq_s16(step2[15], step2[14]); - - // stage 4 - - btf_16_neon(step1[0], cospi[32], cospi[32], &step2[0], &step2[1]); - btf_16_neon(step1[2], cospi[48], cospi[16], &step2[2], &step2[3]); - btf_16_lane_2_3_neon(step1[14], step1[9], c0, &step2[14], &step2[9]); - btf_16_lane_3_2_neon(vnegq_s16(step1[10]), vnegq_s16(step1[13]), c0, - &step2[10], &step2[13]); - - step2[4] = vqaddq_s16(step1[4], step1[5]); - step2[5] = vqsubq_s16(step1[4], step1[5]); - step2[6] = vqsubq_s16(step1[7], step1[6]); - step2[7] = vqaddq_s16(step1[7], step1[6]); - step2[8] = step1[8]; - step2[11] = step1[11]; - step2[12] = step1[12]; - step2[15] = step1[15]; - - // stage 5 - - btf_16_lane_0_1_neon(step2[6], step2[5], c0, &step1[6], &step1[5]); - step1[0] = vqaddq_s16(step2[0], step2[3]); - step1[1] = vqaddq_s16(step2[1], step2[2]); - step1[2] = vqsubq_s16(step2[1], step2[2]); - step1[3] = vqsubq_s16(step2[0], step2[3]); - step1[4] = step2[4]; - step1[7] = step2[7]; - step1[8] = vqaddq_s16(step2[8], step2[11]); - step1[9] = vqaddq_s16(step2[9], step2[10]); - step1[10] = vqsubq_s16(step2[9], step2[10]); - step1[11] = vqsubq_s16(step2[8], step2[11]); - step1[12] = vqsubq_s16(step2[15], step2[12]); - step1[13] = vqsubq_s16(step2[14], step2[13]); - step1[14] = vqaddq_s16(step2[14], step2[13]); - step1[15] = vqaddq_s16(step2[15], step2[12]); - - // stage 6 - btf_16_lane_0_1_neon(step1[13], step1[10], c0, &step2[13], &step2[10]); - btf_16_lane_0_1_neon(step1[12], step1[11], c0, &step2[12], &step2[11]); - - step2[0] = vqaddq_s16(step1[0], step1[7]); - step2[1] = vqaddq_s16(step1[1], step1[6]); - step2[2] = vqaddq_s16(step1[2], step1[5]); - step2[3] = vqaddq_s16(step1[3], step1[4]); - step2[4] = vqsubq_s16(step1[3], step1[4]); - step2[5] = vqsubq_s16(step1[2], step1[5]); - step2[6] = vqsubq_s16(step1[1], step1[6]); - step2[7] = vqsubq_s16(step1[0], 
step1[7]); - step2[8] = step1[8]; - step2[9] = step1[9]; - step2[14] = step1[14]; - step2[15] = step1[15]; - - // stage 7 - - out[0] = vqaddq_s16(step2[0], step2[15]); - out[1] = vqaddq_s16(step2[1], step2[14]); - out[2] = vqaddq_s16(step2[2], step2[13]); - out[3] = vqaddq_s16(step2[3], step2[12]); - out[4] = vqaddq_s16(step2[4], step2[11]); - out[5] = vqaddq_s16(step2[5], step2[10]); - out[6] = vqaddq_s16(step2[6], step2[9]); - out[7] = vqaddq_s16(step2[7], step2[8]); - out[8] = vqsubq_s16(step2[7], step2[8]); - out[9] = vqsubq_s16(step2[6], step2[9]); - out[10] = vqsubq_s16(step2[5], step2[10]); - out[11] = vqsubq_s16(step2[4], step2[11]); - out[12] = vqsubq_s16(step2[3], step2[12]); - out[13] = vqsubq_s16(step2[2], step2[13]); - out[14] = vqsubq_s16(step2[1], step2[14]); - out[15] = vqsubq_s16(step2[0], step2[15]); -} - -static INLINE void iadst16_new_neon(int16x8_t *const in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - - const int16x4_t c0 = - create_s16x4_neon((int16_t *)(cospi + 2), (int16_t *)(cospi + 62), - (int16_t *)(cospi + 10), (int16_t *)(cospi + 54)); - const int16x4_t c1 = - create_s16x4_neon((int16_t *)(cospi + 18), (int16_t *)(cospi + 46), - (int16_t *)(cospi + 26), (int16_t *)(cospi + 38)); - const int16x4_t c2 = - create_s16x4_neon((int16_t *)(cospi + 34), (int16_t *)(cospi + 30), - (int16_t *)(cospi + 42), (int16_t *)(cospi + 22)); - const int16x4_t c3 = - create_s16x4_neon((int16_t *)(cospi + 50), (int16_t *)(cospi + 14), - (int16_t *)(cospi + 58), (int16_t *)(cospi + 6)); - const int16x4_t c4 = - create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56), - (int16_t *)(cospi + 40), (int16_t *)(cospi + 24)); - - const int16x4_t c = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - int16x8_t x[16]; - int16x8_t t[14]; - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; - int16x8_t s8, s9, s10, s11, s12, s13, 
s14, s15; - - // Stage 1 - x[0] = in[15]; - x[1] = in[0]; - x[2] = in[13]; - x[3] = in[2]; - x[4] = in[11]; - x[5] = in[4]; - x[6] = in[9]; - x[7] = in[6]; - x[8] = in[7]; - x[9] = in[8]; - x[10] = in[5]; - x[11] = in[10]; - x[12] = in[3]; - x[13] = in[12]; - x[14] = in[1]; - x[15] = in[14]; - - // Stage 2 - btf_16_lane_0_1_neon(x[0], x[1], c0, &s0, &s1); - btf_16_lane_2_3_neon(x[2], x[3], c0, &s2, &s3); - btf_16_lane_0_1_neon(x[4], x[5], c1, &s4, &s5); - btf_16_lane_2_3_neon(x[6], x[7], c1, &s6, &s7); - btf_16_lane_0_1_neon(x[8], x[9], c2, &s8, &s9); - btf_16_lane_2_3_neon(x[10], x[11], c2, &s10, &s11); - btf_16_lane_0_1_neon(x[12], x[13], c3, &s12, &s13); - btf_16_lane_2_3_neon(x[14], x[15], c3, &s14, &s15); - - // Stage 3 - x[0] = vqaddq_s16(s0, s8); - x[1] = vqaddq_s16(s1, s9); - x[2] = vqaddq_s16(s2, s10); - x[3] = vqaddq_s16(s3, s11); - x[4] = vqaddq_s16(s4, s12); - x[5] = vqaddq_s16(s5, s13); - x[6] = vqaddq_s16(s6, s14); - x[7] = vqaddq_s16(s7, s15); - x[8] = vqsubq_s16(s0, s8); - x[9] = vqsubq_s16(s1, s9); - x[10] = vqsubq_s16(s2, s10); - x[11] = vqsubq_s16(s3, s11); - x[12] = vqsubq_s16(s4, s12); - x[13] = vqsubq_s16(s5, s13); - x[14] = vqsubq_s16(s6, s14); - x[15] = vqsubq_s16(s7, s15); - - // Stage 4 - t[0] = x[0]; - t[1] = x[1]; - t[2] = x[2]; - t[3] = x[3]; - t[4] = x[4]; - t[5] = x[5]; - t[6] = x[6]; - t[7] = x[7]; - btf_16_lane_0_1_neon(x[8], x[9], c4, &s8, &s9); - btf_16_lane_2_3_neon(x[10], x[11], c4, &s10, &s11); - btf_16_lane_1_0_neon(x[13], x[12], c4, &s13, &s12); - btf_16_lane_3_2_neon(x[15], x[14], c4, &s15, &s14); - - // Stage 5 - x[0] = vqaddq_s16(t[0], t[4]); - x[1] = vqaddq_s16(t[1], t[5]); - x[2] = vqaddq_s16(t[2], t[6]); - x[3] = vqaddq_s16(t[3], t[7]); - x[4] = vqsubq_s16(t[0], t[4]); - x[5] = vqsubq_s16(t[1], t[5]); - x[6] = vqsubq_s16(t[2], t[6]); - x[7] = vqsubq_s16(t[3], t[7]); - x[8] = vqaddq_s16(s8, s12); - x[9] = vqaddq_s16(s9, s13); - x[10] = vqaddq_s16(s10, s14); - x[11] = vqaddq_s16(s11, s15); - x[12] = vqsubq_s16(s8, s12); - 
x[13] = vqsubq_s16(s9, s13); - x[14] = vqsubq_s16(s10, s14); - x[15] = vqsubq_s16(s11, s15); - - // stage 6 - t[0] = x[0]; - t[1] = x[1]; - t[2] = x[2]; - t[3] = x[3]; - btf_16_lane_2_3_neon(x[4], x[5], c, &s4, &s5); - btf_16_lane_3_2_neon(x[7], x[6], c, &s7, &s6); - t[8] = x[8]; - t[9] = x[9]; - t[10] = x[10]; - t[11] = x[11]; - btf_16_lane_2_3_neon(x[12], x[13], c, &s12, &s13); - btf_16_lane_3_2_neon(x[15], x[14], c, &s15, &s14); - - // Stage 7 - x[0] = vqaddq_s16(t[0], t[2]); - x[1] = vqaddq_s16(t[1], t[3]); - x[2] = vqsubq_s16(t[0], t[2]); - x[3] = vqsubq_s16(t[1], t[3]); - x[4] = vqaddq_s16(s4, s6); - x[5] = vqaddq_s16(s5, s7); - x[6] = vqsubq_s16(s4, s6); - x[7] = vqsubq_s16(s5, s7); - x[8] = vqaddq_s16(t[8], t[10]); - x[9] = vqaddq_s16(t[9], t[11]); - x[10] = vqsubq_s16(t[8], t[10]); - x[11] = vqsubq_s16(t[9], t[11]); - x[12] = vqaddq_s16(s12, s14); - x[13] = vqaddq_s16(s13, s15); - x[14] = vqsubq_s16(s12, s14); - x[15] = vqsubq_s16(s13, s15); - - // Stage 8 - btf_16_half_neon(x + 2, c); - btf_16_half_neon(x + 6, c); - btf_16_half_neon(x + 10, c); - btf_16_half_neon(x + 14, c); - - // Stage 9 - out[0] = x[0]; - out[1] = vnegq_s16(x[8]); - out[2] = x[12]; - out[3] = vnegq_s16(x[4]); - out[4] = x[6]; - out[5] = vnegq_s16(x[14]); - out[6] = x[10]; - out[7] = vnegq_s16(x[2]); - out[8] = x[3]; - out[9] = vnegq_s16(x[11]); - out[10] = x[15]; - out[11] = vnegq_s16(x[7]); - out[12] = x[5]; - out[13] = vnegq_s16(x[13]); - out[14] = x[9]; - out[15] = vnegq_s16(x[1]); -} - -static INLINE void iadst16_low1_new_neon(int16x8_t *const in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - const int16x4_t c4 = - create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56), - (int16_t *)(cospi + 40), (int16_t *)(cospi + 24)); - const int16x4_t c = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - int16x8_t x[16]; - int16x8_t t[10]; - 
int16x8_t s0, s1, s4, s5; - int16x8_t s8, s9, s12, s13; - - // Stage 1 - x[1] = in[0]; - - // Stage 2 - btf_16_neon(x[1], cospi[62], -cospi[2], &s0, &s1); - - // Stage 3 - x[0] = s0; - x[1] = s1; - x[8] = s0; - x[9] = s1; - - // Stage 4 - t[0] = x[0]; - t[1] = x[1]; - btf_16_lane_0_1_neon(x[8], x[9], c4, &s8, &s9); - - // Stage 5 - x[0] = t[0]; - x[1] = t[1]; - x[4] = t[0]; - x[5] = t[1]; - x[8] = s8; - x[9] = s9; - x[12] = s8; - x[13] = s9; - - // stage 6 - t[0] = x[0]; - t[1] = x[1]; - btf_16_lane_2_3_neon(x[4], x[5], c, &s4, &s5); - t[8] = x[8]; - t[9] = x[9]; - btf_16_lane_2_3_neon(x[12], x[13], c, &s12, &s13); - - // Stage 7 - x[0] = t[0]; - x[1] = t[1]; - x[2] = t[0]; - x[3] = t[1]; - x[4] = s4; - x[5] = s5; - x[6] = s4; - x[7] = s5; - x[8] = t[8]; - x[9] = t[9]; - x[10] = t[8]; - x[11] = t[9]; - x[12] = s12; - x[13] = s13; - x[14] = s12; - x[15] = s13; - - // Stage 8 - btf_16_half_neon(x + 2, c); - btf_16_half_neon(x + 6, c); - btf_16_half_neon(x + 10, c); - btf_16_half_neon(x + 14, c); - - // Stage 9 - out[0] = x[0]; - out[1] = vnegq_s16(x[8]); - out[2] = x[12]; - out[3] = vnegq_s16(x[4]); - out[4] = x[6]; - out[5] = vnegq_s16(x[14]); - out[6] = x[10]; - out[7] = vnegq_s16(x[2]); - out[8] = x[3]; - out[9] = vnegq_s16(x[11]); - out[10] = x[15]; - out[11] = vnegq_s16(x[7]); - out[12] = x[5]; - out[13] = vnegq_s16(x[13]); - out[14] = x[9]; - out[15] = vnegq_s16(x[1]); -} - -static INLINE void iadst16_low8_new_neon(int16x8_t *const in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - - const int16x4_t c4 = - create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56), - (int16_t *)(cospi + 40), (int16_t *)(cospi + 24)); - const int16x4_t c = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - int16x8_t x[16]; - int16x8_t t[14]; - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; - int16x8_t s8, s9, s10, s11, s12, s13, s14, s15; - - // 
Stage 1 - x[1] = in[0]; - x[3] = in[2]; - x[5] = in[4]; - x[7] = in[6]; - x[8] = in[7]; - x[10] = in[5]; - x[12] = in[3]; - x[14] = in[1]; - - // Stage 2 - btf_16_neon(x[1], cospi[62], -cospi[2], &s0, &s1); - btf_16_neon(x[3], cospi[54], -cospi[10], &s2, &s3); - btf_16_neon(x[5], cospi[46], -cospi[18], &s4, &s5); - btf_16_neon(x[7], cospi[38], -cospi[26], &s6, &s7); - - btf_16_neon(x[8], cospi[34], cospi[30], &s8, &s9); - btf_16_neon(x[10], cospi[42], cospi[22], &s10, &s11); - btf_16_neon(x[12], cospi[50], cospi[14], &s12, &s13); - btf_16_neon(x[14], cospi[58], cospi[6], &s14, &s15); - - // Stage 3 - x[0] = vqaddq_s16(s0, s8); - x[1] = vqaddq_s16(s1, s9); - x[2] = vqaddq_s16(s2, s10); - x[3] = vqaddq_s16(s3, s11); - x[4] = vqaddq_s16(s4, s12); - x[5] = vqaddq_s16(s5, s13); - x[6] = vqaddq_s16(s6, s14); - x[7] = vqaddq_s16(s7, s15); - x[8] = vqsubq_s16(s0, s8); - x[9] = vqsubq_s16(s1, s9); - x[10] = vqsubq_s16(s2, s10); - x[11] = vqsubq_s16(s3, s11); - x[12] = vqsubq_s16(s4, s12); - x[13] = vqsubq_s16(s5, s13); - x[14] = vqsubq_s16(s6, s14); - x[15] = vqsubq_s16(s7, s15); - - // Stage 4 - t[0] = x[0]; - t[1] = x[1]; - t[2] = x[2]; - t[3] = x[3]; - t[4] = x[4]; - t[5] = x[5]; - t[6] = x[6]; - t[7] = x[7]; - btf_16_lane_0_1_neon(x[8], x[9], c4, &s8, &s9); - btf_16_lane_2_3_neon(x[10], x[11], c4, &s10, &s11); - btf_16_lane_1_0_neon(x[13], x[12], c4, &s13, &s12); - btf_16_lane_3_2_neon(x[15], x[14], c4, &s15, &s14); - - // Stage 5 - x[0] = vqaddq_s16(t[0], t[4]); - x[1] = vqaddq_s16(t[1], t[5]); - x[2] = vqaddq_s16(t[2], t[6]); - x[3] = vqaddq_s16(t[3], t[7]); - x[4] = vqsubq_s16(t[0], t[4]); - x[5] = vqsubq_s16(t[1], t[5]); - x[6] = vqsubq_s16(t[2], t[6]); - x[7] = vqsubq_s16(t[3], t[7]); - x[8] = vqaddq_s16(s8, s12); - x[9] = vqaddq_s16(s9, s13); - x[10] = vqaddq_s16(s10, s14); - x[11] = vqaddq_s16(s11, s15); - x[12] = vqsubq_s16(s8, s12); - x[13] = vqsubq_s16(s9, s13); - x[14] = vqsubq_s16(s10, s14); - x[15] = vqsubq_s16(s11, s15); - - // stage 6 - t[0] = x[0]; - 
t[1] = x[1]; - t[2] = x[2]; - t[3] = x[3]; - btf_16_lane_2_3_neon(x[4], x[5], c, &s4, &s5); - btf_16_lane_3_2_neon(x[7], x[6], c, &s7, &s6); - t[8] = x[8]; - t[9] = x[9]; - t[10] = x[10]; - t[11] = x[11]; - btf_16_lane_2_3_neon(x[12], x[13], c, &s12, &s13); - btf_16_lane_3_2_neon(x[15], x[14], c, &s15, &s14); - - // Stage 7 - x[0] = vqaddq_s16(t[0], t[2]); - x[1] = vqaddq_s16(t[1], t[3]); - x[2] = vqsubq_s16(t[0], t[2]); - x[3] = vqsubq_s16(t[1], t[3]); - x[4] = vqaddq_s16(s4, s6); - x[5] = vqaddq_s16(s5, s7); - x[6] = vqsubq_s16(s4, s6); - x[7] = vqsubq_s16(s5, s7); - x[8] = vqaddq_s16(t[8], t[10]); - x[9] = vqaddq_s16(t[9], t[11]); - x[10] = vqsubq_s16(t[8], t[10]); - x[11] = vqsubq_s16(t[9], t[11]); - x[12] = vqaddq_s16(s12, s14); - x[13] = vqaddq_s16(s13, s15); - x[14] = vqsubq_s16(s12, s14); - x[15] = vqsubq_s16(s13, s15); - - // Stage 8 - btf_16_half_neon(x + 2, c); - btf_16_half_neon(x + 6, c); - btf_16_half_neon(x + 10, c); - btf_16_half_neon(x + 14, c); - - // Stage 9 - out[0] = x[0]; - out[1] = vnegq_s16(x[8]); - out[2] = x[12]; - out[3] = vnegq_s16(x[4]); - out[4] = x[6]; - out[5] = vnegq_s16(x[14]); - out[6] = x[10]; - out[7] = vnegq_s16(x[2]); - out[8] = x[3]; - out[9] = vnegq_s16(x[11]); - out[10] = x[15]; - out[11] = vnegq_s16(x[7]); - out[12] = x[5]; - out[13] = vnegq_s16(x[13]); - out[14] = x[9]; - out[15] = vnegq_s16(x[1]); -} - -static INLINE void idct32_new_neon(int16x8_t *in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - int16x8_t step1[32], step2[32]; - - const int16x4_t c0 = - create_s16x4_neon((int16_t *)(cospi + 2), (int16_t *)(cospi + 62), - (int16_t *)(cospi + 34), (int16_t *)(cospi + 30)); - const int16x4_t c1 = - create_s16x4_neon((int16_t *)(cospi + 18), (int16_t *)(cospi + 46), - (int16_t *)(cospi + 50), (int16_t *)(cospi + 14)); - const int16x4_t c2 = - create_s16x4_neon((int16_t *)(cospi + 10), (int16_t *)(cospi + 54), - (int16_t *)(cospi + 42), (int16_t *)(cospi + 22)); - 
const int16x4_t c3 = - create_s16x4_neon((int16_t *)(cospi + 26), (int16_t *)(cospi + 38), - (int16_t *)(cospi + 58), (int16_t *)(cospi + 6)); - const int16x4_t c4 = - create_s16x4_neon((int16_t *)(cospi + 4), (int16_t *)(cospi + 60), - (int16_t *)(cospi + 36), (int16_t *)(cospi + 28)); - const int16x4_t c5 = - create_s16x4_neon((int16_t *)(cospi + 20), (int16_t *)(cospi + 44), - (int16_t *)(cospi + 52), (int16_t *)(cospi + 12)); - const int16x4_t c6 = - create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56), - (int16_t *)(cospi + 40), (int16_t *)(cospi + 24)); - const int16x4_t c7 = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - // stage 2 - - btf_16_lane_0_1_neon(in[1], in[31], c0, &step2[31], &step2[16]); - btf_16_lane_2_3_neon(in[17], in[15], c0, &step2[30], &step2[17]); - btf_16_lane_0_1_neon(in[9], in[23], c1, &step2[29], &step2[18]); - btf_16_lane_2_3_neon(in[25], in[7], c1, &step2[28], &step2[19]); - btf_16_lane_0_1_neon(in[5], in[27], c2, &step2[27], &step2[20]); - btf_16_lane_2_3_neon(in[21], in[11], c2, &step2[26], &step2[21]); - btf_16_lane_0_1_neon(in[13], in[19], c3, &step2[25], &step2[22]); - btf_16_lane_2_3_neon(in[29], in[3], c3, &step2[24], &step2[23]); - - step2[0] = in[0]; - step2[1] = in[16]; - step2[2] = in[8]; - step2[3] = in[24]; - step2[4] = in[4]; - step2[5] = in[20]; - step2[6] = in[12]; - step2[7] = in[28]; - step2[8] = in[2]; - step2[9] = in[18]; - step2[10] = in[10]; - step2[11] = in[26]; - step2[12] = in[6]; - step2[13] = in[22]; - step2[14] = in[14]; - step2[15] = in[30]; - - // stage 3 - - btf_16_lane_0_1_neon(step2[8], step2[15], c4, &step1[15], &step1[8]); - btf_16_lane_2_3_neon(step2[9], step2[14], c4, &step1[14], &step1[9]); - btf_16_lane_0_1_neon(step2[10], step2[13], c5, &step1[13], &step1[10]); - btf_16_lane_2_3_neon(step2[11], step2[12], c5, &step1[12], &step1[11]); - - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; 
- step1[3] = step2[3]; - step1[4] = step2[4]; - step1[5] = step2[5]; - step1[6] = step2[6]; - step1[7] = step2[7]; - - step1[16] = vqaddq_s16(step2[16], step2[17]); - step1[17] = vqsubq_s16(step2[16], step2[17]); - step1[18] = vqsubq_s16(step2[19], step2[18]); - step1[19] = vqaddq_s16(step2[19], step2[18]); - step1[20] = vqaddq_s16(step2[20], step2[21]); - step1[21] = vqsubq_s16(step2[20], step2[21]); - step1[22] = vqsubq_s16(step2[23], step2[22]); - step1[23] = vqaddq_s16(step2[23], step2[22]); - step1[24] = vqaddq_s16(step2[24], step2[25]); - step1[25] = vqsubq_s16(step2[24], step2[25]); - step1[26] = vqsubq_s16(step2[27], step2[26]); - step1[27] = vqaddq_s16(step2[27], step2[26]); - step1[28] = vqaddq_s16(step2[28], step2[29]); - step1[29] = vqsubq_s16(step2[28], step2[29]); - step1[30] = vqsubq_s16(step2[31], step2[30]); - step1[31] = vqaddq_s16(step2[31], step2[30]); - - // stage 4 - - btf_16_lane_0_1_neon(step1[4], step1[7], c6, &step2[7], &step2[4]); - btf_16_lane_2_3_neon(step1[5], step1[6], c6, &step2[6], &step2[5]); - btf_16_lane_0_1_neon(step1[30], step1[17], c6, &step2[30], &step2[17]); - btf_16_lane_1_0_neon(vnegq_s16(step1[18]), vnegq_s16(step1[29]), c6, - &step2[18], &step2[29]); - btf_16_lane_2_3_neon(step1[26], step1[21], c6, &step2[26], &step2[21]); - btf_16_lane_3_2_neon(vnegq_s16(step1[22]), vnegq_s16(step1[25]), c6, - &step2[22], &step2[25]); - - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[8] = vqaddq_s16(step1[8], step1[9]); - step2[9] = vqsubq_s16(step1[8], step1[9]); - step2[10] = vqsubq_s16(step1[11], step1[10]); - step2[11] = vqaddq_s16(step1[11], step1[10]); - step2[12] = vqaddq_s16(step1[12], step1[13]); - step2[13] = vqsubq_s16(step1[12], step1[13]); - step2[14] = vqsubq_s16(step1[15], step1[14]); - step2[15] = vqaddq_s16(step1[15], step1[14]); - step2[16] = step1[16]; - step2[19] = step1[19]; - step2[20] = step1[20]; - step2[23] = step1[23]; - step2[24] = step1[24]; - step2[27] = 
step1[27]; - step2[28] = step1[28]; - step2[31] = step1[31]; - - // stage 5 - - btf_16_lane_0_1_neon(step2[0], step2[1], c7, &step1[0], &step1[1]); - btf_16_lane_2_3_neon(step2[2], step2[3], c7, &step1[3], &step1[2]); - btf_16_lane_2_3_neon(step2[14], step2[9], c7, &step1[14], &step1[9]); - btf_16_lane_3_2_neon(vnegq_s16(step2[10]), vnegq_s16(step2[13]), c7, - &step1[10], &step1[13]); - - step1[4] = vqaddq_s16(step2[4], step2[5]); - step1[5] = vqsubq_s16(step2[4], step2[5]); - step1[6] = vqsubq_s16(step2[7], step2[6]); - step1[7] = vqaddq_s16(step2[7], step2[6]); - step1[8] = step2[8]; - step1[11] = step2[11]; - step1[12] = step2[12]; - step1[15] = step2[15]; - step1[16] = vqaddq_s16(step2[16], step2[19]); - step1[17] = vqaddq_s16(step2[17], step2[18]); - step1[18] = vqsubq_s16(step2[17], step2[18]); - step1[19] = vqsubq_s16(step2[16], step2[19]); - step1[20] = vqsubq_s16(step2[23], step2[20]); - step1[21] = vqsubq_s16(step2[22], step2[21]); - step1[22] = vqaddq_s16(step2[22], step2[21]); - step1[23] = vqaddq_s16(step2[23], step2[20]); - step1[24] = vqaddq_s16(step2[24], step2[27]); - step1[25] = vqaddq_s16(step2[25], step2[26]); - step1[26] = vqsubq_s16(step2[25], step2[26]); - step1[27] = vqsubq_s16(step2[24], step2[27]); - step1[28] = vqsubq_s16(step2[31], step2[28]); - step1[29] = vqsubq_s16(step2[30], step2[29]); - step1[30] = vqaddq_s16(step2[30], step2[29]); - step1[31] = vqaddq_s16(step2[31], step2[28]); - - // stage 6 - - btf_16_lane_0_1_neon(step1[6], step1[5], c7, &step2[6], &step2[5]); - btf_16_lane_2_3_neon(step1[29], step1[18], c7, &step2[29], &step2[18]); - btf_16_lane_2_3_neon(step1[28], step1[19], c7, &step2[28], &step2[19]); - btf_16_lane_3_2_neon(vnegq_s16(step1[20]), vnegq_s16(step1[27]), c7, - &step2[20], &step2[27]); - btf_16_lane_3_2_neon(vnegq_s16(step1[21]), vnegq_s16(step1[26]), c7, - &step2[21], &step2[26]); - - step2[0] = vqaddq_s16(step1[0], step1[3]); - step2[1] = vqaddq_s16(step1[1], step1[2]); - step2[2] = vqsubq_s16(step1[1], 
step1[2]); - step2[3] = vqsubq_s16(step1[0], step1[3]); - step2[4] = step1[4]; - step2[7] = step1[7]; - step2[8] = vqaddq_s16(step1[8], step1[11]); - step2[9] = vqaddq_s16(step1[9], step1[10]); - step2[10] = vqsubq_s16(step1[9], step1[10]); - step2[11] = vqsubq_s16(step1[8], step1[11]); - step2[12] = vqsubq_s16(step1[15], step1[12]); - step2[13] = vqsubq_s16(step1[14], step1[13]); - step2[14] = vqaddq_s16(step1[14], step1[13]); - step2[15] = vqaddq_s16(step1[15], step1[12]); - step2[16] = step1[16]; - step2[17] = step1[17]; - step2[22] = step1[22]; - step2[23] = step1[23]; - step2[24] = step1[24]; - step2[25] = step1[25]; - step2[30] = step1[30]; - step2[31] = step1[31]; - - // stage 7 - - btf_16_lane_0_1_neon(step2[13], step2[10], c7, &step1[13], &step1[10]); - btf_16_lane_0_1_neon(step2[12], step2[11], c7, &step1[12], &step1[11]); - - step1[0] = vqaddq_s16(step2[0], step2[7]); - step1[1] = vqaddq_s16(step2[1], step2[6]); - step1[2] = vqaddq_s16(step2[2], step2[5]); - step1[3] = vqaddq_s16(step2[3], step2[4]); - step1[4] = vqsubq_s16(step2[3], step2[4]); - step1[5] = vqsubq_s16(step2[2], step2[5]); - step1[6] = vqsubq_s16(step2[1], step2[6]); - step1[7] = vqsubq_s16(step2[0], step2[7]); - step1[8] = step2[8]; - step1[9] = step2[9]; - step1[14] = step2[14]; - step1[15] = step2[15]; - step1[16] = vqaddq_s16(step2[16], step2[23]); - step1[17] = vqaddq_s16(step2[17], step2[22]); - step1[18] = vqaddq_s16(step2[18], step2[21]); - step1[19] = vqaddq_s16(step2[19], step2[20]); - step1[20] = vqsubq_s16(step2[19], step2[20]); - step1[21] = vqsubq_s16(step2[18], step2[21]); - step1[22] = vqsubq_s16(step2[17], step2[22]); - step1[23] = vqsubq_s16(step2[16], step2[23]); - step1[24] = vqsubq_s16(step2[31], step2[24]); - step1[25] = vqsubq_s16(step2[30], step2[25]); - step1[26] = vqsubq_s16(step2[29], step2[26]); - step1[27] = vqsubq_s16(step2[28], step2[27]); - step1[28] = vqaddq_s16(step2[27], step2[28]); - step1[29] = vqaddq_s16(step2[26], step2[29]); - step1[30] = 
vqaddq_s16(step2[25], step2[30]); - step1[31] = vqaddq_s16(step2[24], step2[31]); - - // stage 8 - - btf_16_lane_0_1_neon(step1[27], step1[20], c7, &step2[27], &step2[20]); - btf_16_lane_0_1_neon(step1[26], step1[21], c7, &step2[26], &step2[21]); - btf_16_lane_0_1_neon(step1[25], step1[22], c7, &step2[25], &step2[22]); - btf_16_lane_0_1_neon(step1[24], step1[23], c7, &step2[24], &step2[23]); - - step2[0] = vqaddq_s16(step1[0], step1[15]); - step2[1] = vqaddq_s16(step1[1], step1[14]); - step2[2] = vqaddq_s16(step1[2], step1[13]); - step2[3] = vqaddq_s16(step1[3], step1[12]); - step2[4] = vqaddq_s16(step1[4], step1[11]); - step2[5] = vqaddq_s16(step1[5], step1[10]); - step2[6] = vqaddq_s16(step1[6], step1[9]); - step2[7] = vqaddq_s16(step1[7], step1[8]); - step2[8] = vqsubq_s16(step1[7], step1[8]); - step2[9] = vqsubq_s16(step1[6], step1[9]); - step2[10] = vqsubq_s16(step1[5], step1[10]); - step2[11] = vqsubq_s16(step1[4], step1[11]); - step2[12] = vqsubq_s16(step1[3], step1[12]); - step2[13] = vqsubq_s16(step1[2], step1[13]); - step2[14] = vqsubq_s16(step1[1], step1[14]); - step2[15] = vqsubq_s16(step1[0], step1[15]); - step2[16] = step1[16]; - step2[17] = step1[17]; - step2[18] = step1[18]; - step2[19] = step1[19]; - step2[28] = step1[28]; - step2[29] = step1[29]; - step2[30] = step1[30]; - step2[31] = step1[31]; - - // stage 9 - - out[0] = vqaddq_s16(step2[0], step2[31]); - out[1] = vqaddq_s16(step2[1], step2[30]); - out[2] = vqaddq_s16(step2[2], step2[29]); - out[3] = vqaddq_s16(step2[3], step2[28]); - out[4] = vqaddq_s16(step2[4], step2[27]); - out[5] = vqaddq_s16(step2[5], step2[26]); - out[6] = vqaddq_s16(step2[6], step2[25]); - out[7] = vqaddq_s16(step2[7], step2[24]); - out[8] = vqaddq_s16(step2[8], step2[23]); - out[9] = vqaddq_s16(step2[9], step2[22]); - out[10] = vqaddq_s16(step2[10], step2[21]); - out[11] = vqaddq_s16(step2[11], step2[20]); - out[12] = vqaddq_s16(step2[12], step2[19]); - out[13] = vqaddq_s16(step2[13], step2[18]); - out[14] = 
vqaddq_s16(step2[14], step2[17]); - out[15] = vqaddq_s16(step2[15], step2[16]); - out[16] = vqsubq_s16(step2[15], step2[16]); - out[17] = vqsubq_s16(step2[14], step2[17]); - out[18] = vqsubq_s16(step2[13], step2[18]); - out[19] = vqsubq_s16(step2[12], step2[19]); - out[20] = vqsubq_s16(step2[11], step2[20]); - out[21] = vqsubq_s16(step2[10], step2[21]); - out[22] = vqsubq_s16(step2[9], step2[22]); - out[23] = vqsubq_s16(step2[8], step2[23]); - out[24] = vqsubq_s16(step2[7], step2[24]); - out[25] = vqsubq_s16(step2[6], step2[25]); - out[26] = vqsubq_s16(step2[5], step2[26]); - out[27] = vqsubq_s16(step2[4], step2[27]); - out[28] = vqsubq_s16(step2[3], step2[28]); - out[29] = vqsubq_s16(step2[2], step2[29]); - out[30] = vqsubq_s16(step2[1], step2[30]); - out[31] = vqsubq_s16(step2[0], step2[31]); -} - -static INLINE void idct32_low1_new_neon(int16x8_t *in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - int16x8_t step1; - int32x4_t t32[2]; - - // stage 1 - // stage 2 - // stage 3 - // stage 4 - // stage 5 - - t32[0] = vmull_n_s16(vget_low_s16(in[0]), cospi[32]); - t32[1] = vmull_n_s16(vget_high_s16(in[0]), cospi[32]); - step1 = vcombine_s16(vrshrn_n_s32(t32[0], INV_COS_BIT), - vrshrn_n_s32(t32[1], INV_COS_BIT)); - - // stage 6 - // stage 7 - // stage 8 - // stage 9 - - out[0] = step1; - out[1] = step1; - out[2] = step1; - out[3] = step1; - out[4] = step1; - out[5] = step1; - out[6] = step1; - out[7] = step1; - out[8] = step1; - out[9] = step1; - out[10] = step1; - out[11] = step1; - out[12] = step1; - out[13] = step1; - out[14] = step1; - out[15] = step1; - out[16] = step1; - out[17] = step1; - out[18] = step1; - out[19] = step1; - out[20] = step1; - out[21] = step1; - out[22] = step1; - out[23] = step1; - out[24] = step1; - out[25] = step1; - out[26] = step1; - out[27] = step1; - out[28] = step1; - out[29] = step1; - out[30] = step1; - out[31] = step1; -} - -static INLINE void idct32_low8_new_neon(int16x8_t 
*in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - int16x8_t step1[32], step2[32]; - int32x4_t t32[16]; - const int16x4_t c0 = - create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56), - (int16_t *)(cospi + 40), (int16_t *)(cospi + 24)); - const int16x4_t c1 = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - // stage 1 - // stage 2 - - step2[0] = in[0]; - step2[4] = in[4]; - step2[8] = in[2]; - step2[12] = in[6]; - - btf_16_neon(in[1], cospi[62], cospi[2], &step2[16], &step2[31]); - btf_16_neon(in[7], -cospi[50], cospi[14], &step2[19], &step2[28]); - btf_16_neon(in[5], cospi[54], cospi[10], &step2[20], &step2[27]); - btf_16_neon(in[3], -cospi[58], cospi[6], &step2[23], &step2[24]); - - // stage 3 - step1[0] = step2[0]; - step1[4] = step2[4]; - - btf_16_neon(step2[8], cospi[60], cospi[4], &step1[8], &step1[15]); - btf_16_neon(step2[12], -cospi[52], cospi[12], &step1[11], &step1[12]); - - step1[16] = step2[16]; - step1[17] = step2[16]; - step1[18] = step2[19]; - step1[19] = step2[19]; - step1[20] = step2[20]; - step1[21] = step2[20]; - step1[22] = step2[23]; - step1[23] = step2[23]; - step1[24] = step2[24]; - step1[25] = step2[24]; - step1[26] = step2[27]; - step1[27] = step2[27]; - step1[28] = step2[28]; - step1[29] = step2[28]; - step1[30] = step2[31]; - step1[31] = step2[31]; - - // stage 4 - - btf_16_neon(step1[4], cospi[56], cospi[8], &step2[4], &step2[7]); - btf_16_lane_0_1_neon(step1[30], step1[17], c0, &step2[30], &step2[17]); - btf_16_lane_1_0_neon(vnegq_s16(step1[18]), vnegq_s16(step1[29]), c0, - &step2[18], &step2[29]); - btf_16_lane_2_3_neon(step1[26], step1[21], c0, &step2[26], &step2[21]); - btf_16_lane_3_2_neon(vnegq_s16(step1[22]), vnegq_s16(step1[25]), c0, - &step2[22], &step2[25]); - - step2[0] = step1[0]; - step2[8] = step1[8]; - step2[9] = step1[8]; - step2[10] = step1[11]; - step2[11] = step1[11]; - 
step2[12] = step1[12]; - step2[13] = step1[12]; - step2[14] = step1[15]; - step2[15] = step1[15]; - step2[16] = step1[16]; - step2[19] = step1[19]; - step2[20] = step1[20]; - step2[23] = step1[23]; - step2[24] = step1[24]; - step2[27] = step1[27]; - step2[28] = step1[28]; - step2[31] = step1[31]; - - // stage 5 - - t32[0] = vmull_n_s16(vget_low_s16(step2[0]), cospi[32]); - t32[1] = vmull_n_s16(vget_high_s16(step2[0]), cospi[32]); - step1[0] = vcombine_s16(vrshrn_n_s32(t32[0], INV_COS_BIT), - vrshrn_n_s32(t32[1], INV_COS_BIT)); - - btf_16_lane_2_3_neon(step2[14], step2[9], c1, &step1[14], &step1[9]); - btf_16_lane_3_2_neon(vnegq_s16(step2[10]), vnegq_s16(step2[13]), c1, - &step1[10], &step1[13]); - - step1[4] = step2[4]; - step1[5] = step2[4]; - step1[6] = step2[7]; - step1[7] = step2[7]; - step1[8] = step2[8]; - step1[11] = step2[11]; - step1[12] = step2[12]; - step1[15] = step2[15]; - step1[16] = vqaddq_s16(step2[16], step2[19]); - step1[17] = vqaddq_s16(step2[17], step2[18]); - step1[18] = vqsubq_s16(step2[17], step2[18]); - step1[19] = vqsubq_s16(step2[16], step2[19]); - step1[20] = vqsubq_s16(step2[23], step2[20]); - step1[21] = vqsubq_s16(step2[22], step2[21]); - step1[22] = vqaddq_s16(step2[22], step2[21]); - step1[23] = vqaddq_s16(step2[23], step2[20]); - step1[24] = vqaddq_s16(step2[24], step2[27]); - step1[25] = vqaddq_s16(step2[25], step2[26]); - step1[26] = vqsubq_s16(step2[25], step2[26]); - step1[27] = vqsubq_s16(step2[24], step2[27]); - step1[28] = vqsubq_s16(step2[31], step2[28]); - step1[29] = vqsubq_s16(step2[30], step2[29]); - step1[30] = vqaddq_s16(step2[30], step2[29]); - step1[31] = vqaddq_s16(step2[31], step2[28]); - - // stage 6 - - btf_16_lane_0_1_neon(step1[6], step1[5], c1, &step2[6], &step2[5]); - btf_16_lane_2_3_neon(step1[29], step1[18], c1, &step2[29], &step2[18]); - btf_16_lane_2_3_neon(step1[28], step1[19], c1, &step2[28], &step2[19]); - btf_16_lane_3_2_neon(vnegq_s16(step1[20]), vnegq_s16(step1[27]), c1, - &step2[20], &step2[27]); - 
btf_16_lane_3_2_neon(vnegq_s16(step1[21]), vnegq_s16(step1[26]), c1, - &step2[21], &step2[26]); - - step2[0] = step1[0]; - step2[1] = step1[0]; - step2[2] = step1[0]; - step2[3] = step1[0]; - step2[4] = step1[4]; - step2[7] = step1[7]; - step2[8] = vqaddq_s16(step1[8], step1[11]); - step2[9] = vqaddq_s16(step1[9], step1[10]); - step2[10] = vqsubq_s16(step1[9], step1[10]); - step2[11] = vqsubq_s16(step1[8], step1[11]); - step2[12] = vqsubq_s16(step1[15], step1[12]); - step2[13] = vqsubq_s16(step1[14], step1[13]); - step2[14] = vqaddq_s16(step1[14], step1[13]); - step2[15] = vqaddq_s16(step1[15], step1[12]); - step2[16] = step1[16]; - step2[17] = step1[17]; - step2[22] = step1[22]; - step2[23] = step1[23]; - step2[24] = step1[24]; - step2[25] = step1[25]; - step2[30] = step1[30]; - step2[31] = step1[31]; - - // stage 7 - - btf_16_lane_0_1_neon(step2[13], step2[10], c1, &step1[13], &step1[10]); - btf_16_lane_0_1_neon(step2[12], step2[11], c1, &step1[12], &step1[11]); - - step1[0] = vqaddq_s16(step2[0], step2[7]); - step1[1] = vqaddq_s16(step2[1], step2[6]); - step1[2] = vqaddq_s16(step2[2], step2[5]); - step1[3] = vqaddq_s16(step2[3], step2[4]); - step1[4] = vqsubq_s16(step2[3], step2[4]); - step1[5] = vqsubq_s16(step2[2], step2[5]); - step1[6] = vqsubq_s16(step2[1], step2[6]); - step1[7] = vqsubq_s16(step2[0], step2[7]); - step1[8] = step2[8]; - step1[9] = step2[9]; - step1[14] = step2[14]; - step1[15] = step2[15]; - step1[16] = vqaddq_s16(step2[16], step2[23]); - step1[17] = vqaddq_s16(step2[17], step2[22]); - step1[18] = vqaddq_s16(step2[18], step2[21]); - step1[19] = vqaddq_s16(step2[19], step2[20]); - step1[20] = vqsubq_s16(step2[19], step2[20]); - step1[21] = vqsubq_s16(step2[18], step2[21]); - step1[22] = vqsubq_s16(step2[17], step2[22]); - step1[23] = vqsubq_s16(step2[16], step2[23]); - step1[24] = vqsubq_s16(step2[31], step2[24]); - step1[25] = vqsubq_s16(step2[30], step2[25]); - step1[26] = vqsubq_s16(step2[29], step2[26]); - step1[27] = 
vqsubq_s16(step2[28], step2[27]); - step1[28] = vqaddq_s16(step2[27], step2[28]); - step1[29] = vqaddq_s16(step2[26], step2[29]); - step1[30] = vqaddq_s16(step2[25], step2[30]); - step1[31] = vqaddq_s16(step2[24], step2[31]); - - // stage 8 - - btf_16_lane_0_1_neon(step1[27], step1[20], c1, &step2[27], &step2[20]); - btf_16_lane_0_1_neon(step1[26], step1[21], c1, &step2[26], &step2[21]); - btf_16_lane_0_1_neon(step1[25], step1[22], c1, &step2[25], &step2[22]); - btf_16_lane_0_1_neon(step1[24], step1[23], c1, &step2[24], &step2[23]); - - step2[0] = vqaddq_s16(step1[0], step1[15]); - step2[1] = vqaddq_s16(step1[1], step1[14]); - step2[2] = vqaddq_s16(step1[2], step1[13]); - step2[3] = vqaddq_s16(step1[3], step1[12]); - step2[4] = vqaddq_s16(step1[4], step1[11]); - step2[5] = vqaddq_s16(step1[5], step1[10]); - step2[6] = vqaddq_s16(step1[6], step1[9]); - step2[7] = vqaddq_s16(step1[7], step1[8]); - step2[8] = vqsubq_s16(step1[7], step1[8]); - step2[9] = vqsubq_s16(step1[6], step1[9]); - step2[10] = vqsubq_s16(step1[5], step1[10]); - step2[11] = vqsubq_s16(step1[4], step1[11]); - step2[12] = vqsubq_s16(step1[3], step1[12]); - step2[13] = vqsubq_s16(step1[2], step1[13]); - step2[14] = vqsubq_s16(step1[1], step1[14]); - step2[15] = vqsubq_s16(step1[0], step1[15]); - step2[16] = step1[16]; - step2[17] = step1[17]; - step2[18] = step1[18]; - step2[19] = step1[19]; - step2[28] = step1[28]; - step2[29] = step1[29]; - step2[30] = step1[30]; - step2[31] = step1[31]; - - // stage 9 - - out[0] = vqaddq_s16(step2[0], step2[31]); - out[1] = vqaddq_s16(step2[1], step2[30]); - out[2] = vqaddq_s16(step2[2], step2[29]); - out[3] = vqaddq_s16(step2[3], step2[28]); - out[4] = vqaddq_s16(step2[4], step2[27]); - out[5] = vqaddq_s16(step2[5], step2[26]); - out[6] = vqaddq_s16(step2[6], step2[25]); - out[7] = vqaddq_s16(step2[7], step2[24]); - out[8] = vqaddq_s16(step2[8], step2[23]); - out[9] = vqaddq_s16(step2[9], step2[22]); - out[10] = vqaddq_s16(step2[10], step2[21]); - out[11] = 
vqaddq_s16(step2[11], step2[20]); - out[12] = vqaddq_s16(step2[12], step2[19]); - out[13] = vqaddq_s16(step2[13], step2[18]); - out[14] = vqaddq_s16(step2[14], step2[17]); - out[15] = vqaddq_s16(step2[15], step2[16]); - out[16] = vqsubq_s16(step2[15], step2[16]); - out[17] = vqsubq_s16(step2[14], step2[17]); - out[18] = vqsubq_s16(step2[13], step2[18]); - out[19] = vqsubq_s16(step2[12], step2[19]); - out[20] = vqsubq_s16(step2[11], step2[20]); - out[21] = vqsubq_s16(step2[10], step2[21]); - out[22] = vqsubq_s16(step2[9], step2[22]); - out[23] = vqsubq_s16(step2[8], step2[23]); - out[24] = vqsubq_s16(step2[7], step2[24]); - out[25] = vqsubq_s16(step2[6], step2[25]); - out[26] = vqsubq_s16(step2[5], step2[26]); - out[27] = vqsubq_s16(step2[4], step2[27]); - out[28] = vqsubq_s16(step2[3], step2[28]); - out[29] = vqsubq_s16(step2[2], step2[29]); - out[30] = vqsubq_s16(step2[1], step2[30]); - out[31] = vqsubq_s16(step2[0], step2[31]); -} - -static INLINE void idct32_low16_new_neon(int16x8_t *in, int16x8_t *out, - int8_t cos_bit, int bit) { - (void)bit; - const int32_t *cospi = cospi_arr(cos_bit); - int16x8_t step1[32], step2[32]; - int32x4_t t32[16]; - const int16x4_t c0 = - create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56), - (int16_t *)(cospi + 40), (int16_t *)(cospi + 24)); - const int16x4_t c1 = - create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32), - (int16_t *)(cospi + 16), (int16_t *)(cospi + 48)); - - // stage 1 - // stage 2 - - btf_16_neon(in[1], cospi[62], cospi[2], &step2[16], &step2[31]); - btf_16_neon(in[15], -cospi[34], cospi[30], &step2[17], &step2[30]); - btf_16_neon(in[9], cospi[46], cospi[18], &step2[18], &step2[29]); - btf_16_neon(in[7], -cospi[50], cospi[14], &step2[19], &step2[28]); - btf_16_neon(in[5], cospi[54], cospi[10], &step2[20], &step2[27]); - btf_16_neon(in[11], -cospi[42], cospi[22], &step2[21], &step2[26]); - btf_16_neon(in[13], cospi[38], cospi[26], &step2[22], &step2[25]); - btf_16_neon(in[3], -cospi[58], 
cospi[6], &step2[23], &step2[24]); - - step2[0] = in[0]; - step2[2] = in[8]; - step2[4] = in[4]; - step2[6] = in[12]; - step2[8] = in[2]; - step2[10] = in[10]; - step2[12] = in[6]; - step2[14] = in[14]; - - // stage 3 - - btf_16_neon(step2[8], cospi[60], cospi[4], &step1[8], &step1[15]); - btf_16_neon(step2[14], -cospi[36], cospi[28], &step1[9], &step1[14]); - btf_16_neon(step2[10], cospi[44], cospi[20], &step1[10], &step1[13]); - btf_16_neon(step2[12], -cospi[52], cospi[12], &step1[11], &step1[12]); - - step1[0] = step2[0]; - step1[2] = step2[2]; - step1[4] = step2[4]; - step1[6] = step2[6]; - step1[16] = vqaddq_s16(step2[16], step2[17]); - step1[17] = vqsubq_s16(step2[16], step2[17]); - step1[18] = vqsubq_s16(step2[19], step2[18]); - step1[19] = vqaddq_s16(step2[19], step2[18]); - step1[20] = vqaddq_s16(step2[20], step2[21]); - step1[21] = vqsubq_s16(step2[20], step2[21]); - step1[22] = vqsubq_s16(step2[23], step2[22]); - step1[23] = vqaddq_s16(step2[23], step2[22]); - step1[24] = vqaddq_s16(step2[24], step2[25]); - step1[25] = vqsubq_s16(step2[24], step2[25]); - step1[26] = vqsubq_s16(step2[27], step2[26]); - step1[27] = vqaddq_s16(step2[27], step2[26]); - step1[28] = vqaddq_s16(step2[28], step2[29]); - step1[29] = vqsubq_s16(step2[28], step2[29]); - step1[30] = vqsubq_s16(step2[31], step2[30]); - step1[31] = vqaddq_s16(step2[31], step2[30]); - - // stage 4 - - btf_16_neon(step1[4], cospi[56], cospi[8], &step2[4], &step2[7]); - btf_16_neon(step1[6], -cospi[40], cospi[24], &step2[5], &step2[6]); - btf_16_lane_0_1_neon(step1[30], step1[17], c0, &step2[30], &step2[17]); - btf_16_lane_1_0_neon(vnegq_s16(step1[18]), vnegq_s16(step1[29]), c0, - &step2[18], &step2[29]); - btf_16_lane_2_3_neon(step1[26], step1[21], c0, &step2[26], &step2[21]); - btf_16_lane_3_2_neon(vnegq_s16(step1[22]), vnegq_s16(step1[25]), c0, - &step2[22], &step2[25]); - - step2[0] = step1[0]; - step2[2] = step1[2]; - step2[8] = vqaddq_s16(step1[8], step1[9]); - step2[9] = vqsubq_s16(step1[8], 
step1[9]); - step2[10] = vqsubq_s16(step1[11], step1[10]); - step2[11] = vqaddq_s16(step1[11], step1[10]); - step2[12] = vqaddq_s16(step1[12], step1[13]); - step2[13] = vqsubq_s16(step1[12], step1[13]); - step2[14] = vqsubq_s16(step1[15], step1[14]); - step2[15] = vqaddq_s16(step1[15], step1[14]); - step2[16] = step1[16]; - step2[19] = step1[19]; - step2[20] = step1[20]; - step2[23] = step1[23]; - step2[24] = step1[24]; - step2[27] = step1[27]; - step2[28] = step1[28]; - step2[31] = step1[31]; - - // stage 5 - - t32[0] = vmull_n_s16(vget_low_s16(step2[0]), cospi[32]); - t32[1] = vmull_n_s16(vget_high_s16(step2[0]), cospi[32]); - - step1[0] = vcombine_s16(vrshrn_n_s32(t32[0], INV_COS_BIT), - vrshrn_n_s32(t32[1], INV_COS_BIT)); - - btf_16_neon(step2[2], cospi[48], cospi[16], &step1[2], &step1[3]); - btf_16_lane_2_3_neon(step2[14], step2[9], c1, &step1[14], &step1[9]); - btf_16_lane_3_2_neon(vnegq_s16(step2[10]), vnegq_s16(step2[13]), c1, - &step1[10], &step1[13]); - - step1[4] = vqaddq_s16(step2[4], step2[5]); - step1[5] = vqsubq_s16(step2[4], step2[5]); - step1[6] = vqsubq_s16(step2[7], step2[6]); - step1[7] = vqaddq_s16(step2[7], step2[6]); - step1[8] = step2[8]; - step1[11] = step2[11]; - step1[12] = step2[12]; - step1[15] = step2[15]; - step1[16] = vqaddq_s16(step2[16], step2[19]); - step1[17] = vqaddq_s16(step2[17], step2[18]); - step1[18] = vqsubq_s16(step2[17], step2[18]); - step1[19] = vqsubq_s16(step2[16], step2[19]); - step1[20] = vqsubq_s16(step2[23], step2[20]); - step1[21] = vqsubq_s16(step2[22], step2[21]); - step1[22] = vqaddq_s16(step2[22], step2[21]); - step1[23] = vqaddq_s16(step2[23], step2[20]); - step1[24] = vqaddq_s16(step2[24], step2[27]); - step1[25] = vqaddq_s16(step2[25], step2[26]); - step1[26] = vqsubq_s16(step2[25], step2[26]); - step1[27] = vqsubq_s16(step2[24], step2[27]); - step1[28] = vqsubq_s16(step2[31], step2[28]); - step1[29] = vqsubq_s16(step2[30], step2[29]); - step1[30] = vqaddq_s16(step2[30], step2[29]); - step1[31] = 
vqaddq_s16(step2[31], step2[28]); - - // stage 6 - - btf_16_lane_0_1_neon(step1[6], step1[5], c1, &step2[6], &step2[5]); - btf_16_lane_2_3_neon(step1[29], step1[18], c1, &step2[29], &step2[18]); - btf_16_lane_2_3_neon(step1[28], step1[19], c1, &step2[28], &step2[19]); - btf_16_lane_3_2_neon(vnegq_s16(step1[20]), vnegq_s16(step1[27]), c1, - &step2[20], &step2[27]); - btf_16_lane_3_2_neon(vnegq_s16(step1[21]), vnegq_s16(step1[26]), c1, - &step2[21], &step2[26]); - - step2[0] = vqaddq_s16(step1[0], step1[3]); - step2[1] = vqaddq_s16(step1[0], step1[2]); - step2[2] = vqsubq_s16(step1[0], step1[2]); - step2[3] = vqsubq_s16(step1[0], step1[3]); - step2[4] = step1[4]; - step2[7] = step1[7]; - step2[8] = vqaddq_s16(step1[8], step1[11]); - step2[9] = vqaddq_s16(step1[9], step1[10]); - step2[10] = vqsubq_s16(step1[9], step1[10]); - step2[11] = vqsubq_s16(step1[8], step1[11]); - step2[12] = vqsubq_s16(step1[15], step1[12]); - step2[13] = vqsubq_s16(step1[14], step1[13]); - step2[14] = vqaddq_s16(step1[14], step1[13]); - step2[15] = vqaddq_s16(step1[15], step1[12]); - step2[16] = step1[16]; - step2[17] = step1[17]; - step2[22] = step1[22]; - step2[23] = step1[23]; - step2[24] = step1[24]; - step2[25] = step1[25]; - step2[30] = step1[30]; - step2[31] = step1[31]; - - // stage 7 - - btf_16_lane_0_1_neon(step2[13], step2[10], c1, &step1[13], &step1[10]); - btf_16_lane_0_1_neon(step2[12], step2[11], c1, &step1[12], &step1[11]); - - step1[0] = vqaddq_s16(step2[0], step2[7]); - step1[1] = vqaddq_s16(step2[1], step2[6]); - step1[2] = vqaddq_s16(step2[2], step2[5]); - step1[3] = vqaddq_s16(step2[3], step2[4]); - step1[4] = vqsubq_s16(step2[3], step2[4]); - step1[5] = vqsubq_s16(step2[2], step2[5]); - step1[6] = vqsubq_s16(step2[1], step2[6]); - step1[7] = vqsubq_s16(step2[0], step2[7]); - step1[8] = step2[8]; - step1[9] = step2[9]; - step1[14] = step2[14]; - step1[15] = step2[15]; - step1[16] = vqaddq_s16(step2[16], step2[23]); - step1[17] = vqaddq_s16(step2[17], step2[22]); - 
step1[18] = vqaddq_s16(step2[18], step2[21]); - step1[19] = vqaddq_s16(step2[19], step2[20]); - step1[20] = vqsubq_s16(step2[19], step2[20]); - step1[21] = vqsubq_s16(step2[18], step2[21]); - step1[22] = vqsubq_s16(step2[17], step2[22]); - step1[23] = vqsubq_s16(step2[16], step2[23]); - step1[24] = vqsubq_s16(step2[31], step2[24]); - step1[25] = vqsubq_s16(step2[30], step2[25]); - step1[26] = vqsubq_s16(step2[29], step2[26]); - step1[27] = vqsubq_s16(step2[28], step2[27]); - step1[28] = vqaddq_s16(step2[27], step2[28]); - step1[29] = vqaddq_s16(step2[26], step2[29]); - step1[30] = vqaddq_s16(step2[25], step2[30]); - step1[31] = vqaddq_s16(step2[24], step2[31]); - - // stage 8 - - btf_16_lane_0_1_neon(step1[27], step1[20], c1, &step2[27], &step2[20]); - btf_16_lane_0_1_neon(step1[26], step1[21], c1, &step2[26], &step2[21]); - btf_16_lane_0_1_neon(step1[25], step1[22], c1, &step2[25], &step2[22]); - btf_16_lane_0_1_neon(step1[24], step1[23], c1, &step2[24], &step2[23]); - - step2[0] = vqaddq_s16(step1[0], step1[15]); - step2[1] = vqaddq_s16(step1[1], step1[14]); - step2[2] = vqaddq_s16(step1[2], step1[13]); - step2[3] = vqaddq_s16(step1[3], step1[12]); - step2[4] = vqaddq_s16(step1[4], step1[11]); - step2[5] = vqaddq_s16(step1[5], step1[10]); - step2[6] = vqaddq_s16(step1[6], step1[9]); - step2[7] = vqaddq_s16(step1[7], step1[8]); - step2[8] = vqsubq_s16(step1[7], step1[8]); - step2[9] = vqsubq_s16(step1[6], step1[9]); - step2[10] = vqsubq_s16(step1[5], step1[10]); - step2[11] = vqsubq_s16(step1[4], step1[11]); - step2[12] = vqsubq_s16(step1[3], step1[12]); - step2[13] = vqsubq_s16(step1[2], step1[13]); - step2[14] = vqsubq_s16(step1[1], step1[14]); - step2[15] = vqsubq_s16(step1[0], step1[15]); - step2[16] = step1[16]; - step2[17] = step1[17]; - step2[18] = step1[18]; - step2[19] = step1[19]; - step2[28] = step1[28]; - step2[29] = step1[29]; - step2[30] = step1[30]; - step2[31] = step1[31]; - - // stage 9 - - out[0] = vqaddq_s16(step2[0], step2[31]); - out[1] = 
vqaddq_s16(step2[1], step2[30]); - out[2] = vqaddq_s16(step2[2], step2[29]); - out[3] = vqaddq_s16(step2[3], step2[28]); - out[4] = vqaddq_s16(step2[4], step2[27]); - out[5] = vqaddq_s16(step2[5], step2[26]); - out[6] = vqaddq_s16(step2[6], step2[25]); - out[7] = vqaddq_s16(step2[7], step2[24]); - out[8] = vqaddq_s16(step2[8], step2[23]); - out[9] = vqaddq_s16(step2[9], step2[22]); - out[10] = vqaddq_s16(step2[10], step2[21]); - out[11] = vqaddq_s16(step2[11], step2[20]); - out[12] = vqaddq_s16(step2[12], step2[19]); - out[13] = vqaddq_s16(step2[13], step2[18]); - out[14] = vqaddq_s16(step2[14], step2[17]); - out[15] = vqaddq_s16(step2[15], step2[16]); - out[16] = vqsubq_s16(step2[15], step2[16]); - out[17] = vqsubq_s16(step2[14], step2[17]); - out[18] = vqsubq_s16(step2[13], step2[18]); - out[19] = vqsubq_s16(step2[12], step2[19]); - out[20] = vqsubq_s16(step2[11], step2[20]); - out[21] = vqsubq_s16(step2[10], step2[21]); - out[22] = vqsubq_s16(step2[9], step2[22]); - out[23] = vqsubq_s16(step2[8], step2[23]); - out[24] = vqsubq_s16(step2[7], step2[24]); - out[25] = vqsubq_s16(step2[6], step2[25]); - out[26] = vqsubq_s16(step2[5], step2[26]); - out[27] = vqsubq_s16(step2[4], step2[27]); - out[28] = vqsubq_s16(step2[3], step2[28]); - out[29] = vqsubq_s16(step2[2], step2[29]); - out[30] = vqsubq_s16(step2[1], step2[30]); - out[31] = vqsubq_s16(step2[0], step2[31]); -} - -// Functions for blocks with eob at DC and within -// topleft 8x8, 16x16, 32x32 corner -static const transform_1d_neon - lowbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = { - { - { av1_idct4_new, av1_idct4_new, NULL, NULL }, - { av1_iadst4_new, av1_iadst4_new, NULL, NULL }, - { av1_iidentity4_c, av1_iidentity4_c, NULL, NULL }, - }, - { { av1_idct8_new, av1_idct8_new, NULL, NULL }, - { av1_iadst8_new, av1_iadst8_new, NULL, NULL }, - { av1_iidentity8_c, av1_iidentity8_c, NULL, NULL } }, - { - { av1_idct16_new, av1_idct16_new, av1_idct16_new, NULL }, - { av1_iadst16_new, av1_iadst16_new, 
av1_iadst16_new, NULL }, - { av1_iidentity16_c, av1_iidentity16_c, av1_iidentity16_c, NULL }, - }, - { { av1_idct32_new, av1_idct32_new, av1_idct32_new, av1_idct32_new }, - { NULL, NULL, NULL, NULL }, - { av1_iidentity32_c, av1_iidentity32_c, av1_iidentity32_c, - av1_iidentity32_c } }, - { { av1_idct64_new, av1_idct64_new, av1_idct64_new, av1_idct64_new }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } } - }; - -static const transform_neon - lowbd_txfm_all_1d_zeros_w_arr[TX_SIZES][ITX_TYPES_1D][4] = { - { - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - }, - { { idct8_low1_new_neon, idct8_new_neon, NULL, NULL }, - { iadst8_low1_new_neon, iadst8_new_neon, NULL, NULL }, - { identity8_new_neon, identity8_new_neon, NULL, NULL } }, - { - { idct16_low1_new_neon, idct16_low8_new_neon, idct16_new_neon, NULL }, - { iadst16_low1_new_neon, iadst16_low8_new_neon, iadst16_new_neon, - NULL }, - { identity16_new_neon, identity16_new_neon, identity16_new_neon, - NULL }, - }, - { { idct32_low1_new_neon, idct32_low8_new_neon, idct32_low16_new_neon, - idct32_new_neon }, - { NULL, NULL, NULL, NULL }, - { identity32_new_neon, identity32_new_neon, identity32_new_neon, - identity32_new_neon } }, - { { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } } - }; - -static INLINE void lowbd_inv_txfm2d_add_wxh_idtx_neon( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]); - int32_t *temp_in = txfm_buf; - - int eobx, eoby; - get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int 
txfm_size_row = tx_size_high[tx_size]; - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 }; - int r, bd = 8; - - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const transform_1d_neon row_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_1d_neon col_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - - // row tx - int row_start = (buf_size_nonzero_h_div8 * 8); - for (int i = 0; i < row_start; i++) { - if (abs(rect_type) == 1) { - for (int j = 0; j < txfm_size_col; j++) - temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits); - row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range); - } else { - row_txfm(input, buf_ptr, cos_bit_row, stage_range); - } - av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]); - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - - // Doing memset for the rows which are not processed in row transform. 
- memset(buf_ptr, 0, - sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start)); - - // col tx - for (int c = 0; c < txfm_size_col; c++) { - for (r = 0; r < txfm_size_row; ++r) temp_in[r] = buf[r * txfm_size_col + c]; - - col_txfm(temp_in, temp_out, cos_bit_col, stage_range); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - } -} - -static INLINE void lowbd_inv_txfm2d_add_idtx_neon(const int32_t *input, - uint8_t *output, int stride, - TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - int16x8_t a[32 * 4]; - int16x8_t b[32 * 4]; - int eobx, eoby; - get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - lowbd_inv_txfm2d_memset_neon(&a[0], (txfm_size_col * (txfm_size_row) >> 3), - 0); - lowbd_inv_txfm2d_memset_neon(&b[0], (txfm_size_col * (txfm_size_row) >> 3), - 0); - const int buf_size_w_div8 = txfm_size_col >> 3; - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3; - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const int32_t *input_1; - int temp_b = 0; - const transform_neon row_txfm = - lowbd_txfm_all_1d_zeros_w_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_neon col_txfm = - lowbd_txfm_all_1d_zeros_w_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - - for (int i = 0; i < 
buf_size_nonzero_h_div8; i++) { - input_1 = input; - for (int j = 0; j < buf_size_nonzero_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - load_buffer_32bit_to_16bit_neon(input_1, &a[k], txfm_size_col); - transpose_s16_8x8q(&a[k], &a[k]); - input_1 += 8; - } - input += (txfm_size_col * 8); - if (abs(rect_type) == 1) { - int y = i * txfm_size_col; - round_shift_for_rect(&a[y], &a[y], txfm_size_col); - } - row_txfm(&a[i * txfm_size_col], &a[i * txfm_size_col], cos_bit_row, 0); - av1_round_shift_array_16_neon(&a[i * txfm_size_col], txfm_size_col, - -shift[0]); - for (int j = 0; j < buf_size_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - transpose_s16_8x8q(&a[k], &b[temp_b + txfm_size_row * j]); - } - temp_b += 8; - } - for (int j = 0; j < buf_size_w_div8; ++j) { - col_txfm(&b[j * txfm_size_row], &b[j * txfm_size_row], cos_bit_col, 0); - av1_round_shift_array_16_neon(&b[j * txfm_size_row], txfm_size_row, - -shift[1]); - } - if (txfm_size_col >= 16) { - for (int i = 0; i < (txfm_size_col >> 4); i++) { - lowbd_add_flip_buffer_16xn_neon( - &b[i * txfm_size_row * 2], output + 16 * i, stride, 0, txfm_size_row); - } - } else if (txfm_size_col == 8) { - lowbd_add_flip_buffer_8xn_neon(b, output, stride, 0, txfm_size_row); - } -} - -static INLINE void lowbd_inv_txfm2d_add_v_wxh_identity_neon( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]); - int32_t *temp_in = txfm_buf; - - int eobx, eoby; - get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 
3; - - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 }; - int r, bd = 8; - - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const transform_1d_neon row_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_1d_neon col_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - // row tx - int row_start = (buf_size_nonzero_h_div8 * 8); - for (int i = 0; i < row_start; i++) { - if (abs(rect_type) == 1) { - for (int j = 0; j < txfm_size_col; j++) - temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits); - row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range); - } else { - row_txfm(input, buf_ptr, cos_bit_row, stage_range); - } - av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]); - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - // Doing memset for the rows which are not processed in row transform. 
- memset(buf_ptr, 0, - sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start)); - - // col tx - for (int c = 0; c < txfm_size_col; c++) { - if (lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + c]; - } else { - // flip left right - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; - } - col_txfm(temp_in, temp_out, cos_bit_col, stage_range); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - - if (ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - } else { - // flip upside down - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = highbd_clip_pixel_add( - output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); - } - } - } -} - -static INLINE void lowbd_inv_txfm2d_add_v_identity_neon( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - int16x8_t a[16 * 2]; - int16x8_t b[16 * 2]; - int eobx, eoby, ud_flip, lr_flip; - get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - lowbd_inv_txfm2d_memset_neon(&b[0], (txfm_size_col * (txfm_size_row) >> 3), - 0); - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - const int buf_size_w_div8 = txfm_size_col >> 3; - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3; - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const int32_t *input_1; 
- int temp_b = 0; - const transform_neon row_txfm = - lowbd_txfm_all_1d_zeros_w_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_neon col_txfm = - lowbd_txfm_all_1d_zeros_w_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < buf_size_nonzero_h_div8; i++) { - input_1 = input; - for (int j = 0; j < buf_size_nonzero_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - load_buffer_32bit_to_16bit_neon(input_1, &a[k], txfm_size_col); - transpose_s16_8x8q(&a[k], &a[k]); - input_1 += 8; - } - input += (txfm_size_col * 8); - if (abs(rect_type) == 1) { - int y = i * txfm_size_col; - round_shift_for_rect(&a[y], &a[y], txfm_size_col); - } - row_txfm(&a[i * txfm_size_col], &a[i * txfm_size_col], cos_bit_row, 0); - av1_round_shift_array_16_neon(&a[i * txfm_size_col], txfm_size_col, - -shift[0]); - if (lr_flip == 1) { - for (int j = 0; j < buf_size_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - flip_buf_ud_neon(&a[k], 8); - transpose_s16_8x8q( - &a[k], &b[temp_b + txfm_size_row * (buf_size_w_div8 - 1 - j)]); - } - temp_b += 8; - } else { - for (int j = 0; j < buf_size_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - transpose_s16_8x8q(&a[k], &b[temp_b + txfm_size_row * j]); - } - temp_b += 8; - } - } - for (int j = 0; j < buf_size_w_div8; ++j) { - col_txfm(&b[j * txfm_size_row], &b[j * txfm_size_row], cos_bit_col, 0); - av1_round_shift_array_16_neon(&b[j * txfm_size_row], txfm_size_row, - -shift[1]); - } - if (txfm_size_col >= 16) { - for (int i = 0; i < (txfm_size_col >> 4); i++) { - lowbd_add_flip_buffer_16xn_neon( - &b[i * txfm_size_row * 2], output + 16 * i, stride, 0, txfm_size_row); - } - } else if (txfm_size_col == 8) { - lowbd_add_flip_buffer_8xn_neon(b, output, stride, 0, txfm_size_row); - } -} - -static INLINE void lowbd_inv_txfm2d_add_h_wxh_identity_neon( - const int32_t *input, uint8_t *output, int stride, TX_TYPE 
tx_type, - TX_SIZE tx_size, int eob) { - DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]); - int32_t *temp_in = txfm_buf; - - int eobx, eoby; - get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 }; - int r, bd = 8; - - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const transform_1d_neon row_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_1d_neon col_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - // row tx - int row_start = (buf_size_nonzero_h_div8 * 8); - for (int i = 0; i < row_start; i++) { - if (abs(rect_type) == 1) { - for (int j = 0; j < txfm_size_col; j++) - temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits); - row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range); - } else { - row_txfm(input, buf_ptr, cos_bit_row, stage_range); - } - av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]); - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - // Doing memset for the rows which are not processed in row transform. 
- memset(buf_ptr, 0, - sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start)); - - // col tx - for (int c = 0; c < txfm_size_col; c++) { - if (lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + c]; - } else { - // flip left right - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; - } - col_txfm(temp_in, temp_out, cos_bit_col, stage_range); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - - if (ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - } else { - // flip upside down - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = highbd_clip_pixel_add( - output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); - } - } - } -} - -static INLINE void lowbd_inv_txfm2d_add_h_identity_neon( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - int16x8_t a[16 * 2]; - int16x8_t b[16 * 2]; - int eobx, eoby, ud_flip, lr_flip; - get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - lowbd_inv_txfm2d_memset_neon(&a[0], (txfm_size_col * (txfm_size_row) >> 3), - 0); - const int buf_size_w_div8 = txfm_size_col >> 3; - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3; - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const int32_t *input_1; 
- int temp_b = 0; - const transform_neon row_txfm = - lowbd_txfm_all_1d_zeros_w_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_neon col_txfm = - lowbd_txfm_all_1d_zeros_w_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < buf_size_nonzero_h_div8; i++) { - input_1 = input; - for (int j = 0; j < buf_size_nonzero_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - load_buffer_32bit_to_16bit_neon(input_1, &a[k], txfm_size_col); - transpose_s16_8x8q(&a[k], &a[k]); - input_1 += 8; - } - input += (txfm_size_col * 8); - if (abs(rect_type) == 1) { - int y = i * txfm_size_col; - round_shift_for_rect(&a[y], &a[y], txfm_size_col); - } - row_txfm(&a[i * txfm_size_col], &a[i * txfm_size_col], cos_bit_row, 0); - av1_round_shift_array_16_neon(&a[i * txfm_size_col], txfm_size_col, - -shift[0]); - for (int j = 0; j < buf_size_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - transpose_s16_8x8q(&a[k], &b[temp_b + txfm_size_row * j]); - } - temp_b += 8; - } - for (int j = 0; j < buf_size_w_div8; ++j) { - col_txfm(&b[j * txfm_size_row], &b[j * txfm_size_row], cos_bit_col, 0); - av1_round_shift_array_16_neon(&b[j * txfm_size_row], txfm_size_row, - -shift[1]); - } - if (txfm_size_col >= 16) { - for (int i = 0; i < (txfm_size_col >> 4); i++) { - lowbd_add_flip_buffer_16xn_neon(&b[i * txfm_size_row * 2], - output + 16 * i, stride, ud_flip, - txfm_size_row); - } - } else if (txfm_size_col == 8) { - lowbd_add_flip_buffer_8xn_neon(b, output, stride, ud_flip, txfm_size_row); - } -} - -static INLINE void lowbd_inv_txfm2d_add_4x4_neon(const int32_t *input, - uint8_t *output, int stride, - TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - (void)eob; - DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 8 + 8]); - int32_t *temp_in = txfm_buf; - - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = 
get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 }; - int r, bd = 8; - const transform_1d_neon row_txfm = - lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_neon col_txfm = - lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < txfm_size_row; i++) { - row_txfm(input, buf_ptr, cos_bit_row, stage_range); - - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - - for (int c = 0; c < txfm_size_col; ++c) { - if (lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + c]; - } else { - // flip left right - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; - } - col_txfm(temp_in, temp_out, cos_bit_col, stage_range); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - - if (ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - } else { - // flip upside down - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = highbd_clip_pixel_add( - output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); - } - } - } -} - -void lowbd_inv_txfm2d_add_4x8_neon(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, TX_SIZE tx_size, - int eob) { - (void)eob; - DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]); - int32_t *temp_in = txfm_buf; - - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = 
get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 }; - int r, bd = 8; - const transform_1d_neon row_txfm = - lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_neon col_txfm = - lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < txfm_size_row; i++) { - for (int j = 0; j < txfm_size_col; j++) - temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits); - - row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range); - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - - for (int c = 0; c < txfm_size_col; ++c) { - if (lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + c]; - } else { - // flip left right - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; - } - col_txfm(temp_in, temp_out, cos_bit_col, stage_range); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - - if (ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - } else { - // flip upside down - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = highbd_clip_pixel_add( - output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); - } - } - } -} - -void lowbd_inv_txfm2d_add_8x4_neon(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, TX_SIZE tx_size, - int eob) { - (void)eob; - 
DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]); - int32_t *temp_in = txfm_buf; - - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 }; - int r, bd = 8; - const transform_1d_neon row_txfm = - lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_neon col_txfm = - lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < txfm_size_row; i++) { - for (int j = 0; j < txfm_size_col; j++) - temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits); - - row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range); - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - - for (int c = 0; c < txfm_size_col; ++c) { - if (lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + c]; - } else { - // flip left right - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; - } - col_txfm(temp_in, temp_out, cos_bit_col, stage_range); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - - if (ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - } else { - // flip upside down - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = highbd_clip_pixel_add( - output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); - } - } - } -} 
- -void lowbd_inv_txfm2d_add_4x16_neon(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - (void)eob; - DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]); - int32_t *temp_in = txfm_buf; - - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 }; - int r, bd = 8; - const transform_1d_neon row_txfm = - lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_neon col_txfm = - lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < txfm_size_row; i++) { - row_txfm(input, buf_ptr, cos_bit_row, stage_range); - av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]); - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - - for (int c = 0; c < txfm_size_col; ++c) { - if (lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + c]; - } else { - // flip left right - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; - } - col_txfm(temp_in, temp_out, cos_bit_col, stage_range); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - - if (ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - } else { - // flip upside down - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = 
highbd_clip_pixel_add( - output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); - } - } - } -} - -void lowbd_inv_txfm2d_add_16x4_neon(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - (void)eob; - - DECLARE_ALIGNED(32, int, txfm_buf[16 * 4 + 16 + 16]); - int32_t *temp_in = txfm_buf; - - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 }; - int r, bd = 8; - const transform_1d_neon row_txfm = - lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_neon col_txfm = - lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < txfm_size_row; i++) { - row_txfm(input, buf_ptr, cos_bit_row, stage_range); - av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]); - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - - for (int c = 0; c < txfm_size_col; ++c) { - if (lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + c]; - } else { - // flip left right - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; - } - col_txfm(temp_in, temp_out, cos_bit_col, stage_range); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - - if (ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - 
} else { - // flip upside down - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = highbd_clip_pixel_add( - output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); - } - } - } -} - -static INLINE void lowbd_inv_txfm2d_add_wxh_no_identity_neon( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]); - int32_t *temp_in = txfm_buf; - - int eobx, eoby, ud_flip, lr_flip, row_start; - get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 }; - const int bd = 8; - int r; - - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const transform_1d_neon row_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_1d_neon col_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - row_start = (buf_size_nonzero_h_div8 << 3); - - for (int i = 0; i < row_start; i++) { - if (abs(rect_type) == 1) { - for (int j = 0; j < txfm_size_col; j++) - temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, 
NewSqrt2Bits); - row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range); - } else { - row_txfm(input, buf_ptr, cos_bit_row, stage_range); - } - av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]); - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - - // Doing memset for the rows which are not processed in row transform. - memset(buf_ptr, 0, - sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start)); - - for (int c = 0; c < txfm_size_col; c++) { - if (lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + c]; - } else { - // flip left right - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; - } - col_txfm(temp_in, temp_out, cos_bit_col, stage_range); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - - if (ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - } else { - // flip upside down - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = highbd_clip_pixel_add( - output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); - } - } - } -} - -static INLINE void lowbd_inv_txfm2d_add_no_identity_neon( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - int16x8_t a[64 * 8]; - int16x8_t b[64 * 8]; - int eobx, eoby, ud_flip, lr_flip; - get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - const int buf_size_w_div8 = txfm_size_col >> 3; - const int 
buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3; - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const int32_t *input_1; - int temp_b = 0; - - const transform_neon row_txfm = - lowbd_txfm_all_1d_zeros_w_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_neon col_txfm = - lowbd_txfm_all_1d_zeros_w_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < buf_size_nonzero_h_div8; i++) { - input_1 = input; - for (int j = 0; j < buf_size_nonzero_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - load_buffer_32bit_to_16bit_neon(input_1, &a[k], txfm_size_col); - transpose_s16_8x8q(&a[k], &a[k]); - input_1 += 8; - } - input += (txfm_size_col * 8); - if (abs(rect_type) == 1) { - int y = i * txfm_size_col; - round_shift_for_rect(&a[y], &a[y], txfm_size_col); - } - row_txfm(&a[i * txfm_size_col], &a[i * txfm_size_col], cos_bit_row, 0); - av1_round_shift_array_16_neon(&a[i * txfm_size_col], txfm_size_col, - -shift[0]); - if (lr_flip == 1) { - for (int j = 0; j < buf_size_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - flip_buf_ud_neon(&a[k], 8); - transpose_s16_8x8q( - &a[k], &b[temp_b + txfm_size_row * (buf_size_w_div8 - 1 - j)]); - } - temp_b += 8; - } else { - for (int j = 0; j < buf_size_w_div8; ++j) { - int k = j * 8 + i * txfm_size_col; - transpose_s16_8x8q(&a[k], &b[temp_b + txfm_size_row * j]); - } - temp_b += 8; - } - } - for (int j = 0; j < buf_size_w_div8; ++j) { - col_txfm(&b[j * txfm_size_row], &b[j * txfm_size_row], cos_bit_col, 0); - av1_round_shift_array_16_neon(&b[j * txfm_size_row], txfm_size_row, - -shift[1]); - } - - if (txfm_size_col >= 16) { - for (int i = 0; i < (txfm_size_col >> 4); i++) { - lowbd_add_flip_buffer_16xn_neon(&b[i * txfm_size_row * 2], - output + 16 * i, stride, ud_flip, - 
txfm_size_row); - } - } else if (txfm_size_col == 8) { - lowbd_add_flip_buffer_8xn_neon(b, output, stride, ud_flip, txfm_size_row); - } -} - -static INLINE void lowbd_inv_txfm2d_add_wxh_universe_neon( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - switch (tx_type) { - case IDTX: - lowbd_inv_txfm2d_add_wxh_idtx_neon(input, output, stride, tx_type, - tx_size, eob); - break; - - case H_DCT: - case H_ADST: - case H_FLIPADST: - lowbd_inv_txfm2d_add_v_wxh_identity_neon(input, output, stride, tx_type, - tx_size, eob); - break; - - case V_DCT: - case V_ADST: - case V_FLIPADST: - lowbd_inv_txfm2d_add_h_wxh_identity_neon(input, output, stride, tx_type, - tx_size, eob); - break; - - default: - lowbd_inv_txfm2d_add_wxh_no_identity_neon(input, output, stride, tx_type, - tx_size, eob); - break; - } -} - -static INLINE void lowbd_inv_txfm2d_add_universe_neon( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - switch (tx_type) { - case IDTX: - lowbd_inv_txfm2d_add_idtx_neon(input, output, stride, tx_type, tx_size, - eob); - break; - - case H_DCT: - case H_ADST: - case H_FLIPADST: - lowbd_inv_txfm2d_add_v_identity_neon(input, output, stride, tx_type, - tx_size, eob); - break; - - case V_DCT: - case V_ADST: - case V_FLIPADST: - lowbd_inv_txfm2d_add_h_identity_neon(input, output, stride, tx_type, - tx_size, eob); - break; - - default: - lowbd_inv_txfm2d_add_no_identity_neon(input, output, stride, tx_type, - tx_size, eob); - break; - } -} - -void av1_lowbd_inv_txfm2d_add_neon(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, TX_SIZE tx_size, - int eob) { - int row; - switch (tx_size) { - case TX_4X4: - lowbd_inv_txfm2d_add_4x4_neon(input, output, stride, tx_type, tx_size, - eob); - break; - - case TX_4X8: - lowbd_inv_txfm2d_add_4x8_neon(input, output, stride, tx_type, tx_size, - eob); - break; - - case TX_8X4: - lowbd_inv_txfm2d_add_8x4_neon(input, output, 
stride, tx_type, tx_size, - eob); - break; - - case TX_4X16: - lowbd_inv_txfm2d_add_4x16_neon(input, output, stride, tx_type, tx_size, - eob); - break; - - case TX_16X4: - lowbd_inv_txfm2d_add_16x4_neon(input, output, stride, tx_type, tx_size, - eob); - break; - - case TX_16X64: { - lowbd_inv_txfm2d_add_wxh_universe_neon(input, output, stride, tx_type, - tx_size, eob); - } break; - - case TX_64X16: { - int32_t mod_input[64 * 16]; - for (row = 0; row < 16; ++row) { - memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); - memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); - } - lowbd_inv_txfm2d_add_wxh_universe_neon(mod_input, output, stride, tx_type, - tx_size, eob); - } break; - - case TX_32X64: { - lowbd_inv_txfm2d_add_wxh_universe_neon(input, output, stride, tx_type, - tx_size, eob); - } break; - - case TX_64X32: { - int32_t mod_input[64 * 32]; - for (row = 0; row < 32; ++row) { - memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); - memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); - } - lowbd_inv_txfm2d_add_wxh_universe_neon(mod_input, output, stride, tx_type, - tx_size, eob); - } break; - - case TX_64X64: { - int32_t mod_input[64 * 64]; - for (row = 0; row < 32; ++row) { - memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); - memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); - } - lowbd_inv_txfm2d_add_wxh_universe_neon(mod_input, output, stride, tx_type, - tx_size, eob); - } break; - - default: - lowbd_inv_txfm2d_add_universe_neon(input, output, stride, tx_type, - tx_size, eob); - break; - } -} -void av1_inv_txfm_add_neon(const tran_low_t *dqcoeff, uint8_t *dst, int stride, - const TxfmParam *txfm_param) { - const TX_TYPE tx_type = txfm_param->tx_type; - if (!txfm_param->lossless) { - av1_lowbd_inv_txfm2d_add_neon(dqcoeff, dst, stride, tx_type, - txfm_param->tx_size, txfm_param->eob); - } else { - av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param); - } -} diff 
--git a/third_party/aom/av1/common/arm/av1_inv_txfm_neon.h b/third_party/aom/av1/common/arm/av1_inv_txfm_neon.h deleted file mode 100644 index 9ec658291..000000000 --- a/third_party/aom/av1/common/arm/av1_inv_txfm_neon.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#ifndef AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_ -#define AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_ - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" -#include "av1/common/enums.h" -#include "av1/common/av1_inv_txfm1d.h" -#include "av1/common/av1_inv_txfm1d_cfg.h" -#include "av1/common/av1_txfm.h" - -typedef void (*transform_1d_neon)(const int32_t *input, int32_t *output, - const int8_t cos_bit, - const int8_t *stage_ptr); -typedef void (*transform_neon)(int16x8_t *input, int16x8_t *output, - int8_t cos_bit, int bit); - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = { - 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, -}; - -DECLARE_ALIGNED(16, static const int16_t, - av1_eob_to_eobxy_16x16_default[16]) = { - 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, - 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, -}; - -DECLARE_ALIGNED(16, static const int16_t, - av1_eob_to_eobxy_32x32_default[32]) = { - 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, - 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, - 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 
0x1f1f, 0x1f1f, 0x1f1f, - 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = { - 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, - 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = { - 0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, -}; - -DECLARE_ALIGNED(16, static const int16_t, - av1_eob_to_eobxy_16x32_default[32]) = { - 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, - 0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, - 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, - 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, -}; - -DECLARE_ALIGNED(16, static const int16_t, - av1_eob_to_eobxy_32x16_default[16]) = { - 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, - 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = { - 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, - 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07, - 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, - 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = { - 0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f, -}; - -DECLARE_ALIGNED(16, static const int16_t *, - av1_eob_to_eobxy_default[TX_SIZES_ALL]) = { - NULL, - av1_eob_to_eobxy_8x8_default, - av1_eob_to_eobxy_16x16_default, - av1_eob_to_eobxy_32x32_default, - av1_eob_to_eobxy_32x32_default, - NULL, - NULL, - av1_eob_to_eobxy_8x16_default, - av1_eob_to_eobxy_16x8_default, - av1_eob_to_eobxy_16x32_default, - av1_eob_to_eobxy_32x16_default, - av1_eob_to_eobxy_32x32_default, - 
av1_eob_to_eobxy_32x32_default, - NULL, - NULL, - av1_eob_to_eobxy_8x32_default, - av1_eob_to_eobxy_32x8_default, - av1_eob_to_eobxy_16x32_default, - av1_eob_to_eobxy_32x16_default, -}; - -static const int lowbd_txfm_all_1d_zeros_idx[32] = { - 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -// Transform block width in log2 for eob (size of 64 map to 32) -static const int tx_size_wide_log2_eob[TX_SIZES_ALL] = { - 2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5, -}; - -static int eob_fill[32] = { - 0, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, -}; - -static INLINE void get_eobx_eoby_scan_default(int *eobx, int *eoby, - TX_SIZE tx_size, int eob) { - if (eob == 1) { - *eobx = 0; - *eoby = 0; - return; - } - - const int tx_w_log2 = tx_size_wide_log2_eob[tx_size]; - const int eob_row = (eob - 1) >> tx_w_log2; - const int eobxy = av1_eob_to_eobxy_default[tx_size][eob_row]; - *eobx = eobxy & 0xFF; - *eoby = eobxy >> 8; -} - -static INLINE void get_eobx_eoby_scan_v_identity(int *eobx, int *eoby, - TX_SIZE tx_size, int eob) { - eob -= 1; - const int txfm_size_row = tx_size_high[tx_size]; - const int eoby_max = AOMMIN(32, txfm_size_row) - 1; - *eobx = eob / (eoby_max + 1); - *eoby = (eob >= eoby_max) ? eoby_max : eob_fill[eob]; -} - -static INLINE void get_eobx_eoby_scan_h_identity(int *eobx, int *eoby, - TX_SIZE tx_size, int eob) { - eob -= 1; - const int txfm_size_col = tx_size_wide[tx_size]; - const int eobx_max = AOMMIN(32, txfm_size_col) - 1; - *eobx = (eob >= eobx_max) ? 
eobx_max : eob_fill[eob]; - const int temp_eoby = eob / (eobx_max + 1); - assert(temp_eoby < 32); - *eoby = eob_fill[temp_eoby]; -} - -#endif // AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_ diff --git a/third_party/aom/av1/common/arm/av1_txfm_neon.c b/third_party/aom/av1/common/arm/av1_txfm_neon.c deleted file mode 100644 index de3c54724..000000000 --- a/third_party/aom/av1/common/arm/av1_txfm_neon.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#include <arm_neon.h> -#include <assert.h> - -#include "aom_ports/mem.h" -#include "av1/common/arm/mem_neon.h" - -void av1_round_shift_array_neon(int32_t *arr, int size, int bit) { - assert(!(size % 4)); - if (!bit) return; - const int32x4_t dup_bits_n_32x4 = vdupq_n_s32((int32_t)(-bit)); - for (int i = 0; i < size; i += 4) { - int32x4_t tmp_q_s32 = vld1q_s32(arr); - tmp_q_s32 = vrshlq_s32(tmp_q_s32, dup_bits_n_32x4); - vst1q_s32(arr, tmp_q_s32); - arr += 4; - } -} diff --git a/third_party/aom/av1/common/arm/blend_a64_hmask_neon.c b/third_party/aom/av1/common/arm/blend_a64_hmask_neon.c deleted file mode 100644 index 7134f183e..000000000 --- a/third_party/aom/av1/common/arm/blend_a64_hmask_neon.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <arm_neon.h> -#include <assert.h> - -#include "aom/aom_integer.h" -#include "aom_dsp/blend.h" -#include "aom_ports/mem.h" -#include "av1/common/arm/mem_neon.h" -#include "aom_dsp/aom_dsp_common.h" -#include "config/aom_dsp_rtcd.h" - -void aom_blend_a64_hmask_neon(uint8_t *dst, uint32_t dst_stride, - const uint8_t *src0, uint32_t src0_stride, - const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int w, int h) { - assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); - assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); - - assert(h >= 2); - assert(w >= 2); - assert(IS_POWER_OF_TWO(h)); - assert(IS_POWER_OF_TWO(w)); - uint8x8_t tmp0, tmp1; - uint8x16_t res_q; - uint16x8_t res, res_low, res_high; - uint32x2_t tmp0_32 = vdup_n_u32(0), tmp1_32 = vdup_n_u32(0); - uint16x4_t tmp0_16 = vdup_n_u16(0), tmp1_16 = vdup_n_u16(0); - const uint8x8_t vdup_64 = vdup_n_u8((uint8_t)64); - - if (w >= 16) { - const uint8x16_t vdup_64_q = vdupq_n_u8((uint8_t)64); - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 16) { - __builtin_prefetch(src0); - __builtin_prefetch(src1); - const uint8x16_t tmp0_q = vld1q_u8(src0); - const uint8x16_t tmp1_q = vld1q_u8(src1); - const uint8x16_t m_q = vld1q_u8(mask); - const uint8x16_t max_minus_m_q = vsubq_u8(vdup_64_q, m_q); - res_low = vmull_u8(vget_low_u8(m_q), vget_low_u8(tmp0_q)); - res_low = - vmlal_u8(res_low, vget_low_u8(max_minus_m_q), vget_low_u8(tmp1_q)); - res_high = vmull_u8(vget_high_u8(m_q), vget_high_u8(tmp0_q)); - res_high = vmlal_u8(res_high, vget_high_u8(max_minus_m_q), - vget_high_u8(tmp1_q)); - res_q = vcombine_u8(vrshrn_n_u16(res_low, 
AOM_BLEND_A64_ROUND_BITS), - vrshrn_n_u16(res_high, AOM_BLEND_A64_ROUND_BITS)); - vst1q_u8(dst, res_q); - src0 += 16; - src1 += 16; - dst += 16; - mask += 16; - } - src0 += src0_stride - w; - src1 += src1_stride - w; - dst += dst_stride - w; - mask -= w; - } - } else if (w == 8) { - const uint8x8_t m = vld1_u8(mask); - const uint8x8_t max_minus_m = vsub_u8(vdup_64, m); - for (int i = 0; i < h; ++i) { - __builtin_prefetch(src0); - __builtin_prefetch(src1); - tmp0 = vld1_u8(src0); - tmp1 = vld1_u8(src1); - res = vmull_u8(m, tmp0); - res = vmlal_u8(res, max_minus_m, tmp1); - vst1_u8(dst, vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)); - src0 += src0_stride; - src1 += src1_stride; - dst += dst_stride; - } - } else if (w == 4) { - const uint8x8_t m = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)mask)); - const uint8x8_t max_minus_m = vsub_u8(vdup_64, m); - for (int i = 0; i < h; i += 2) { - __builtin_prefetch(src0 + 0 * src0_stride); - __builtin_prefetch(src0 + 1 * src0_stride); - __builtin_prefetch(src1 + 0 * src1_stride); - __builtin_prefetch(src1 + 1 * src1_stride); - load_unaligned_u8_4x2(src0, src0_stride, &tmp0_32); - tmp0 = vreinterpret_u8_u32(tmp0_32); - load_unaligned_u8_4x2(src1, src1_stride, &tmp1_32); - tmp1 = vreinterpret_u8_u32(tmp1_32); - res = vmull_u8(m, tmp0); - res = vmlal_u8(res, max_minus_m, tmp1); - vst1_lane_u32( - (uint32_t *)(dst + (0 * dst_stride)), - vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0); - vst1_lane_u32( - (uint32_t *)(dst + (1 * dst_stride)), - vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1); - src0 += (2 * src0_stride); - src1 += (2 * src1_stride); - dst += (2 * dst_stride); - } - } else if (w == 2) { - const uint8x8_t m = vreinterpret_u8_u16(vld1_dup_u16((uint16_t *)mask)); - const uint8x8_t max_minus_m = vsub_u8(vdup_64, m); - for (int i = 0; i < h; i += 2) { - __builtin_prefetch(src0 + 0 * src0_stride); - __builtin_prefetch(src0 + 1 * src0_stride); - __builtin_prefetch(src1 + 0 * 
src1_stride); - __builtin_prefetch(src1 + 1 * src1_stride); - load_unaligned_u8_2x2(src0, src0_stride, &tmp0_16); - tmp0 = vreinterpret_u8_u16(tmp0_16); - load_unaligned_u8_2x2(src1, src1_stride, &tmp1_16); - tmp1 = vreinterpret_u8_u16(tmp1_16); - res = vmull_u8(m, tmp0); - res = vmlal_u8(res, max_minus_m, tmp1); - vst1_lane_u16( - (uint16_t *)(dst + (0 * dst_stride)), - vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0); - vst1_lane_u16( - (uint16_t *)(dst + (1 * dst_stride)), - vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1); - src0 += (2 * src0_stride); - src1 += (2 * src1_stride); - dst += (2 * dst_stride); - } - } -} diff --git a/third_party/aom/av1/common/arm/blend_a64_vmask_neon.c b/third_party/aom/av1/common/arm/blend_a64_vmask_neon.c deleted file mode 100644 index 194e94c8c..000000000 --- a/third_party/aom/av1/common/arm/blend_a64_vmask_neon.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <arm_neon.h> -#include <assert.h> - -#include "aom/aom_integer.h" -#include "aom_dsp/blend.h" -#include "aom_ports/mem.h" -#include "av1/common/arm/mem_neon.h" -#include "aom_dsp/aom_dsp_common.h" -#include "config/aom_dsp_rtcd.h" - -void aom_blend_a64_vmask_neon(uint8_t *dst, uint32_t dst_stride, - const uint8_t *src0, uint32_t src0_stride, - const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int w, int h) { - uint8x8_t tmp0, tmp1; - uint8x16_t tmp0_q, tmp1_q, res_q; - uint16x8_t res, res_low, res_high; - uint32x2_t tmp0_32 = vdup_n_u32(0), tmp1_32 = vdup_n_u32(0); - uint16x4_t tmp0_16 = vdup_n_u16(0), tmp1_16 = vdup_n_u16(0); - assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); - assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); - - assert(h >= 2); - assert(w >= 2); - assert(IS_POWER_OF_TWO(h)); - assert(IS_POWER_OF_TWO(w)); - - if (w >= 16) { - for (int i = 0; i < h; ++i) { - const uint8x8_t m = vdup_n_u8((uint8_t)mask[i]); - const uint8x8_t max_minus_m = vdup_n_u8(64 - (uint8_t)mask[i]); - for (int j = 0; j < w; j += 16) { - __builtin_prefetch(src0); - __builtin_prefetch(src1); - tmp0_q = vld1q_u8(src0); - tmp1_q = vld1q_u8(src1); - res_low = vmull_u8(m, vget_low_u8(tmp0_q)); - res_low = vmlal_u8(res_low, max_minus_m, vget_low_u8(tmp1_q)); - res_high = vmull_u8(m, vget_high_u8(tmp0_q)); - res_high = vmlal_u8(res_high, max_minus_m, vget_high_u8(tmp1_q)); - res_q = vcombine_u8(vrshrn_n_u16(res_low, AOM_BLEND_A64_ROUND_BITS), - vrshrn_n_u16(res_high, AOM_BLEND_A64_ROUND_BITS)); - vst1q_u8(dst, res_q); - src0 += 16; - src1 += 16; - dst += 16; - } - src0 += src0_stride - w; - src1 += src1_stride - w; - dst += dst_stride - w; - } - } else if (w == 8) { - for (int i = 0; i < h; ++i) { - __builtin_prefetch(src0); - __builtin_prefetch(src1); - const uint8x8_t m = vdup_n_u8((uint8_t)mask[i]); - const uint8x8_t max_minus_m = vdup_n_u8(64 - (uint8_t)mask[i]); - tmp0 = vld1_u8(src0); - tmp1 = vld1_u8(src1); - res = vmull_u8(m, 
tmp0); - res = vmlal_u8(res, max_minus_m, tmp1); - vst1_u8(dst, vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)); - src0 += src0_stride; - src1 += src1_stride; - dst += dst_stride; - } - } else if (w == 4) { - for (int i = 0; i < h; i += 2) { - __builtin_prefetch(src0 + 0 * src0_stride); - __builtin_prefetch(src0 + 1 * src0_stride); - __builtin_prefetch(src1 + 0 * src1_stride); - __builtin_prefetch(src1 + 1 * src1_stride); - const uint16x4_t m1 = vdup_n_u16((uint16_t)mask[i]); - const uint16x4_t m2 = vdup_n_u16((uint16_t)mask[i + 1]); - const uint8x8_t m = vmovn_u16(vcombine_u16(m1, m2)); - const uint16x4_t max_minus_m1 = vdup_n_u16(64 - (uint16_t)mask[i]); - const uint16x4_t max_minus_m2 = vdup_n_u16(64 - (uint16_t)mask[i + 1]); - const uint8x8_t max_minus_m = - vmovn_u16(vcombine_u16(max_minus_m1, max_minus_m2)); - load_unaligned_u8_4x2(src0, src0_stride, &tmp0_32); - tmp0 = vreinterpret_u8_u32(tmp0_32); - load_unaligned_u8_4x2(src1, src1_stride, &tmp1_32); - tmp1 = vreinterpret_u8_u32(tmp1_32); - res = vmull_u8(m, tmp0); - res = vmlal_u8(res, max_minus_m, tmp1); - vst1_lane_u32( - (uint32_t *)(dst + (0 * dst_stride)), - vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0); - vst1_lane_u32( - (uint32_t *)(dst + (1 * dst_stride)), - vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1); - src0 += (2 * src0_stride); - src1 += (2 * src1_stride); - dst += (2 * dst_stride); - } - } else if (w == 2) { - for (int i = 0; i < h; i += 2) { - __builtin_prefetch(src0 + 0 * src0_stride); - __builtin_prefetch(src0 + 1 * src0_stride); - __builtin_prefetch(src1 + 0 * src1_stride); - __builtin_prefetch(src1 + 1 * src1_stride); - const uint8x8_t m1 = vdup_n_u8(mask[i]); - const uint8x8_t m2 = vdup_n_u8(mask[i + 1]); - const uint16x4x2_t m_trn = - vtrn_u16(vreinterpret_u16_u8(m1), vreinterpret_u16_u8(m2)); - const uint8x8_t m = vreinterpret_u8_u16(m_trn.val[0]); - const uint8x8_t max_minus_m1 = vdup_n_u8(64 - mask[i]); - const uint8x8_t max_minus_m2 = 
vdup_n_u8(64 - mask[i + 1]); - const uint16x4x2_t max_minus_m_trn = vtrn_u16( - vreinterpret_u16_u8(max_minus_m1), vreinterpret_u16_u8(max_minus_m2)); - const uint8x8_t max_minus_m = vreinterpret_u8_u16(max_minus_m_trn.val[0]); - load_unaligned_u8_2x2(src0, src0_stride, &tmp0_16); - tmp0 = vreinterpret_u8_u16(tmp0_16); - load_unaligned_u8_2x2(src1, src1_stride, &tmp1_16); - tmp1 = vreinterpret_u8_u16(tmp1_16); - res = vmull_u8(m, tmp0); - res = vmlal_u8(res, max_minus_m, tmp1); - vst1_lane_u16( - (uint16_t *)(dst + (0 * dst_stride)), - vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0); - vst1_lane_u16( - (uint16_t *)(dst + (1 * dst_stride)), - vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1); - src0 += (2 * src0_stride); - src1 += (2 * src1_stride); - dst += (2 * dst_stride); - } - } -} diff --git a/third_party/aom/av1/common/arm/cfl_neon.c b/third_party/aom/av1/common/arm/cfl_neon.c deleted file mode 100644 index 39025b5e5..000000000 --- a/third_party/aom/av1/common/arm/cfl_neon.c +++ /dev/null @@ -1,584 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ -#include <arm_neon.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/cfl.h" - -static INLINE void vldsubstq_s16(int16_t *dst, const uint16_t *src, int offset, - int16x8_t sub) { - vst1q_s16(dst + offset, - vsubq_s16(vreinterpretq_s16_u16(vld1q_u16(src + offset)), sub)); -} - -static INLINE uint16x8_t vldaddq_u16(const uint16_t *buf, size_t offset) { - return vaddq_u16(vld1q_u16(buf), vld1q_u16(buf + offset)); -} - -// Load half of a vector and duplicated in other half -static INLINE uint8x8_t vldh_dup_u8(const uint8_t *ptr) { - return vreinterpret_u8_u32(vld1_dup_u32((const uint32_t *)ptr)); -} - -// Store half of a vector. -static INLINE void vsth_u16(uint16_t *ptr, uint16x4_t val) { - *((uint32_t *)ptr) = vreinterpret_u32_u16(val)[0]; -} - -// Store half of a vector. -static INLINE void vsth_u8(uint8_t *ptr, uint8x8_t val) { - *((uint32_t *)ptr) = vreinterpret_u32_u8(val)[0]; -} - -static void cfl_luma_subsampling_420_lbd_neon(const uint8_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - const uint16_t *end = pred_buf_q3 + (height >> 1) * CFL_BUF_LINE; - const int luma_stride = input_stride << 1; - do { - if (width == 4) { - const uint16x4_t top = vpaddl_u8(vldh_dup_u8(input)); - const uint16x4_t sum = vpadal_u8(top, vldh_dup_u8(input + input_stride)); - vsth_u16(pred_buf_q3, vshl_n_u16(sum, 1)); - } else if (width == 8) { - const uint16x4_t top = vpaddl_u8(vld1_u8(input)); - const uint16x4_t sum = vpadal_u8(top, vld1_u8(input + input_stride)); - vst1_u16(pred_buf_q3, vshl_n_u16(sum, 1)); - } else if (width == 16) { - const uint16x8_t top = vpaddlq_u8(vld1q_u8(input)); - const uint16x8_t sum = vpadalq_u8(top, vld1q_u8(input + input_stride)); - vst1q_u16(pred_buf_q3, vshlq_n_u16(sum, 1)); - } else { - const uint8x8x4_t top = vld4_u8(input); - const uint8x8x4_t bot = vld4_u8(input + input_stride); - // equivalent to a vpaddlq_u8 (because vld4q interleaves) - const uint16x8_t top_0 = vaddl_u8(top.val[0], top.val[1]); 
- // equivalent to a vpaddlq_u8 (because vld4q interleaves) - const uint16x8_t bot_0 = vaddl_u8(bot.val[0], bot.val[1]); - // equivalent to a vpaddlq_u8 (because vld4q interleaves) - const uint16x8_t top_1 = vaddl_u8(top.val[2], top.val[3]); - // equivalent to a vpaddlq_u8 (because vld4q interleaves) - const uint16x8_t bot_1 = vaddl_u8(bot.val[2], bot.val[3]); - uint16x8x2_t sum; - sum.val[0] = vshlq_n_u16(vaddq_u16(top_0, bot_0), 1); - sum.val[1] = vshlq_n_u16(vaddq_u16(top_1, bot_1), 1); - vst2q_u16(pred_buf_q3, sum); - } - input += luma_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); -} - -static void cfl_luma_subsampling_422_lbd_neon(const uint8_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE; - do { - if (width == 4) { - const uint16x4_t top = vpaddl_u8(vldh_dup_u8(input)); - vsth_u16(pred_buf_q3, vshl_n_u16(top, 2)); - } else if (width == 8) { - const uint16x4_t top = vpaddl_u8(vld1_u8(input)); - vst1_u16(pred_buf_q3, vshl_n_u16(top, 2)); - } else if (width == 16) { - const uint16x8_t top = vpaddlq_u8(vld1q_u8(input)); - vst1q_u16(pred_buf_q3, vshlq_n_u16(top, 2)); - } else { - const uint8x8x4_t top = vld4_u8(input); - uint16x8x2_t sum; - // vaddl_u8 is equivalent to a vpaddlq_u8 (because vld4q interleaves) - sum.val[0] = vshlq_n_u16(vaddl_u8(top.val[0], top.val[1]), 2); - sum.val[1] = vshlq_n_u16(vaddl_u8(top.val[2], top.val[3]), 2); - vst2q_u16(pred_buf_q3, sum); - } - input += input_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); -} - -static void cfl_luma_subsampling_444_lbd_neon(const uint8_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE; - do { - if (width == 4) { - const uint16x8_t top = vshll_n_u8(vldh_dup_u8(input), 3); - vst1_u16(pred_buf_q3, vget_low_u16(top)); - } else if (width == 8) { - const uint16x8_t top = vshll_n_u8(vld1_u8(input), 3); - 
vst1q_u16(pred_buf_q3, top); - } else { - const uint8x16_t top = vld1q_u8(input); - vst1q_u16(pred_buf_q3, vshll_n_u8(vget_low_u8(top), 3)); - vst1q_u16(pred_buf_q3 + 8, vshll_n_u8(vget_high_u8(top), 3)); - if (width == 32) { - const uint8x16_t next_top = vld1q_u8(input + 16); - vst1q_u16(pred_buf_q3 + 16, vshll_n_u8(vget_low_u8(next_top), 3)); - vst1q_u16(pred_buf_q3 + 24, vshll_n_u8(vget_high_u8(next_top), 3)); - } - } - input += input_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); -} - -#ifndef __aarch64__ -uint16x8_t vpaddq_u16(uint16x8_t a, uint16x8_t b) { - return vcombine_u16(vpadd_u16(vget_low_u16(a), vget_high_u16(a)), - vpadd_u16(vget_low_u16(b), vget_high_u16(b))); -} -#endif - -static void cfl_luma_subsampling_420_hbd_neon(const uint16_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - const uint16_t *end = pred_buf_q3 + (height >> 1) * CFL_BUF_LINE; - const int luma_stride = input_stride << 1; - do { - if (width == 4) { - const uint16x4_t top = vld1_u16(input); - const uint16x4_t bot = vld1_u16(input + input_stride); - const uint16x4_t sum = vadd_u16(top, bot); - const uint16x4_t hsum = vpadd_u16(sum, sum); - vsth_u16(pred_buf_q3, vshl_n_u16(hsum, 1)); - } else if (width < 32) { - const uint16x8_t top = vld1q_u16(input); - const uint16x8_t bot = vld1q_u16(input + input_stride); - const uint16x8_t sum = vaddq_u16(top, bot); - if (width == 8) { - const uint16x4_t hsum = vget_low_u16(vpaddq_u16(sum, sum)); - vst1_u16(pred_buf_q3, vshl_n_u16(hsum, 1)); - } else { - const uint16x8_t top_1 = vld1q_u16(input + 8); - const uint16x8_t bot_1 = vld1q_u16(input + 8 + input_stride); - const uint16x8_t sum_1 = vaddq_u16(top_1, bot_1); - const uint16x8_t hsum = vpaddq_u16(sum, sum_1); - vst1q_u16(pred_buf_q3, vshlq_n_u16(hsum, 1)); - } - } else { - const uint16x8x4_t top = vld4q_u16(input); - const uint16x8x4_t bot = vld4q_u16(input + input_stride); - // equivalent to a vpaddq_u16 (because vld4q interleaves) - const 
uint16x8_t top_0 = vaddq_u16(top.val[0], top.val[1]); - // equivalent to a vpaddq_u16 (because vld4q interleaves) - const uint16x8_t bot_0 = vaddq_u16(bot.val[0], bot.val[1]); - // equivalent to a vpaddq_u16 (because vld4q interleaves) - const uint16x8_t top_1 = vaddq_u16(top.val[2], top.val[3]); - // equivalent to a vpaddq_u16 (because vld4q interleaves) - const uint16x8_t bot_1 = vaddq_u16(bot.val[2], bot.val[3]); - uint16x8x2_t sum; - sum.val[0] = vshlq_n_u16(vaddq_u16(top_0, bot_0), 1); - sum.val[1] = vshlq_n_u16(vaddq_u16(top_1, bot_1), 1); - vst2q_u16(pred_buf_q3, sum); - } - input += luma_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); -} - -static void cfl_luma_subsampling_422_hbd_neon(const uint16_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE; - do { - if (width == 4) { - const uint16x4_t top = vld1_u16(input); - const uint16x4_t hsum = vpadd_u16(top, top); - vsth_u16(pred_buf_q3, vshl_n_u16(hsum, 2)); - } else if (width == 8) { - const uint16x4x2_t top = vld2_u16(input); - // equivalent to a vpadd_u16 (because vld2 interleaves) - const uint16x4_t hsum = vadd_u16(top.val[0], top.val[1]); - vst1_u16(pred_buf_q3, vshl_n_u16(hsum, 2)); - } else if (width == 16) { - const uint16x8x2_t top = vld2q_u16(input); - // equivalent to a vpaddq_u16 (because vld2q interleaves) - const uint16x8_t hsum = vaddq_u16(top.val[0], top.val[1]); - vst1q_u16(pred_buf_q3, vshlq_n_u16(hsum, 2)); - } else { - const uint16x8x4_t top = vld4q_u16(input); - // equivalent to a vpaddq_u16 (because vld4q interleaves) - const uint16x8_t hsum_0 = vaddq_u16(top.val[0], top.val[1]); - // equivalent to a vpaddq_u16 (because vld4q interleaves) - const uint16x8_t hsum_1 = vaddq_u16(top.val[2], top.val[3]); - uint16x8x2_t result = { { vshlq_n_u16(hsum_0, 2), - vshlq_n_u16(hsum_1, 2) } }; - vst2q_u16(pred_buf_q3, result); - } - input += input_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < 
end); -} - -static void cfl_luma_subsampling_444_hbd_neon(const uint16_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE; - do { - if (width == 4) { - const uint16x4_t top = vld1_u16(input); - vst1_u16(pred_buf_q3, vshl_n_u16(top, 3)); - } else if (width == 8) { - const uint16x8_t top = vld1q_u16(input); - vst1q_u16(pred_buf_q3, vshlq_n_u16(top, 3)); - } else if (width == 16) { - uint16x8x2_t top = vld2q_u16(input); - top.val[0] = vshlq_n_u16(top.val[0], 3); - top.val[1] = vshlq_n_u16(top.val[1], 3); - vst2q_u16(pred_buf_q3, top); - } else { - uint16x8x4_t top = vld4q_u16(input); - top.val[0] = vshlq_n_u16(top.val[0], 3); - top.val[1] = vshlq_n_u16(top.val[1], 3); - top.val[2] = vshlq_n_u16(top.val[2], 3); - top.val[3] = vshlq_n_u16(top.val[3], 3); - vst4q_u16(pred_buf_q3, top); - } - input += input_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); -} - -CFL_GET_SUBSAMPLE_FUNCTION(neon) - -static INLINE void subtract_average_neon(const uint16_t *src, int16_t *dst, - int width, int height, - int round_offset, - const int num_pel_log2) { - const uint16_t *const end = src + height * CFL_BUF_LINE; - - // Round offset is not needed, because NEON will handle the rounding. - (void)round_offset; - - // To optimize the use of the CPU pipeline, we process 4 rows per iteration - const int step = 4 * CFL_BUF_LINE; - - // At this stage, the prediction buffer contains scaled reconstructed luma - // pixels, which are positive integer and only require 15 bits. By using - // unsigned integer for the sum, we can do one addition operation inside 16 - // bits (8 lanes) before having to convert to 32 bits (4 lanes). - const uint16_t *sum_buf = src; - uint32x4_t sum_32x4 = { 0, 0, 0, 0 }; - do { - // For all widths, we load, add and combine the data so it fits in 4 lanes. 
- if (width == 4) { - const uint16x4_t a0 = - vadd_u16(vld1_u16(sum_buf), vld1_u16(sum_buf + CFL_BUF_LINE)); - const uint16x4_t a1 = vadd_u16(vld1_u16(sum_buf + 2 * CFL_BUF_LINE), - vld1_u16(sum_buf + 3 * CFL_BUF_LINE)); - sum_32x4 = vaddq_u32(sum_32x4, vaddl_u16(a0, a1)); - } else if (width == 8) { - const uint16x8_t a0 = vldaddq_u16(sum_buf, CFL_BUF_LINE); - const uint16x8_t a1 = - vldaddq_u16(sum_buf + 2 * CFL_BUF_LINE, CFL_BUF_LINE); - sum_32x4 = vpadalq_u16(sum_32x4, a0); - sum_32x4 = vpadalq_u16(sum_32x4, a1); - } else { - const uint16x8_t row0 = vldaddq_u16(sum_buf, 8); - const uint16x8_t row1 = vldaddq_u16(sum_buf + CFL_BUF_LINE, 8); - const uint16x8_t row2 = vldaddq_u16(sum_buf + 2 * CFL_BUF_LINE, 8); - const uint16x8_t row3 = vldaddq_u16(sum_buf + 3 * CFL_BUF_LINE, 8); - sum_32x4 = vpadalq_u16(sum_32x4, row0); - sum_32x4 = vpadalq_u16(sum_32x4, row1); - sum_32x4 = vpadalq_u16(sum_32x4, row2); - sum_32x4 = vpadalq_u16(sum_32x4, row3); - - if (width == 32) { - const uint16x8_t row0_1 = vldaddq_u16(sum_buf + 16, 8); - const uint16x8_t row1_1 = vldaddq_u16(sum_buf + CFL_BUF_LINE + 16, 8); - const uint16x8_t row2_1 = - vldaddq_u16(sum_buf + 2 * CFL_BUF_LINE + 16, 8); - const uint16x8_t row3_1 = - vldaddq_u16(sum_buf + 3 * CFL_BUF_LINE + 16, 8); - - sum_32x4 = vpadalq_u16(sum_32x4, row0_1); - sum_32x4 = vpadalq_u16(sum_32x4, row1_1); - sum_32x4 = vpadalq_u16(sum_32x4, row2_1); - sum_32x4 = vpadalq_u16(sum_32x4, row3_1); - } - } - sum_buf += step; - } while (sum_buf < end); - - // Permute and add in such a way that each lane contains the block sum. 
- // [A+C+B+D, B+D+A+C, C+A+D+B, D+B+C+A] -#ifdef __aarch64__ - sum_32x4 = vpaddq_u32(sum_32x4, sum_32x4); - sum_32x4 = vpaddq_u32(sum_32x4, sum_32x4); -#else - uint32x4_t flip = - vcombine_u32(vget_high_u32(sum_32x4), vget_low_u32(sum_32x4)); - sum_32x4 = vaddq_u32(sum_32x4, flip); - sum_32x4 = vaddq_u32(sum_32x4, vrev64q_u32(sum_32x4)); -#endif - - // Computing the average could be done using scalars, but getting off the NEON - // engine introduces latency, so we use vqrshrn. - int16x4_t avg_16x4; - // Constant propagation makes for some ugly code. - switch (num_pel_log2) { - case 4: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 4)); break; - case 5: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 5)); break; - case 6: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 6)); break; - case 7: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 7)); break; - case 8: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 8)); break; - case 9: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 9)); break; - case 10: - avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 10)); - break; - default: assert(0); - } - - if (width == 4) { - do { - vst1_s16(dst, vsub_s16(vreinterpret_s16_u16(vld1_u16(src)), avg_16x4)); - src += CFL_BUF_LINE; - dst += CFL_BUF_LINE; - } while (src < end); - } else { - const int16x8_t avg_16x8 = vcombine_s16(avg_16x4, avg_16x4); - do { - vldsubstq_s16(dst, src, 0, avg_16x8); - vldsubstq_s16(dst, src, CFL_BUF_LINE, avg_16x8); - vldsubstq_s16(dst, src, 2 * CFL_BUF_LINE, avg_16x8); - vldsubstq_s16(dst, src, 3 * CFL_BUF_LINE, avg_16x8); - - if (width > 8) { - vldsubstq_s16(dst, src, 8, avg_16x8); - vldsubstq_s16(dst, src, 8 + CFL_BUF_LINE, avg_16x8); - vldsubstq_s16(dst, src, 8 + 2 * CFL_BUF_LINE, avg_16x8); - vldsubstq_s16(dst, src, 8 + 3 * CFL_BUF_LINE, avg_16x8); - } - if (width == 32) { - vldsubstq_s16(dst, src, 16, avg_16x8); - vldsubstq_s16(dst, src, 16 + CFL_BUF_LINE, avg_16x8); - vldsubstq_s16(dst, src, 16 
+ 2 * CFL_BUF_LINE, avg_16x8); - vldsubstq_s16(dst, src, 16 + 3 * CFL_BUF_LINE, avg_16x8); - vldsubstq_s16(dst, src, 24, avg_16x8); - vldsubstq_s16(dst, src, 24 + CFL_BUF_LINE, avg_16x8); - vldsubstq_s16(dst, src, 24 + 2 * CFL_BUF_LINE, avg_16x8); - vldsubstq_s16(dst, src, 24 + 3 * CFL_BUF_LINE, avg_16x8); - } - src += step; - dst += step; - } while (src < end); - } -} - -CFL_SUB_AVG_FN(neon) - -// Saturating negate 16-bit integers in a when the corresponding signed 16-bit -// integer in b is negative. -// Notes: -// * Negating INT16_MIN results in INT16_MIN. However, this cannot occur in -// practice, as scaled_luma is the multiplication of two absolute values. -// * In the Intel equivalent, elements in a are zeroed out when the -// corresponding elements in b are zero. Because vsign is used twice in a -// row, with b in the first call becoming a in the second call, there's no -// impact from not zeroing out. -static int16x4_t vsign_s16(int16x4_t a, int16x4_t b) { - const int16x4_t mask = vshr_n_s16(b, 15); - return veor_s16(vadd_s16(a, mask), mask); -} - -// Saturating negate 16-bit integers in a when the corresponding signed 16-bit -// integer in b is negative. -// Notes: -// * Negating INT16_MIN results in INT16_MIN. However, this cannot occur in -// practice, as scaled_luma is the multiplication of two absolute values. -// * In the Intel equivalent, elements in a are zeroed out when the -// corresponding elements in b are zero. Because vsignq is used twice in a -// row, with b in the first call becoming a in the second call, there's no -// impact from not zeroing out. 
-static int16x8_t vsignq_s16(int16x8_t a, int16x8_t b) { - const int16x8_t mask = vshrq_n_s16(b, 15); - return veorq_s16(vaddq_s16(a, mask), mask); -} - -static INLINE int16x4_t predict_w4(const int16_t *pred_buf_q3, - int16x4_t alpha_sign, int abs_alpha_q12, - int16x4_t dc) { - const int16x4_t ac_q3 = vld1_s16(pred_buf_q3); - const int16x4_t ac_sign = veor_s16(alpha_sign, ac_q3); - int16x4_t scaled_luma = vqrdmulh_n_s16(vabs_s16(ac_q3), abs_alpha_q12); - return vadd_s16(vsign_s16(scaled_luma, ac_sign), dc); -} - -static INLINE int16x8_t predict_w8(const int16_t *pred_buf_q3, - int16x8_t alpha_sign, int abs_alpha_q12, - int16x8_t dc) { - const int16x8_t ac_q3 = vld1q_s16(pred_buf_q3); - const int16x8_t ac_sign = veorq_s16(alpha_sign, ac_q3); - int16x8_t scaled_luma = vqrdmulhq_n_s16(vabsq_s16(ac_q3), abs_alpha_q12); - return vaddq_s16(vsignq_s16(scaled_luma, ac_sign), dc); -} - -static INLINE int16x8x2_t predict_w16(const int16_t *pred_buf_q3, - int16x8_t alpha_sign, int abs_alpha_q12, - int16x8_t dc) { - // vld2q_s16 interleaves, which is not useful for prediction. vst1q_s16_x2 - // does not interleave, but is not currently available in the compilier used - // by the AOM build system. - const int16x8x2_t ac_q3 = vld2q_s16(pred_buf_q3); - const int16x8_t ac_sign_0 = veorq_s16(alpha_sign, ac_q3.val[0]); - const int16x8_t ac_sign_1 = veorq_s16(alpha_sign, ac_q3.val[1]); - const int16x8_t scaled_luma_0 = - vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[0]), abs_alpha_q12); - const int16x8_t scaled_luma_1 = - vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[1]), abs_alpha_q12); - int16x8x2_t result; - result.val[0] = vaddq_s16(vsignq_s16(scaled_luma_0, ac_sign_0), dc); - result.val[1] = vaddq_s16(vsignq_s16(scaled_luma_1, ac_sign_1), dc); - return result; -} - -static INLINE int16x8x4_t predict_w32(const int16_t *pred_buf_q3, - int16x8_t alpha_sign, int abs_alpha_q12, - int16x8_t dc) { - // vld4q_s16 interleaves, which is not useful for prediction. 
vst1q_s16_x4 - // does not interleave, but is not currently available in the compilier used - // by the AOM build system. - const int16x8x4_t ac_q3 = vld4q_s16(pred_buf_q3); - const int16x8_t ac_sign_0 = veorq_s16(alpha_sign, ac_q3.val[0]); - const int16x8_t ac_sign_1 = veorq_s16(alpha_sign, ac_q3.val[1]); - const int16x8_t ac_sign_2 = veorq_s16(alpha_sign, ac_q3.val[2]); - const int16x8_t ac_sign_3 = veorq_s16(alpha_sign, ac_q3.val[3]); - const int16x8_t scaled_luma_0 = - vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[0]), abs_alpha_q12); - const int16x8_t scaled_luma_1 = - vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[1]), abs_alpha_q12); - const int16x8_t scaled_luma_2 = - vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[2]), abs_alpha_q12); - const int16x8_t scaled_luma_3 = - vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[3]), abs_alpha_q12); - int16x8x4_t result; - result.val[0] = vaddq_s16(vsignq_s16(scaled_luma_0, ac_sign_0), dc); - result.val[1] = vaddq_s16(vsignq_s16(scaled_luma_1, ac_sign_1), dc); - result.val[2] = vaddq_s16(vsignq_s16(scaled_luma_2, ac_sign_2), dc); - result.val[3] = vaddq_s16(vsignq_s16(scaled_luma_3, ac_sign_3), dc); - return result; -} - -static INLINE void cfl_predict_lbd_neon(const int16_t *pred_buf_q3, - uint8_t *dst, int dst_stride, - int alpha_q3, int width, int height) { - const int16_t abs_alpha_q12 = abs(alpha_q3) << 9; - const int16_t *const end = pred_buf_q3 + height * CFL_BUF_LINE; - if (width == 4) { - const int16x4_t alpha_sign = vdup_n_s16(alpha_q3); - const int16x4_t dc = vdup_n_s16(*dst); - do { - const int16x4_t pred = - predict_w4(pred_buf_q3, alpha_sign, abs_alpha_q12, dc); - vsth_u8(dst, vqmovun_s16(vcombine_s16(pred, pred))); - dst += dst_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); - } else { - const int16x8_t alpha_sign = vdupq_n_s16(alpha_q3); - const int16x8_t dc = vdupq_n_s16(*dst); - do { - if (width == 8) { - vst1_u8(dst, vqmovun_s16(predict_w8(pred_buf_q3, alpha_sign, - abs_alpha_q12, dc))); - } else if (width == 16) { - const int16x8x2_t 
pred = - predict_w16(pred_buf_q3, alpha_sign, abs_alpha_q12, dc); - const uint8x8x2_t predun = { { vqmovun_s16(pred.val[0]), - vqmovun_s16(pred.val[1]) } }; - vst2_u8(dst, predun); - } else { - const int16x8x4_t pred = - predict_w32(pred_buf_q3, alpha_sign, abs_alpha_q12, dc); - const uint8x8x4_t predun = { - { vqmovun_s16(pred.val[0]), vqmovun_s16(pred.val[1]), - vqmovun_s16(pred.val[2]), vqmovun_s16(pred.val[3]) } - }; - vst4_u8(dst, predun); - } - dst += dst_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); - } -} - -CFL_PREDICT_FN(neon, lbd) - -static INLINE uint16x4_t clamp_s16(int16x4_t a, int16x4_t max) { - return vreinterpret_u16_s16(vmax_s16(vmin_s16(a, max), vdup_n_s16(0))); -} - -static INLINE uint16x8_t clampq_s16(int16x8_t a, int16x8_t max) { - return vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(a, max), vdupq_n_s16(0))); -} - -static INLINE uint16x8x2_t clamp2q_s16(int16x8x2_t a, int16x8_t max) { - uint16x8x2_t result; - result.val[0] = vreinterpretq_u16_s16( - vmaxq_s16(vminq_s16(a.val[0], max), vdupq_n_s16(0))); - result.val[1] = vreinterpretq_u16_s16( - vmaxq_s16(vminq_s16(a.val[1], max), vdupq_n_s16(0))); - return result; -} - -static INLINE uint16x8x4_t clamp4q_s16(int16x8x4_t a, int16x8_t max) { - uint16x8x4_t result; - result.val[0] = vreinterpretq_u16_s16( - vmaxq_s16(vminq_s16(a.val[0], max), vdupq_n_s16(0))); - result.val[1] = vreinterpretq_u16_s16( - vmaxq_s16(vminq_s16(a.val[1], max), vdupq_n_s16(0))); - result.val[2] = vreinterpretq_u16_s16( - vmaxq_s16(vminq_s16(a.val[2], max), vdupq_n_s16(0))); - result.val[3] = vreinterpretq_u16_s16( - vmaxq_s16(vminq_s16(a.val[3], max), vdupq_n_s16(0))); - return result; -} - -static INLINE void cfl_predict_hbd_neon(const int16_t *pred_buf_q3, - uint16_t *dst, int dst_stride, - int alpha_q3, int bd, int width, - int height) { - const int max = (1 << bd) - 1; - const int16_t abs_alpha_q12 = abs(alpha_q3) << 9; - const int16_t *const end = pred_buf_q3 + height * CFL_BUF_LINE; - if (width == 4) { - 
const int16x4_t alpha_sign = vdup_n_s16(alpha_q3); - const int16x4_t dc = vdup_n_s16(*dst); - const int16x4_t max_16x4 = vdup_n_s16(max); - do { - const int16x4_t scaled_luma = - predict_w4(pred_buf_q3, alpha_sign, abs_alpha_q12, dc); - vst1_u16(dst, clamp_s16(scaled_luma, max_16x4)); - dst += dst_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); - } else { - const int16x8_t alpha_sign = vdupq_n_s16(alpha_q3); - const int16x8_t dc = vdupq_n_s16(*dst); - const int16x8_t max_16x8 = vdupq_n_s16(max); - do { - if (width == 8) { - const int16x8_t pred = - predict_w8(pred_buf_q3, alpha_sign, abs_alpha_q12, dc); - vst1q_u16(dst, clampq_s16(pred, max_16x8)); - } else if (width == 16) { - const int16x8x2_t pred = - predict_w16(pred_buf_q3, alpha_sign, abs_alpha_q12, dc); - vst2q_u16(dst, clamp2q_s16(pred, max_16x8)); - } else { - const int16x8x4_t pred = - predict_w32(pred_buf_q3, alpha_sign, abs_alpha_q12, dc); - vst4q_u16(dst, clamp4q_s16(pred, max_16x8)); - } - dst += dst_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); - } -} - -CFL_PREDICT_FN(neon, hbd) diff --git a/third_party/aom/av1/common/arm/convolve_neon.c b/third_party/aom/av1/common/arm/convolve_neon.c deleted file mode 100644 index d0c4f8ff6..000000000 --- a/third_party/aom/av1/common/arm/convolve_neon.c +++ /dev/null @@ -1,1455 +0,0 @@ -/* - * - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <assert.h> -#include <arm_neon.h> - -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_ports/mem.h" -#include "av1/common/convolve.h" -#include "av1/common/filter.h" -#include "av1/common/arm/convolve_neon.h" -#include "av1/common/arm/mem_neon.h" -#include "av1/common/arm/transpose_neon.h" - -static INLINE int16x4_t convolve8_4x4(const int16x4_t s0, const int16x4_t s1, - const int16x4_t s2, const int16x4_t s3, - const int16x4_t s4, const int16x4_t s5, - const int16x4_t s6, const int16x4_t s7, - const int16_t *filter) { - int16x4_t sum; - - sum = vmul_n_s16(s0, filter[0]); - sum = vmla_n_s16(sum, s1, filter[1]); - sum = vmla_n_s16(sum, s2, filter[2]); - sum = vmla_n_s16(sum, s5, filter[5]); - sum = vmla_n_s16(sum, s6, filter[6]); - sum = vmla_n_s16(sum, s7, filter[7]); - /* filter[3] can take a max value of 128. So the max value of the result : - * 128*255 + sum > 16 bits - */ - sum = vqadd_s16(sum, vmul_n_s16(s3, filter[3])); - sum = vqadd_s16(sum, vmul_n_s16(s4, filter[4])); - - return sum; -} - -static INLINE uint8x8_t convolve8_horiz_8x8( - const int16x8_t s0, const int16x8_t s1, const int16x8_t s2, - const int16x8_t s3, const int16x8_t s4, const int16x8_t s5, - const int16x8_t s6, const int16x8_t s7, const int16_t *filter, - const int16x8_t shift_round_0, const int16x8_t shift_by_bits) { - int16x8_t sum; - - sum = vmulq_n_s16(s0, filter[0]); - sum = vmlaq_n_s16(sum, s1, filter[1]); - sum = vmlaq_n_s16(sum, s2, filter[2]); - sum = vmlaq_n_s16(sum, s5, filter[5]); - sum = vmlaq_n_s16(sum, s6, filter[6]); - sum = vmlaq_n_s16(sum, s7, filter[7]); - /* filter[3] can take a max value of 128. 
So the max value of the result : - * 128*255 + sum > 16 bits - */ - sum = vqaddq_s16(sum, vmulq_n_s16(s3, filter[3])); - sum = vqaddq_s16(sum, vmulq_n_s16(s4, filter[4])); - - sum = vqrshlq_s16(sum, shift_round_0); - sum = vqrshlq_s16(sum, shift_by_bits); - - return vqmovun_s16(sum); -} - -#if !defined(__aarch64__) -static INLINE uint8x8_t convolve8_horiz_4x1( - const int16x4_t s0, const int16x4_t s1, const int16x4_t s2, - const int16x4_t s3, const int16x4_t s4, const int16x4_t s5, - const int16x4_t s6, const int16x4_t s7, const int16_t *filter, - const int16x4_t shift_round_0, const int16x4_t shift_by_bits) { - int16x4_t sum; - - sum = vmul_n_s16(s0, filter[0]); - sum = vmla_n_s16(sum, s1, filter[1]); - sum = vmla_n_s16(sum, s2, filter[2]); - sum = vmla_n_s16(sum, s5, filter[5]); - sum = vmla_n_s16(sum, s6, filter[6]); - sum = vmla_n_s16(sum, s7, filter[7]); - /* filter[3] can take a max value of 128. So the max value of the result : - * 128*255 + sum > 16 bits - */ - sum = vqadd_s16(sum, vmul_n_s16(s3, filter[3])); - sum = vqadd_s16(sum, vmul_n_s16(s4, filter[4])); - - sum = vqrshl_s16(sum, shift_round_0); - sum = vqrshl_s16(sum, shift_by_bits); - - return vqmovun_s16(vcombine_s16(sum, sum)); -} -#endif // !defined(__arch64__) - -static INLINE uint8x8_t convolve8_vert_8x4( - const int16x8_t s0, const int16x8_t s1, const int16x8_t s2, - const int16x8_t s3, const int16x8_t s4, const int16x8_t s5, - const int16x8_t s6, const int16x8_t s7, const int16_t *filter) { - int16x8_t sum; - - sum = vmulq_n_s16(s0, filter[0]); - sum = vmlaq_n_s16(sum, s1, filter[1]); - sum = vmlaq_n_s16(sum, s2, filter[2]); - sum = vmlaq_n_s16(sum, s5, filter[5]); - sum = vmlaq_n_s16(sum, s6, filter[6]); - sum = vmlaq_n_s16(sum, s7, filter[7]); - /* filter[3] can take a max value of 128. 
So the max value of the result : - * 128*255 + sum > 16 bits - */ - sum = vqaddq_s16(sum, vmulq_n_s16(s3, filter[3])); - sum = vqaddq_s16(sum, vmulq_n_s16(s4, filter[4])); - - return vqrshrun_n_s16(sum, FILTER_BITS); -} - -static INLINE uint16x4_t convolve8_vert_4x4_s32( - const int16x4_t s0, const int16x4_t s1, const int16x4_t s2, - const int16x4_t s3, const int16x4_t s4, const int16x4_t s5, - const int16x4_t s6, const int16x4_t s7, const int16_t *y_filter, - const int32x4_t round_shift_vec, const int32x4_t offset_const, - const int32x4_t sub_const_vec) { - int32x4_t sum0; - uint16x4_t res; - const int32x4_t zero = vdupq_n_s32(0); - - sum0 = vmull_n_s16(s0, y_filter[0]); - sum0 = vmlal_n_s16(sum0, s1, y_filter[1]); - sum0 = vmlal_n_s16(sum0, s2, y_filter[2]); - sum0 = vmlal_n_s16(sum0, s3, y_filter[3]); - sum0 = vmlal_n_s16(sum0, s4, y_filter[4]); - sum0 = vmlal_n_s16(sum0, s5, y_filter[5]); - sum0 = vmlal_n_s16(sum0, s6, y_filter[6]); - sum0 = vmlal_n_s16(sum0, s7, y_filter[7]); - - sum0 = vaddq_s32(sum0, offset_const); - sum0 = vqrshlq_s32(sum0, round_shift_vec); - sum0 = vsubq_s32(sum0, sub_const_vec); - sum0 = vmaxq_s32(sum0, zero); - - res = vmovn_u32(vreinterpretq_u32_s32(sum0)); - - return res; -} - -static INLINE uint8x8_t convolve8_vert_8x4_s32( - const int16x8_t s0, const int16x8_t s1, const int16x8_t s2, - const int16x8_t s3, const int16x8_t s4, const int16x8_t s5, - const int16x8_t s6, const int16x8_t s7, const int16_t *y_filter, - const int32x4_t round_shift_vec, const int32x4_t offset_const, - const int32x4_t sub_const_vec, const int16x8_t vec_round_bits) { - int32x4_t sum0, sum1; - uint16x8_t res; - const int32x4_t zero = vdupq_n_s32(0); - - sum0 = vmull_n_s16(vget_low_s16(s0), y_filter[0]); - sum0 = vmlal_n_s16(sum0, vget_low_s16(s1), y_filter[1]); - sum0 = vmlal_n_s16(sum0, vget_low_s16(s2), y_filter[2]); - sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), y_filter[3]); - sum0 = vmlal_n_s16(sum0, vget_low_s16(s4), y_filter[4]); - sum0 = vmlal_n_s16(sum0, 
vget_low_s16(s5), y_filter[5]); - sum0 = vmlal_n_s16(sum0, vget_low_s16(s6), y_filter[6]); - sum0 = vmlal_n_s16(sum0, vget_low_s16(s7), y_filter[7]); - - sum1 = vmull_n_s16(vget_high_s16(s0), y_filter[0]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(s1), y_filter[1]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(s2), y_filter[2]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), y_filter[3]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(s4), y_filter[4]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(s5), y_filter[5]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(s6), y_filter[6]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(s7), y_filter[7]); - - sum0 = vaddq_s32(sum0, offset_const); - sum1 = vaddq_s32(sum1, offset_const); - sum0 = vqrshlq_s32(sum0, round_shift_vec); - sum1 = vqrshlq_s32(sum1, round_shift_vec); - sum0 = vsubq_s32(sum0, sub_const_vec); - sum1 = vsubq_s32(sum1, sub_const_vec); - sum0 = vmaxq_s32(sum0, zero); - sum1 = vmaxq_s32(sum1, zero); - res = vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(sum0)), - vqmovn_u32(vreinterpretq_u32_s32(sum1))); - - res = vqrshlq_u16(res, vec_round_bits); - - return vqmovn_u16(res); -} - -void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const uint8_t horiz_offset = filter_params_x->taps / 2 - 1; - const int8_t bits = FILTER_BITS - conv_params->round_0; - - (void)subpel_y_q4; - (void)conv_params; - (void)filter_params_y; - - uint8x8_t t0; -#if defined(__aarch64__) - uint8x8_t t1, t2, t3; -#endif - - assert(bits >= 0); - assert((FILTER_BITS - conv_params->round_1) >= 0 || - ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS)); - - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - - const int16x8_t shift_round_0 = 
vdupq_n_s16(-conv_params->round_0); - const int16x8_t shift_by_bits = vdupq_n_s16(-bits); - - src -= horiz_offset; -#if defined(__aarch64__) - if (h == 4) { - uint8x8_t d01, d23; - int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3; - int16x8_t d01_temp, d23_temp; - - __builtin_prefetch(src + 0 * src_stride); - __builtin_prefetch(src + 1 * src_stride); - __builtin_prefetch(src + 2 * src_stride); - __builtin_prefetch(src + 3 * src_stride); - - load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3); - transpose_u8_8x4(&t0, &t1, &t2, &t3); - - s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); - s1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1))); - s2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2))); - s3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3))); - s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); - s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1))); - s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2))); - __builtin_prefetch(dst + 0 * dst_stride); - __builtin_prefetch(dst + 1 * dst_stride); - __builtin_prefetch(dst + 2 * dst_stride); - __builtin_prefetch(dst + 3 * dst_stride); - src += 7; - - do { - load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3); - transpose_u8_8x4(&t0, &t1, &t2, &t3); - - s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); - s8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1))); - s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2))); - s10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3))); - - d0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, x_filter); - - d1 = convolve8_4x4(s1, s2, s3, s4, s5, s6, s7, s8, x_filter); - - d2 = convolve8_4x4(s2, s3, s4, s5, s6, s7, s8, s9, x_filter); - - d3 = convolve8_4x4(s3, s4, s5, s6, s7, s8, s9, s10, x_filter); - - d01_temp = vqrshlq_s16(vcombine_s16(d0, d1), shift_round_0); - d23_temp = vqrshlq_s16(vcombine_s16(d2, d3), shift_round_0); - - d01_temp = vqrshlq_s16(d01_temp, shift_by_bits); - d23_temp = vqrshlq_s16(d23_temp, shift_by_bits); - - d01 = 
vqmovun_s16(d01_temp); - d23 = vqmovun_s16(d23_temp); - - transpose_u8_4x4(&d01, &d23); - - if (w != 2) { - vst1_lane_u32((uint32_t *)(dst + 0 * dst_stride), // 00 01 02 03 - vreinterpret_u32_u8(d01), 0); - vst1_lane_u32((uint32_t *)(dst + 1 * dst_stride), // 10 11 12 13 - vreinterpret_u32_u8(d23), 0); - vst1_lane_u32((uint32_t *)(dst + 2 * dst_stride), // 20 21 22 23 - vreinterpret_u32_u8(d01), 1); - vst1_lane_u32((uint32_t *)(dst + 3 * dst_stride), // 30 31 32 33 - vreinterpret_u32_u8(d23), 1); - } else { - vst1_lane_u16((uint16_t *)(dst + 0 * dst_stride), // 00 01 - vreinterpret_u16_u8(d01), 0); - vst1_lane_u16((uint16_t *)(dst + 1 * dst_stride), // 10 11 - vreinterpret_u16_u8(d23), 0); - vst1_lane_u16((uint16_t *)(dst + 2 * dst_stride), // 20 21 - vreinterpret_u16_u8(d01), 2); - vst1_lane_u16((uint16_t *)(dst + 3 * dst_stride), // 30 31 - vreinterpret_u16_u8(d23), 2); - } - - s0 = s4; - s1 = s5; - s2 = s6; - s3 = s7; - s4 = s8; - s5 = s9; - s6 = s10; - src += 4; - dst += 4; - w -= 4; - } while (w > 0); - } else { -#endif - int width; - const uint8_t *s; - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; - -#if defined(__aarch64__) - int16x8_t s8, s9, s10; - uint8x8_t t4, t5, t6, t7; -#endif - - if (w <= 4) { -#if defined(__aarch64__) - do { - load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); - - load_u8_8x8(src + 7, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, - &t7); - src += 8 * src_stride; - __builtin_prefetch(dst + 0 * dst_stride); - __builtin_prefetch(dst + 1 * dst_stride); - __builtin_prefetch(dst + 2 * dst_stride); - __builtin_prefetch(dst + 3 * dst_stride); - 
__builtin_prefetch(dst + 4 * dst_stride); - __builtin_prefetch(dst + 5 * dst_stride); - __builtin_prefetch(dst + 6 * dst_stride); - __builtin_prefetch(dst + 7 * dst_stride); - - transpose_u8_4x8(&t0, &t1, &t2, &t3, t4, t5, t6, t7); - - s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); - - __builtin_prefetch(src + 0 * src_stride); - __builtin_prefetch(src + 1 * src_stride); - __builtin_prefetch(src + 2 * src_stride); - __builtin_prefetch(src + 3 * src_stride); - __builtin_prefetch(src + 4 * src_stride); - __builtin_prefetch(src + 5 * src_stride); - __builtin_prefetch(src + 6 * src_stride); - __builtin_prefetch(src + 7 * src_stride); - t0 = convolve8_horiz_8x8(s0, s1, s2, s3, s4, s5, s6, s7, x_filter, - shift_round_0, shift_by_bits); - t1 = convolve8_horiz_8x8(s1, s2, s3, s4, s5, s6, s7, s8, x_filter, - shift_round_0, shift_by_bits); - t2 = convolve8_horiz_8x8(s2, s3, s4, s5, s6, s7, s8, s9, x_filter, - shift_round_0, shift_by_bits); - t3 = convolve8_horiz_8x8(s3, s4, s5, s6, s7, s8, s9, s10, x_filter, - shift_round_0, shift_by_bits); - - transpose_u8_8x4(&t0, &t1, &t2, &t3); - - if ((w == 4) && (h > 4)) { - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0), - 0); // 00 01 02 03 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1), - 0); // 10 11 12 13 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t2), - 0); // 20 21 22 23 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t3), - 0); // 30 31 32 33 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0), - 1); // 40 41 42 43 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1), - 1); // 50 51 52 53 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t2), - 1); // 60 61 62 63 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, 
vreinterpret_u32_u8(t3), - 1); // 70 71 72 73 - dst += dst_stride; - } else if ((w == 4) && (h == 2)) { - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0), - 0); // 00 01 02 03 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1), - 0); // 10 11 12 13 - dst += dst_stride; - } else if ((w == 2) && (h > 4)) { - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 0); // 00 01 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t1), 0); // 10 11 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t2), 0); // 20 21 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t3), 0); // 30 31 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 2); // 40 41 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t1), 2); // 50 51 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t2), 2); // 60 61 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t3), 2); // 70 71 - dst += dst_stride; - } else if ((w == 2) && (h == 2)) { - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 0); // 00 01 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t1), 0); // 10 11 - dst += dst_stride; - } - h -= 8; - } while (h > 0); -#else - int16x8_t tt0; - int16x4_t x0, x1, x2, x3, x4, x5, x6, x7; - const int16x4_t shift_round_0_low = vget_low_s16(shift_round_0); - const int16x4_t shift_by_bits_low = vget_low_s16(shift_by_bits); - do { - t0 = vld1_u8(src); // a0 a1 a2 a3 a4 a5 a6 a7 - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - x0 = vget_low_s16(tt0); // a0 a1 a2 a3 - x4 = vget_high_s16(tt0); // a4 a5 a6 a7 - - t0 = vld1_u8(src + 8); // a8 a9 a10 a11 a12 a13 a14 a15 - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - x7 = vget_low_s16(tt0); // a8 a9 a10 a11 - - x1 = vext_s16(x0, x4, 1); // a1 a2 a3 a4 - x2 = vext_s16(x0, x4, 2); // a2 a3 a4 a5 - x3 = vext_s16(x0, x4, 3); // a3 a4 a5 a6 - x5 
= vext_s16(x4, x7, 1); // a5 a6 a7 a8 - x6 = vext_s16(x4, x7, 2); // a6 a7 a8 a9 - x7 = vext_s16(x4, x7, 3); // a7 a8 a9 a10 - - src += src_stride; - - t0 = convolve8_horiz_4x1(x0, x1, x2, x3, x4, x5, x6, x7, x_filter, - shift_round_0_low, shift_by_bits_low); - - if (w == 4) { - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0), - 0); // 00 01 02 03 - dst += dst_stride; - } else if (w == 2) { - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 0); // 00 01 - dst += dst_stride; - } - h -= 1; - } while (h > 0); -#endif - } else { - uint8_t *d; - int16x8_t s11; -#if defined(__aarch64__) - int16x8_t s12, s13, s14; - do { - __builtin_prefetch(src + 0 * src_stride); - __builtin_prefetch(src + 1 * src_stride); - __builtin_prefetch(src + 2 * src_stride); - __builtin_prefetch(src + 3 * src_stride); - __builtin_prefetch(src + 4 * src_stride); - __builtin_prefetch(src + 5 * src_stride); - __builtin_prefetch(src + 6 * src_stride); - __builtin_prefetch(src + 7 * src_stride); - load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); - - width = w; - s = src + 7; - d = dst; - __builtin_prefetch(dst + 0 * dst_stride); - __builtin_prefetch(dst + 1 * dst_stride); - __builtin_prefetch(dst + 2 * dst_stride); - __builtin_prefetch(dst + 3 * dst_stride); - __builtin_prefetch(dst + 4 * dst_stride); - __builtin_prefetch(dst + 5 * dst_stride); - __builtin_prefetch(dst + 6 * dst_stride); - __builtin_prefetch(dst + 7 * dst_stride); - - do { - load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - s7 = 
vreinterpretq_s16_u16(vmovl_u8(t0)); - s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s11 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s12 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s13 = vreinterpretq_s16_u16(vmovl_u8(t6)); - s14 = vreinterpretq_s16_u16(vmovl_u8(t7)); - - t0 = convolve8_horiz_8x8(s0, s1, s2, s3, s4, s5, s6, s7, x_filter, - shift_round_0, shift_by_bits); - - t1 = convolve8_horiz_8x8(s1, s2, s3, s4, s5, s6, s7, s8, x_filter, - shift_round_0, shift_by_bits); - - t2 = convolve8_horiz_8x8(s2, s3, s4, s5, s6, s7, s8, s9, x_filter, - shift_round_0, shift_by_bits); - - t3 = convolve8_horiz_8x8(s3, s4, s5, s6, s7, s8, s9, s10, x_filter, - shift_round_0, shift_by_bits); - - t4 = convolve8_horiz_8x8(s4, s5, s6, s7, s8, s9, s10, s11, x_filter, - shift_round_0, shift_by_bits); - - t5 = convolve8_horiz_8x8(s5, s6, s7, s8, s9, s10, s11, s12, x_filter, - shift_round_0, shift_by_bits); - - t6 = convolve8_horiz_8x8(s6, s7, s8, s9, s10, s11, s12, s13, x_filter, - shift_round_0, shift_by_bits); - - t7 = convolve8_horiz_8x8(s7, s8, s9, s10, s11, s12, s13, s14, - x_filter, shift_round_0, shift_by_bits); - - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - if (h != 2) { - store_u8_8x8(d, dst_stride, t0, t1, t2, t3, t4, t5, t6, t7); - } else { - store_row2_u8_8x8(d, dst_stride, t0, t1); - } - s0 = s8; - s1 = s9; - s2 = s10; - s3 = s11; - s4 = s12; - s5 = s13; - s6 = s14; - s += 8; - d += 8; - width -= 8; - } while (width > 0); - src += 8 * src_stride; - dst += 8 * dst_stride; - h -= 8; - } while (h > 0); -#else - do { - t0 = vld1_u8(src); // a0 a1 a2 a3 a4 a5 a6 a7 - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - - width = w; - s = src + 8; - d = dst; - __builtin_prefetch(dst); - - do { - t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15 - s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s11 = s0; - s0 = s7; - - s1 = vextq_s16(s11, s7, 1); // a1 a2 a3 a4 a5 a6 a7 a8 - s2 = vextq_s16(s11, 
s7, 2); // a2 a3 a4 a5 a6 a7 a8 a9 - s3 = vextq_s16(s11, s7, 3); // a3 a4 a5 a6 a7 a8 a9 a10 - s4 = vextq_s16(s11, s7, 4); // a4 a5 a6 a7 a8 a9 a10 a11 - s5 = vextq_s16(s11, s7, 5); // a5 a6 a7 a8 a9 a10 a11 a12 - s6 = vextq_s16(s11, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13 - s7 = vextq_s16(s11, s7, 7); // a7 a8 a9 a10 a11 a12 a13 a14 - - t0 = convolve8_horiz_8x8(s11, s1, s2, s3, s4, s5, s6, s7, x_filter, - shift_round_0, shift_by_bits); - vst1_u8(d, t0); - - s += 8; - d += 8; - width -= 8; - } while (width > 0); - src += src_stride; - dst += dst_stride; - h -= 1; - } while (h > 0); -#endif - } -#if defined(__aarch64__) - } -#endif -} - -void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int vert_offset = filter_params_y->taps / 2 - 1; - - src -= vert_offset * src_stride; - - (void)filter_params_x; - (void)subpel_x_q4; - (void)conv_params; - - assert(conv_params->round_0 <= FILTER_BITS); - assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) || - ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS))); - - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - - if (w <= 4) { - uint8x8_t d01; - int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0; -#if defined(__aarch64__) - uint8x8_t d23; - int16x4_t s8, s9, s10, d1, d2, d3; -#endif - s0 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; - s1 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; - s2 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; - s3 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; - s4 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - 
src += src_stride; - s5 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; - s6 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; - - do { - s7 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; -#if defined(__aarch64__) - s8 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; - s9 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; - s10 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); - src += src_stride; - - __builtin_prefetch(dst + 0 * dst_stride); - __builtin_prefetch(dst + 1 * dst_stride); - __builtin_prefetch(dst + 2 * dst_stride); - __builtin_prefetch(dst + 3 * dst_stride); - __builtin_prefetch(src + 0 * src_stride); - __builtin_prefetch(src + 1 * src_stride); - __builtin_prefetch(src + 2 * src_stride); - __builtin_prefetch(src + 3 * src_stride); - d0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter); - d1 = convolve8_4x4(s1, s2, s3, s4, s5, s6, s7, s8, y_filter); - d2 = convolve8_4x4(s2, s3, s4, s5, s6, s7, s8, s9, y_filter); - d3 = convolve8_4x4(s3, s4, s5, s6, s7, s8, s9, s10, y_filter); - - d01 = vqrshrun_n_s16(vcombine_s16(d0, d1), FILTER_BITS); - d23 = vqrshrun_n_s16(vcombine_s16(d2, d3), FILTER_BITS); - if ((w == 4) && (h != 2)) { - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), - 0); // 00 01 02 03 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), - 1); // 10 11 12 13 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), - 0); // 20 21 22 23 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), - 1); // 30 31 32 33 - dst += dst_stride; - } else if ((w == 4) && (h == 2)) { - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), - 0); // 00 01 02 03 - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), - 1); // 10 11 12 13 - dst += dst_stride; - } else if 
((w == 2) && (h != 2)) { - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 0); // 00 01 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 2); // 10 11 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d23), 0); // 20 21 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d23), 2); // 30 31 - dst += dst_stride; - } else if ((w == 2) && (h == 2)) { - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 0); // 00 01 - dst += dst_stride; - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 2); // 10 11 - dst += dst_stride; - } - s0 = s4; - s1 = s5; - s2 = s6; - s3 = s7; - s4 = s8; - s5 = s9; - s6 = s10; - h -= 4; -#else - __builtin_prefetch(dst + 0 * dst_stride); - __builtin_prefetch(src + 0 * src_stride); - - d0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter); - - d01 = vqrshrun_n_s16(vcombine_s16(d0, d0), FILTER_BITS); - - if (w == 4) { - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0); - dst += dst_stride; - } else if (w == 2) { - vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 0); - dst += dst_stride; - } - s0 = s1; - s1 = s2; - s2 = s3; - s3 = s4; - s4 = s5; - s5 = s6; - s6 = s7; - h -= 1; -#endif - } while (h > 0); - } else { - int height; - const uint8_t *s; - uint8_t *d; - uint8x8_t t0; - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; -#if defined(__aarch64__) - uint8x8_t t1, t2, t3; - int16x8_t s8, s9, s10; -#endif - do { - __builtin_prefetch(src + 0 * src_stride); - __builtin_prefetch(src + 1 * src_stride); - __builtin_prefetch(src + 2 * src_stride); - __builtin_prefetch(src + 3 * src_stride); - __builtin_prefetch(src + 4 * src_stride); - __builtin_prefetch(src + 5 * src_stride); - __builtin_prefetch(src + 6 * src_stride); - s = src; - s0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; - s1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; - s2 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += 
src_stride; - s3 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; - s4 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; - s5 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; - s6 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; - d = dst; - height = h; - - do { - s7 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; -#if defined(__aarch64__) - s8 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; - s9 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; - s10 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - s += src_stride; - - __builtin_prefetch(d + 0 * dst_stride); - __builtin_prefetch(d + 1 * dst_stride); - __builtin_prefetch(d + 2 * dst_stride); - __builtin_prefetch(d + 3 * dst_stride); - __builtin_prefetch(s + 0 * src_stride); - __builtin_prefetch(s + 1 * src_stride); - __builtin_prefetch(s + 2 * src_stride); - __builtin_prefetch(s + 3 * src_stride); - t0 = convolve8_vert_8x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter); - t1 = convolve8_vert_8x4(s1, s2, s3, s4, s5, s6, s7, s8, y_filter); - t2 = convolve8_vert_8x4(s2, s3, s4, s5, s6, s7, s8, s9, y_filter); - t3 = convolve8_vert_8x4(s3, s4, s5, s6, s7, s8, s9, s10, y_filter); - if (h != 2) { - vst1_u8(d, t0); - d += dst_stride; - vst1_u8(d, t1); - d += dst_stride; - vst1_u8(d, t2); - d += dst_stride; - vst1_u8(d, t3); - d += dst_stride; - } else { - vst1_u8(d, t0); - d += dst_stride; - vst1_u8(d, t1); - d += dst_stride; - } - s0 = s4; - s1 = s5; - s2 = s6; - s3 = s7; - s4 = s8; - s5 = s9; - s6 = s10; - height -= 4; -#else - __builtin_prefetch(d); - __builtin_prefetch(s); - - t0 = convolve8_vert_8x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter); - - vst1_u8(d, t0); - d += dst_stride; - - s0 = s1; - s1 = s2; - s2 = s3; - s3 = s4; - s4 = s5; - s5 = s6; - s6 = s7; - height -= 1; -#endif - } while (height > 0); - src += 8; - dst += 8; - w -= 8; - } while (w > 0); - } -} - -void av1_convolve_2d_sr_neon(const 
uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - int im_dst_stride; - int width, height; - uint8x8_t t0; -#if defined(__aarch64__) - uint8x8_t t1, t2, t3, t4, t5, t6, t7; -#endif - - DECLARE_ALIGNED(16, int16_t, - im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]); - - const int bd = 8; - const int im_h = h + filter_params_y->taps - 1; - const int im_stride = MAX_SB_SIZE; - const int vert_offset = filter_params_y->taps / 2 - 1; - const int horiz_offset = filter_params_x->taps / 2 - 1; - - const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset; - const uint8_t *s; - int16_t *dst_ptr; - - dst_ptr = im_block; - im_dst_stride = im_stride; - height = im_h; - width = w; - - const int16_t round_bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - const int16x8_t vec_round_bits = vdupq_n_s16(-round_bits); - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - - int16_t x_filter_tmp[8]; - int16x8_t filter_x_coef = vld1q_s16(x_filter); - - // filter coeffs are even, so downshifting by 1 to reduce intermediate - // precision requirements. 
- filter_x_coef = vshrq_n_s16(filter_x_coef, 1); - vst1q_s16(&x_filter_tmp[0], filter_x_coef); - - assert(conv_params->round_0 > 0); - - if (w <= 4) { - int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0; -#if defined(__aarch64__) - int16x4_t s8, s9, s10, d1, d2, d3; -#endif - - const int16x4_t horiz_const = vdup_n_s16((1 << (bd + FILTER_BITS - 2))); - const int16x4_t shift_round_0 = vdup_n_s16(-(conv_params->round_0 - 1)); - - do { - s = src_ptr; - -#if defined(__aarch64__) - __builtin_prefetch(s + 0 * src_stride); - __builtin_prefetch(s + 1 * src_stride); - __builtin_prefetch(s + 2 * src_stride); - __builtin_prefetch(s + 3 * src_stride); - - load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); - transpose_u8_8x4(&t0, &t1, &t2, &t3); - - s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); - s1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1))); - s2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2))); - s3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3))); - s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); - s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1))); - s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2))); - - __builtin_prefetch(dst_ptr + 0 * im_dst_stride); - __builtin_prefetch(dst_ptr + 1 * im_dst_stride); - __builtin_prefetch(dst_ptr + 2 * im_dst_stride); - __builtin_prefetch(dst_ptr + 3 * im_dst_stride); - s += 7; - - load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); - transpose_u8_8x4(&t0, &t1, &t2, &t3); - - s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); - s8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1))); - s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2))); - s10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3))); - - d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - horiz_const, shift_round_0); - d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp, - horiz_const, shift_round_0); - d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp, - horiz_const, shift_round_0); - 
d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp, - horiz_const, shift_round_0); - - transpose_s16_4x4d(&d0, &d1, &d2, &d3); - if (w == 4) { - vst1_s16((dst_ptr + 0 * im_dst_stride), d0); - vst1_s16((dst_ptr + 1 * im_dst_stride), d1); - vst1_s16((dst_ptr + 2 * im_dst_stride), d2); - vst1_s16((dst_ptr + 3 * im_dst_stride), d3); - } else if (w == 2) { - vst1_lane_u32((uint32_t *)(dst_ptr + 0 * im_dst_stride), - vreinterpret_u32_s16(d0), 0); - vst1_lane_u32((uint32_t *)(dst_ptr + 1 * im_dst_stride), - vreinterpret_u32_s16(d1), 0); - vst1_lane_u32((uint32_t *)(dst_ptr + 2 * im_dst_stride), - vreinterpret_u32_s16(d2), 0); - vst1_lane_u32((uint32_t *)(dst_ptr + 3 * im_dst_stride), - vreinterpret_u32_s16(d3), 0); - } - src_ptr += 4 * src_stride; - dst_ptr += 4 * im_dst_stride; - height -= 4; -#else - int16x8_t tt0; - - __builtin_prefetch(s); - - t0 = vld1_u8(s); // a0 a1 a2 a3 a4 a5 a6 a7 - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s0 = vget_low_s16(tt0); - s4 = vget_high_s16(tt0); - - __builtin_prefetch(dst_ptr); - s += 8; - - t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15 - s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); - - s1 = vext_s16(s0, s4, 1); // a1 a2 a3 a4 - s2 = vext_s16(s0, s4, 2); // a2 a3 a4 a5 - s3 = vext_s16(s0, s4, 3); // a3 a4 a5 a6 - s5 = vext_s16(s4, s7, 1); // a5 a6 a7 a8 - s6 = vext_s16(s4, s7, 2); // a6 a7 a8 a9 - s7 = vext_s16(s4, s7, 3); // a7 a8 a9 a10 - - d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - horiz_const, shift_round_0); - - if (w == 4) { - vst1_s16(dst_ptr, d0); - dst_ptr += im_dst_stride; - } else if (w == 2) { - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_s16(d0), 0); - dst_ptr += im_dst_stride; - } - - src_ptr += src_stride; - height -= 1; -#endif - } while (height > 0); - } else { - int16_t *d_tmp; - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, res0; -#if defined(__aarch64__) - int16x8_t s8, s9, s10, res1, res2, res3, res4, res5, res6, res7; - int16x8_t s11, s12, s13, 
s14; -#endif - - const int16x8_t horiz_const = vdupq_n_s16((1 << (bd + FILTER_BITS - 2))); - const int16x8_t shift_round_0 = vdupq_n_s16(-(conv_params->round_0 - 1)); - -#if defined(__aarch64__) - do { - __builtin_prefetch(src_ptr + 0 * src_stride); - __builtin_prefetch(src_ptr + 1 * src_stride); - __builtin_prefetch(src_ptr + 2 * src_stride); - __builtin_prefetch(src_ptr + 3 * src_stride); - __builtin_prefetch(src_ptr + 4 * src_stride); - __builtin_prefetch(src_ptr + 5 * src_stride); - __builtin_prefetch(src_ptr + 6 * src_stride); - __builtin_prefetch(src_ptr + 7 * src_stride); - - load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); - - width = w; - s = src_ptr + 7; - d_tmp = dst_ptr; - - __builtin_prefetch(dst_ptr + 0 * im_dst_stride); - __builtin_prefetch(dst_ptr + 1 * im_dst_stride); - __builtin_prefetch(dst_ptr + 2 * im_dst_stride); - __builtin_prefetch(dst_ptr + 3 * im_dst_stride); - __builtin_prefetch(dst_ptr + 4 * im_dst_stride); - __builtin_prefetch(dst_ptr + 5 * im_dst_stride); - __builtin_prefetch(dst_ptr + 6 * im_dst_stride); - __builtin_prefetch(dst_ptr + 7 * im_dst_stride); - - do { - load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - - s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s11 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s12 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s13 = vreinterpretq_s16_u16(vmovl_u8(t6)); - s14 = vreinterpretq_s16_u16(vmovl_u8(t7)); 
- - res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - horiz_const, shift_round_0); - res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp, - horiz_const, shift_round_0); - res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp, - horiz_const, shift_round_0); - res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp, - horiz_const, shift_round_0); - res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp, - horiz_const, shift_round_0); - res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12, - x_filter_tmp, horiz_const, shift_round_0); - res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13, - x_filter_tmp, horiz_const, shift_round_0); - res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14, - x_filter_tmp, horiz_const, shift_round_0); - - transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6, - &res7); - - store_s16_8x8(d_tmp, im_dst_stride, res0, res1, res2, res3, res4, res5, - res6, res7); - - s0 = s8; - s1 = s9; - s2 = s10; - s3 = s11; - s4 = s12; - s5 = s13; - s6 = s14; - s += 8; - d_tmp += 8; - width -= 8; - } while (width > 0); - src_ptr += 8 * src_stride; - dst_ptr += 8 * im_dst_stride; - height -= 8; - } while (height > 0); -#else - do { - t0 = vld1_u8(src_ptr); - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7 - - width = w; - s = src_ptr + 8; - d_tmp = dst_ptr; - - __builtin_prefetch(dst_ptr); - - do { - t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15 - s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); - int16x8_t sum = s0; - s0 = s7; - - s1 = vextq_s16(sum, s7, 1); // a1 a2 a3 a4 a5 a6 a7 a8 - s2 = vextq_s16(sum, s7, 2); // a2 a3 a4 a5 a6 a7 a8 a9 - s3 = vextq_s16(sum, s7, 3); // a3 a4 a5 a6 a7 a8 a9 a10 - s4 = vextq_s16(sum, s7, 4); // a4 a5 a6 a7 a8 a9 a10 a11 - s5 = vextq_s16(sum, s7, 5); // a5 a6 a7 a8 a9 a10 a11 a12 - s6 = vextq_s16(sum, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13 - s7 = vextq_s16(sum, s7, 7); // a7 a8 a9 a10 
a11 a12 a13 a14 - - res0 = convolve8_8x8_s16(sum, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - horiz_const, shift_round_0); - - vst1q_s16(d_tmp, res0); - - s += 8; - d_tmp += 8; - width -= 8; - } while (width > 0); - src_ptr += src_stride; - dst_ptr += im_dst_stride; - height -= 1; - } while (height > 0); -#endif - } - - // vertical - { - uint8_t *dst_u8_ptr, *d_u8; - int16_t *v_src_ptr, *v_s; - - const int32_t sub_const = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - - const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1)); - const int32x4_t offset_const = vdupq_n_s32(1 << offset_bits); - const int32x4_t sub_const_vec = vdupq_n_s32(sub_const); - - src_stride = im_stride; - v_src_ptr = im_block; - dst_u8_ptr = dst; - - height = h; - width = w; - - if (width <= 4) { - int16x4_t s0, s1, s2, s3, s4, s5, s6, s7; - uint16x4_t d0; - uint16x8_t dd0; - uint8x8_t d01; - -#if defined(__aarch64__) - int16x4_t s8, s9, s10; - uint16x4_t d1, d2, d3; - uint16x8_t dd1; - uint8x8_t d23; -#endif - - d_u8 = dst_u8_ptr; - v_s = v_src_ptr; - - __builtin_prefetch(v_s + 0 * im_stride); - __builtin_prefetch(v_s + 1 * im_stride); - __builtin_prefetch(v_s + 2 * im_stride); - __builtin_prefetch(v_s + 3 * im_stride); - __builtin_prefetch(v_s + 4 * im_stride); - __builtin_prefetch(v_s + 5 * im_stride); - __builtin_prefetch(v_s + 6 * im_stride); - __builtin_prefetch(v_s + 7 * im_stride); - - load_s16_4x8(v_s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); - v_s += (7 * im_stride); - - do { -#if defined(__aarch64__) - load_s16_4x4(v_s, im_stride, &s7, &s8, &s9, &s10); - v_s += (im_stride << 2); - - __builtin_prefetch(d_u8 + 0 * dst_stride); - __builtin_prefetch(d_u8 + 1 * dst_stride); - __builtin_prefetch(d_u8 + 2 * dst_stride); - __builtin_prefetch(d_u8 + 3 * dst_stride); - - d0 = convolve8_vert_4x4_s32(s0, s1, 
s2, s3, s4, s5, s6, s7, y_filter, - round_shift_vec, offset_const, - sub_const_vec); - d1 = convolve8_vert_4x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, y_filter, - round_shift_vec, offset_const, - sub_const_vec); - d2 = convolve8_vert_4x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, y_filter, - round_shift_vec, offset_const, - sub_const_vec); - d3 = convolve8_vert_4x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, y_filter, - round_shift_vec, offset_const, - sub_const_vec); - - dd0 = vqrshlq_u16(vcombine_u16(d0, d1), vec_round_bits); - dd1 = vqrshlq_u16(vcombine_u16(d2, d3), vec_round_bits); - - d01 = vqmovn_u16(dd0); - d23 = vqmovn_u16(dd1); - - if ((w == 4) && (h != 2)) { - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01), - 0); // 00 01 02 03 - d_u8 += dst_stride; - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01), - 1); // 10 11 12 13 - d_u8 += dst_stride; - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d23), - 0); // 20 21 22 23 - d_u8 += dst_stride; - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d23), - 1); // 30 31 32 33 - d_u8 += dst_stride; - } else if ((w == 2) && (h != 2)) { - vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01), - 0); // 00 01 - d_u8 += dst_stride; - vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01), - 2); // 10 11 - d_u8 += dst_stride; - vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d23), - 0); // 20 21 - d_u8 += dst_stride; - vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d23), - 2); // 30 31 - d_u8 += dst_stride; - } else if ((w == 4) && (h == 2)) { - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01), - 0); // 00 01 02 03 - d_u8 += dst_stride; - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01), - 1); // 10 11 12 13 - d_u8 += dst_stride; - } else if ((w == 2) && (h == 2)) { - vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01), - 0); // 00 01 - d_u8 += dst_stride; - vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01), - 2); // 10 11 - d_u8 += dst_stride; - } - - s0 = s4; - s1 = s5; - 
s2 = s6; - s3 = s7; - s4 = s8; - s5 = s9; - s6 = s10; - height -= 4; -#else - s7 = vld1_s16(v_s); - v_s += im_stride; - - __builtin_prefetch(d_u8 + 0 * dst_stride); - - d0 = convolve8_vert_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter, - round_shift_vec, offset_const, - sub_const_vec); - - dd0 = vqrshlq_u16(vcombine_u16(d0, d0), vec_round_bits); - d01 = vqmovn_u16(dd0); - - if (w == 4) { - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01), - 0); // 00 01 02 03 - d_u8 += dst_stride; - - } else if (w == 2) { - vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01), - 0); // 00 01 - d_u8 += dst_stride; - } - - s0 = s1; - s1 = s2; - s2 = s3; - s3 = s4; - s4 = s5; - s5 = s6; - s6 = s7; - height -= 1; -#endif - } while (height > 0); - } else { - // if width is a multiple of 8 & height is a multiple of 4 - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; - uint8x8_t res0; -#if defined(__aarch64__) - int16x8_t s8, s9, s10; - uint8x8_t res1, res2, res3; -#endif - - do { - __builtin_prefetch(v_src_ptr + 0 * im_stride); - __builtin_prefetch(v_src_ptr + 1 * im_stride); - __builtin_prefetch(v_src_ptr + 2 * im_stride); - __builtin_prefetch(v_src_ptr + 3 * im_stride); - __builtin_prefetch(v_src_ptr + 4 * im_stride); - __builtin_prefetch(v_src_ptr + 5 * im_stride); - __builtin_prefetch(v_src_ptr + 6 * im_stride); - __builtin_prefetch(v_src_ptr + 7 * im_stride); - - v_s = v_src_ptr; - load_s16_8x8(v_s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); - v_s += (7 * im_stride); - - d_u8 = dst_u8_ptr; - height = h; - - do { -#if defined(__aarch64__) - load_s16_8x4(v_s, im_stride, &s7, &s8, &s9, &s10); - v_s += (im_stride << 2); - - __builtin_prefetch(d_u8 + 4 * dst_stride); - __builtin_prefetch(d_u8 + 5 * dst_stride); - __builtin_prefetch(d_u8 + 6 * dst_stride); - __builtin_prefetch(d_u8 + 7 * dst_stride); - - res0 = convolve8_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, - y_filter, round_shift_vec, offset_const, - sub_const_vec, vec_round_bits); - res1 = 
convolve8_vert_8x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, - y_filter, round_shift_vec, offset_const, - sub_const_vec, vec_round_bits); - res2 = convolve8_vert_8x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, - y_filter, round_shift_vec, offset_const, - sub_const_vec, vec_round_bits); - res3 = convolve8_vert_8x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, - y_filter, round_shift_vec, offset_const, - sub_const_vec, vec_round_bits); - - if (h != 2) { - vst1_u8(d_u8, res0); - d_u8 += dst_stride; - vst1_u8(d_u8, res1); - d_u8 += dst_stride; - vst1_u8(d_u8, res2); - d_u8 += dst_stride; - vst1_u8(d_u8, res3); - d_u8 += dst_stride; - } else { - vst1_u8(d_u8, res0); - d_u8 += dst_stride; - vst1_u8(d_u8, res1); - d_u8 += dst_stride; - } - s0 = s4; - s1 = s5; - s2 = s6; - s3 = s7; - s4 = s8; - s5 = s9; - s6 = s10; - height -= 4; -#else - s7 = vld1q_s16(v_s); - v_s += im_stride; - - __builtin_prefetch(d_u8 + 0 * dst_stride); - - res0 = convolve8_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, - y_filter, round_shift_vec, offset_const, - sub_const_vec, vec_round_bits); - - vst1_u8(d_u8, res0); - d_u8 += dst_stride; - - s0 = s1; - s1 = s2; - s2 = s3; - s3 = s4; - s4 = s5; - s5 = s6; - s6 = s7; - height -= 1; -#endif - } while (height > 0); - v_src_ptr += 8; - dst_u8_ptr += 8; - w -= 8; - } while (w > 0); - } - } -} -void av1_convolve_2d_copy_sr_neon(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - (void)conv_params; - - const uint8_t *src1; - uint8_t *dst1; - int y; - - if (!(w & 0x0F)) { - for (y = 0; y < h; ++y) { - src1 = src; - dst1 = dst; - for (int x = 0; x < (w >> 4); ++x) { - vst1q_u8(dst1, vld1q_u8(src1)); - src1 += 16; - dst1 += 16; - } - src += src_stride; - dst += dst_stride; - } - } else if (!(w 
& 0x07)) { - for (y = 0; y < h; ++y) { - vst1_u8(dst, vld1_u8(src)); - src += src_stride; - dst += dst_stride; - } - } else if (!(w & 0x03)) { - for (y = 0; y < h; ++y) { - vst1_lane_u32((uint32_t *)(dst), vreinterpret_u32_u8(vld1_u8(src)), 0); - src += src_stride; - dst += dst_stride; - } - } else if (!(w & 0x01)) { - for (y = 0; y < h; ++y) { - vst1_lane_u16((uint16_t *)(dst), vreinterpret_u16_u8(vld1_u8(src)), 0); - src += src_stride; - dst += dst_stride; - } - } -} diff --git a/third_party/aom/av1/common/arm/convolve_neon.h b/third_party/aom/av1/common/arm/convolve_neon.h deleted file mode 100644 index f382984f2..000000000 --- a/third_party/aom/av1/common/arm/convolve_neon.h +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef AOM_AV1_COMMON_ARM_CONVOLVE_NEON_H_ -#define AOM_AV1_COMMON_ARM_CONVOLVE_NEON_H_ - -#include <arm_neon.h> - -#define HORIZ_EXTRA_ROWS ((SUBPEL_TAPS + 7) & ~0x07) - -static INLINE uint8x8_t wiener_convolve8_vert_4x8( - const int16x8_t s0, const int16x8_t s1, const int16x8_t s2, - const int16x8_t s3, const int16x8_t s4, const int16x8_t s5, - const int16x8_t s6, int16_t *filter_y, const int bd, - const int round1_bits) { - int16x8_t ss0, ss1, ss2; - int32x4_t sum0, sum1; - uint16x4_t tmp0, tmp1; - uint16x8_t tmp; - uint8x8_t res; - - const int32_t round_const = (1 << (bd + round1_bits - 1)); - const int32x4_t round_bits = vdupq_n_s32(-round1_bits); - const int32x4_t zero = vdupq_n_s32(0); - const int32x4_t round_vec = vdupq_n_s32(round_const); - - ss0 = vaddq_s16(s0, s6); - ss1 = vaddq_s16(s1, s5); - ss2 = vaddq_s16(s2, s4); - - sum0 = vmull_n_s16(vget_low_s16(ss0), filter_y[0]); - sum0 = vmlal_n_s16(sum0, vget_low_s16(ss1), filter_y[1]); - sum0 = vmlal_n_s16(sum0, vget_low_s16(ss2), filter_y[2]); - sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), filter_y[3]); - - sum1 = vmull_n_s16(vget_high_s16(ss0), filter_y[0]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(ss1), filter_y[1]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(ss2), filter_y[2]); - sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), filter_y[3]); - - sum0 = vsubq_s32(sum0, round_vec); - sum1 = vsubq_s32(sum1, round_vec); - - /* right shift & rounding */ - sum0 = vrshlq_s32(sum0, round_bits); - sum1 = vrshlq_s32(sum1, round_bits); - - sum0 = vmaxq_s32(sum0, zero); - sum1 = vmaxq_s32(sum1, zero); - - /* from int32x4_t to uint8x8_t */ - tmp0 = vqmovn_u32(vreinterpretq_u32_s32(sum0)); - tmp1 = vqmovn_u32(vreinterpretq_u32_s32(sum1)); - tmp = vcombine_u16(tmp0, tmp1); - res = vqmovn_u16(tmp); - - return res; -} - -static INLINE uint16x8_t wiener_convolve8_horiz_8x8( - const int16x8_t s0, const int16x8_t s1, const int16x8_t s2, - const int16x8_t s3, int16_t *filter_x, const int bd, - const int round0_bits) { - 
int16x8_t sum; - uint16x8_t res; - int32x4_t sum_0, sum_1; - int32x4_t s3_0, s3_1; - const int32_t round_const_0 = (1 << (bd + FILTER_BITS - 1)); - const int32_t round_const_1 = (1 << ((bd) + 1 + FILTER_BITS - round0_bits)); - - /* for the purpose of right shift by { conv_params->round_0 } */ - const int32x4_t round_bits = vdupq_n_s32(-round0_bits); - - const int32x4_t round_vec_0 = vdupq_n_s32(round_const_0); - const int32x4_t round_vec_1 = vdupq_n_s32(round_const_1); - - sum = vmulq_n_s16(s0, filter_x[0]); - sum = vmlaq_n_s16(sum, s1, filter_x[1]); - sum = vmlaq_n_s16(sum, s2, filter_x[2]); - - /* sum from 16x8 to 2 32x4 registers */ - sum_0 = vmovl_s16(vget_low_s16(sum)); - sum_1 = vmovl_s16(vget_high_s16(sum)); - - /* s[3]*128 -- and filter coef max can be 128 - * then max value possible = 128*128*255 exceeding 16 bit - */ - - s3_0 = vmull_n_s16(vget_low_s16(s3), filter_x[3]); - s3_1 = vmull_n_s16(vget_high_s16(s3), filter_x[3]); - sum_0 = vaddq_s32(sum_0, s3_0); - sum_1 = vaddq_s32(sum_1, s3_1); - - /* Add the constant value */ - sum_0 = vaddq_s32(sum_0, round_vec_0); - sum_1 = vaddq_s32(sum_1, round_vec_0); - - /* right shift & rounding & saturating */ - sum_0 = vqrshlq_s32(sum_0, round_bits); - sum_1 = vqrshlq_s32(sum_1, round_bits); - - /* Clipping to max value */ - sum_0 = vminq_s32(sum_0, round_vec_1); - sum_1 = vminq_s32(sum_1, round_vec_1); - - res = vcombine_u16(vqmovun_s32(sum_0), vqmovun_s32(sum_1)); - return res; -} - -static INLINE uint16x4_t wiener_convolve8_horiz_4x8( - const int16x4_t s0, const int16x4_t s1, const int16x4_t s2, - const int16x4_t s3, const int16x4_t s4, const int16x4_t s5, - const int16x4_t s6, int16_t *filter_x, const int bd, - const int round0_bits) { - uint16x4_t res; - int32x4_t sum_0, s3_0; - int16x4_t sum, temp0, temp1, temp2; - - const int32_t round_const_0 = (1 << (bd + FILTER_BITS - 1)); - const int32_t round_const_1 = (1 << ((bd) + 1 + FILTER_BITS - round0_bits)); - const int32x4_t round_bits = 
vdupq_n_s32(-round0_bits); - const int32x4_t zero = vdupq_n_s32(0); - const int32x4_t round_vec_0 = vdupq_n_s32(round_const_0); - const int32x4_t round_vec_1 = vdupq_n_s32(round_const_1); - - temp0 = vadd_s16(s0, s6); - temp1 = vadd_s16(s1, s5); - temp2 = vadd_s16(s2, s4); - - sum = vmul_n_s16(temp0, filter_x[0]); - sum = vmla_n_s16(sum, temp1, filter_x[1]); - sum = vmla_n_s16(sum, temp2, filter_x[2]); - sum_0 = vmovl_s16(sum); - - /* s[3]*128 -- and filter coff max can be 128. - * then max value possible = 128*128*255 Therefore, 32 bits are required to - * hold the result. - */ - s3_0 = vmull_n_s16(s3, filter_x[3]); - sum_0 = vaddq_s32(sum_0, s3_0); - - sum_0 = vaddq_s32(sum_0, round_vec_0); - sum_0 = vrshlq_s32(sum_0, round_bits); - - sum_0 = vmaxq_s32(sum_0, zero); - sum_0 = vminq_s32(sum_0, round_vec_1); - res = vqmovun_s32(sum_0); - return res; -} - -static INLINE int16x8_t -convolve8_8x8_s16(const int16x8_t s0, const int16x8_t s1, const int16x8_t s2, - const int16x8_t s3, const int16x8_t s4, const int16x8_t s5, - const int16x8_t s6, const int16x8_t s7, const int16_t *filter, - const int16x8_t horiz_const, const int16x8_t shift_round_0) { - int16x8_t sum; - int16x8_t res; - - sum = horiz_const; - sum = vmlaq_n_s16(sum, s0, filter[0]); - sum = vmlaq_n_s16(sum, s1, filter[1]); - sum = vmlaq_n_s16(sum, s2, filter[2]); - sum = vmlaq_n_s16(sum, s3, filter[3]); - sum = vmlaq_n_s16(sum, s4, filter[4]); - sum = vmlaq_n_s16(sum, s5, filter[5]); - sum = vmlaq_n_s16(sum, s6, filter[6]); - sum = vmlaq_n_s16(sum, s7, filter[7]); - - res = vqrshlq_s16(sum, shift_round_0); - - return res; -} - -static INLINE int16x4_t -convolve8_4x4_s16(const int16x4_t s0, const int16x4_t s1, const int16x4_t s2, - const int16x4_t s3, const int16x4_t s4, const int16x4_t s5, - const int16x4_t s6, const int16x4_t s7, const int16_t *filter, - const int16x4_t horiz_const, const int16x4_t shift_round_0) { - int16x4_t sum; - sum = horiz_const; - sum = vmla_n_s16(sum, s0, filter[0]); - sum = 
vmla_n_s16(sum, s1, filter[1]); - sum = vmla_n_s16(sum, s2, filter[2]); - sum = vmla_n_s16(sum, s3, filter[3]); - sum = vmla_n_s16(sum, s4, filter[4]); - sum = vmla_n_s16(sum, s5, filter[5]); - sum = vmla_n_s16(sum, s6, filter[6]); - sum = vmla_n_s16(sum, s7, filter[7]); - - sum = vqrshl_s16(sum, shift_round_0); - - return sum; -} - -static INLINE uint16x4_t convolve8_4x4_s32( - const int16x4_t s0, const int16x4_t s1, const int16x4_t s2, - const int16x4_t s3, const int16x4_t s4, const int16x4_t s5, - const int16x4_t s6, const int16x4_t s7, const int16_t *y_filter, - const int32x4_t round_shift_vec, const int32x4_t offset_const) { - int32x4_t sum0; - uint16x4_t res; - const int32x4_t zero = vdupq_n_s32(0); - - sum0 = vmull_n_s16(s0, y_filter[0]); - sum0 = vmlal_n_s16(sum0, s1, y_filter[1]); - sum0 = vmlal_n_s16(sum0, s2, y_filter[2]); - sum0 = vmlal_n_s16(sum0, s3, y_filter[3]); - sum0 = vmlal_n_s16(sum0, s4, y_filter[4]); - sum0 = vmlal_n_s16(sum0, s5, y_filter[5]); - sum0 = vmlal_n_s16(sum0, s6, y_filter[6]); - sum0 = vmlal_n_s16(sum0, s7, y_filter[7]); - - sum0 = vaddq_s32(sum0, offset_const); - sum0 = vqrshlq_s32(sum0, round_shift_vec); - sum0 = vmaxq_s32(sum0, zero); - res = vmovn_u32(vreinterpretq_u32_s32(sum0)); - - return res; -} - -#endif // AOM_AV1_COMMON_ARM_CONVOLVE_NEON_H_ diff --git a/third_party/aom/av1/common/arm/jnt_convolve_neon.c b/third_party/aom/av1/common/arm/jnt_convolve_neon.c deleted file mode 100644 index e5674ef7c..000000000 --- a/third_party/aom/av1/common/arm/jnt_convolve_neon.c +++ /dev/null @@ -1,1740 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <arm_neon.h> -#include <assert.h> - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "aom_dsp/txfm_common.h" -#include "aom_ports/mem.h" -#include "av1/common/common.h" -#include "av1/common/arm/convolve_neon.h" -#include "av1/common/arm/mem_neon.h" -#include "av1/common/arm/transpose_neon.h" - -#if !defined(__aarch64__) -static INLINE void compute_avg_4x1(uint16x4_t res0, uint16x4_t d0, - const uint16_t fwd_offset, - const uint16_t bck_offset, - const int16x4_t sub_const_vec, - const int16_t round_bits, - const int use_jnt_comp_avg, uint8x8_t *t0) { - int16x4_t tmp0; - uint16x4_t tmp_u0; - uint32x4_t sum0; - int32x4_t dst0; - int16x8_t tmp4; - - if (use_jnt_comp_avg) { - const int32x4_t round_bits_vec = vdupq_n_s32((int32_t)(-round_bits)); - - sum0 = vmull_n_u16(res0, fwd_offset); - sum0 = vmlal_n_u16(sum0, d0, bck_offset); - - sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS); - - dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), vmovl_s16(sub_const_vec)); - - dst0 = vqrshlq_s32(dst0, round_bits_vec); - - tmp0 = vqmovn_s32(dst0); - tmp4 = vcombine_s16(tmp0, tmp0); - - *t0 = vqmovun_s16(tmp4); - } else { - const int16x4_t round_bits_vec = vdup_n_s16(-round_bits); - tmp_u0 = vhadd_u16(res0, d0); - - tmp0 = vsub_s16(vreinterpret_s16_u16(tmp_u0), sub_const_vec); - - tmp0 = vqrshl_s16(tmp0, round_bits_vec); - - tmp4 = vcombine_s16(tmp0, tmp0); - - *t0 = vqmovun_s16(tmp4); - } -} - -static INLINE void compute_avg_8x1(uint16x8_t res0, uint16x8_t d0, - const uint16_t fwd_offset, - const uint16_t bck_offset, - const int16x4_t sub_const, - const int16_t round_bits, - const int use_jnt_comp_avg, uint8x8_t *t0) { - int16x4_t tmp0, tmp2; - int16x8_t f0; - uint32x4_t sum0, sum2; - int32x4_t dst0, dst2; - - uint16x8_t tmp_u0; - - if (use_jnt_comp_avg) { - const int32x4_t 
sub_const_vec = vmovl_s16(sub_const); - const int32x4_t round_bits_vec = vdupq_n_s32(-(int32_t)round_bits); - - sum0 = vmull_n_u16(vget_low_u16(res0), fwd_offset); - sum0 = vmlal_n_u16(sum0, vget_low_u16(d0), bck_offset); - sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS); - - sum2 = vmull_n_u16(vget_high_u16(res0), fwd_offset); - sum2 = vmlal_n_u16(sum2, vget_high_u16(d0), bck_offset); - sum2 = vshrq_n_u32(sum2, DIST_PRECISION_BITS); - - dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), sub_const_vec); - dst2 = vsubq_s32(vreinterpretq_s32_u32(sum2), sub_const_vec); - - dst0 = vqrshlq_s32(dst0, round_bits_vec); - dst2 = vqrshlq_s32(dst2, round_bits_vec); - - tmp0 = vqmovn_s32(dst0); - tmp2 = vqmovn_s32(dst2); - - f0 = vcombine_s16(tmp0, tmp2); - - *t0 = vqmovun_s16(f0); - - } else { - const int16x8_t sub_const_vec = vcombine_s16(sub_const, sub_const); - const int16x8_t round_bits_vec = vdupq_n_s16(-round_bits); - - tmp_u0 = vhaddq_u16(res0, d0); - - f0 = vsubq_s16(vreinterpretq_s16_u16(tmp_u0), sub_const_vec); - - f0 = vqrshlq_s16(f0, round_bits_vec); - - *t0 = vqmovun_s16(f0); - } -} -#endif // !defined(__arch64__) - -static INLINE void compute_avg_4x4( - uint16x4_t res0, uint16x4_t res1, uint16x4_t res2, uint16x4_t res3, - uint16x4_t d0, uint16x4_t d1, uint16x4_t d2, uint16x4_t d3, - const uint16_t fwd_offset, const uint16_t bck_offset, - const int16x4_t sub_const_vec, const int16_t round_bits, - const int use_jnt_comp_avg, uint8x8_t *t0, uint8x8_t *t1) { - int16x4_t tmp0, tmp1, tmp2, tmp3; - uint16x4_t tmp_u0, tmp_u1, tmp_u2, tmp_u3; - uint32x4_t sum0, sum1, sum2, sum3; - - int32x4_t dst0, dst1, dst2, dst3; - int16x8_t tmp4, tmp5; - const int16x8_t zero = vdupq_n_s16(0); - - if (use_jnt_comp_avg) { - const int32x4_t round_bits_vec = vdupq_n_s32((int32_t)(-round_bits)); - const int32x4_t const_vec = vmovl_s16(sub_const_vec); - - sum0 = vmull_n_u16(res0, fwd_offset); - sum0 = vmlal_n_u16(sum0, d0, bck_offset); - sum1 = vmull_n_u16(res1, fwd_offset); - sum1 = 
vmlal_n_u16(sum1, d1, bck_offset); - sum2 = vmull_n_u16(res2, fwd_offset); - sum2 = vmlal_n_u16(sum2, d2, bck_offset); - sum3 = vmull_n_u16(res3, fwd_offset); - sum3 = vmlal_n_u16(sum3, d3, bck_offset); - - sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS); - sum1 = vshrq_n_u32(sum1, DIST_PRECISION_BITS); - sum2 = vshrq_n_u32(sum2, DIST_PRECISION_BITS); - sum3 = vshrq_n_u32(sum3, DIST_PRECISION_BITS); - - dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), const_vec); - dst1 = vsubq_s32(vreinterpretq_s32_u32(sum1), const_vec); - dst2 = vsubq_s32(vreinterpretq_s32_u32(sum2), const_vec); - dst3 = vsubq_s32(vreinterpretq_s32_u32(sum3), const_vec); - - dst0 = vqrshlq_s32(dst0, round_bits_vec); - dst1 = vqrshlq_s32(dst1, round_bits_vec); - dst2 = vqrshlq_s32(dst2, round_bits_vec); - dst3 = vqrshlq_s32(dst3, round_bits_vec); - - tmp0 = vqmovn_s32(dst0); - tmp1 = vqmovn_s32(dst1); - tmp2 = vqmovn_s32(dst2); - tmp3 = vqmovn_s32(dst3); - tmp4 = vcombine_s16(tmp0, tmp1); - tmp5 = vcombine_s16(tmp2, tmp3); - tmp4 = vmaxq_s16(tmp4, zero); - tmp5 = vmaxq_s16(tmp5, zero); - - *t0 = vqmovn_u16(vreinterpretq_u16_s16(tmp4)); - *t1 = vqmovn_u16(vreinterpretq_u16_s16(tmp5)); - } else { - const int16x4_t round_bits_vec = vdup_n_s16(-round_bits); - tmp_u0 = vhadd_u16(res0, d0); - tmp_u1 = vhadd_u16(res1, d1); - tmp_u2 = vhadd_u16(res2, d2); - tmp_u3 = vhadd_u16(res3, d3); - - tmp0 = vsub_s16(vreinterpret_s16_u16(tmp_u0), sub_const_vec); - tmp1 = vsub_s16(vreinterpret_s16_u16(tmp_u1), sub_const_vec); - tmp2 = vsub_s16(vreinterpret_s16_u16(tmp_u2), sub_const_vec); - tmp3 = vsub_s16(vreinterpret_s16_u16(tmp_u3), sub_const_vec); - - tmp0 = vqrshl_s16(tmp0, round_bits_vec); - tmp1 = vqrshl_s16(tmp1, round_bits_vec); - tmp2 = vqrshl_s16(tmp2, round_bits_vec); - tmp3 = vqrshl_s16(tmp3, round_bits_vec); - - tmp4 = vcombine_s16(tmp0, tmp1); - tmp5 = vcombine_s16(tmp2, tmp3); - tmp4 = vmaxq_s16(tmp4, zero); - tmp5 = vmaxq_s16(tmp5, zero); - - *t0 = vqmovn_u16(vreinterpretq_u16_s16(tmp4)); - *t1 = 
vqmovn_u16(vreinterpretq_u16_s16(tmp5)); - } -} - -static INLINE void compute_avg_8x4( - uint16x8_t res0, uint16x8_t res1, uint16x8_t res2, uint16x8_t res3, - uint16x8_t d0, uint16x8_t d1, uint16x8_t d2, uint16x8_t d3, - const uint16_t fwd_offset, const uint16_t bck_offset, - const int16x4_t sub_const, const int16_t round_bits, - const int use_jnt_comp_avg, uint8x8_t *t0, uint8x8_t *t1, uint8x8_t *t2, - uint8x8_t *t3) { - int16x4_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int16x8_t f0, f1, f2, f3; - uint32x4_t sum0, sum1, sum2, sum3; - uint32x4_t sum4, sum5, sum6, sum7; - int32x4_t dst0, dst1, dst2, dst3; - int32x4_t dst4, dst5, dst6, dst7; - uint16x8_t tmp_u0, tmp_u1, tmp_u2, tmp_u3; - const int16x8_t zero = vdupq_n_s16(0); - - if (use_jnt_comp_avg) { - const int32x4_t sub_const_vec = vmovl_s16(sub_const); - const int32x4_t round_bits_vec = vdupq_n_s32(-(int32_t)round_bits); - - sum0 = vmull_n_u16(vget_low_u16(res0), fwd_offset); - sum0 = vmlal_n_u16(sum0, vget_low_u16(d0), bck_offset); - sum1 = vmull_n_u16(vget_low_u16(res1), fwd_offset); - sum1 = vmlal_n_u16(sum1, vget_low_u16(d1), bck_offset); - sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS); - sum1 = vshrq_n_u32(sum1, DIST_PRECISION_BITS); - - sum2 = vmull_n_u16(vget_high_u16(res0), fwd_offset); - sum2 = vmlal_n_u16(sum2, vget_high_u16(d0), bck_offset); - sum3 = vmull_n_u16(vget_high_u16(res1), fwd_offset); - sum3 = vmlal_n_u16(sum3, vget_high_u16(d1), bck_offset); - sum2 = vshrq_n_u32(sum2, DIST_PRECISION_BITS); - sum3 = vshrq_n_u32(sum3, DIST_PRECISION_BITS); - - sum4 = vmull_n_u16(vget_low_u16(res2), fwd_offset); - sum4 = vmlal_n_u16(sum4, vget_low_u16(d2), bck_offset); - sum5 = vmull_n_u16(vget_low_u16(res3), fwd_offset); - sum5 = vmlal_n_u16(sum5, vget_low_u16(d3), bck_offset); - sum4 = vshrq_n_u32(sum4, DIST_PRECISION_BITS); - sum5 = vshrq_n_u32(sum5, DIST_PRECISION_BITS); - - sum6 = vmull_n_u16(vget_high_u16(res2), fwd_offset); - sum6 = vmlal_n_u16(sum6, vget_high_u16(d2), bck_offset); - sum7 = 
vmull_n_u16(vget_high_u16(res3), fwd_offset); - sum7 = vmlal_n_u16(sum7, vget_high_u16(d3), bck_offset); - sum6 = vshrq_n_u32(sum6, DIST_PRECISION_BITS); - sum7 = vshrq_n_u32(sum7, DIST_PRECISION_BITS); - - dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), sub_const_vec); - dst1 = vsubq_s32(vreinterpretq_s32_u32(sum1), sub_const_vec); - dst2 = vsubq_s32(vreinterpretq_s32_u32(sum2), sub_const_vec); - dst3 = vsubq_s32(vreinterpretq_s32_u32(sum3), sub_const_vec); - dst4 = vsubq_s32(vreinterpretq_s32_u32(sum4), sub_const_vec); - dst5 = vsubq_s32(vreinterpretq_s32_u32(sum5), sub_const_vec); - dst6 = vsubq_s32(vreinterpretq_s32_u32(sum6), sub_const_vec); - dst7 = vsubq_s32(vreinterpretq_s32_u32(sum7), sub_const_vec); - - dst0 = vqrshlq_s32(dst0, round_bits_vec); - dst1 = vqrshlq_s32(dst1, round_bits_vec); - dst2 = vqrshlq_s32(dst2, round_bits_vec); - dst3 = vqrshlq_s32(dst3, round_bits_vec); - dst4 = vqrshlq_s32(dst4, round_bits_vec); - dst5 = vqrshlq_s32(dst5, round_bits_vec); - dst6 = vqrshlq_s32(dst6, round_bits_vec); - dst7 = vqrshlq_s32(dst7, round_bits_vec); - - tmp0 = vqmovn_s32(dst0); - tmp1 = vqmovn_s32(dst1); - tmp2 = vqmovn_s32(dst2); - tmp3 = vqmovn_s32(dst3); - tmp4 = vqmovn_s32(dst4); - tmp5 = vqmovn_s32(dst5); - tmp6 = vqmovn_s32(dst6); - tmp7 = vqmovn_s32(dst7); - - f0 = vcombine_s16(tmp0, tmp2); - f1 = vcombine_s16(tmp1, tmp3); - f2 = vcombine_s16(tmp4, tmp6); - f3 = vcombine_s16(tmp5, tmp7); - - f0 = vmaxq_s16(f0, zero); - f1 = vmaxq_s16(f1, zero); - f2 = vmaxq_s16(f2, zero); - f3 = vmaxq_s16(f3, zero); - - *t0 = vqmovn_u16(vreinterpretq_u16_s16(f0)); - *t1 = vqmovn_u16(vreinterpretq_u16_s16(f1)); - *t2 = vqmovn_u16(vreinterpretq_u16_s16(f2)); - *t3 = vqmovn_u16(vreinterpretq_u16_s16(f3)); - - } else { - const int16x8_t sub_const_vec = vcombine_s16(sub_const, sub_const); - const int16x8_t round_bits_vec = vdupq_n_s16(-round_bits); - - tmp_u0 = vhaddq_u16(res0, d0); - tmp_u1 = vhaddq_u16(res1, d1); - tmp_u2 = vhaddq_u16(res2, d2); - tmp_u3 = vhaddq_u16(res3, 
d3); - - f0 = vsubq_s16(vreinterpretq_s16_u16(tmp_u0), sub_const_vec); - f1 = vsubq_s16(vreinterpretq_s16_u16(tmp_u1), sub_const_vec); - f2 = vsubq_s16(vreinterpretq_s16_u16(tmp_u2), sub_const_vec); - f3 = vsubq_s16(vreinterpretq_s16_u16(tmp_u3), sub_const_vec); - - f0 = vqrshlq_s16(f0, round_bits_vec); - f1 = vqrshlq_s16(f1, round_bits_vec); - f2 = vqrshlq_s16(f2, round_bits_vec); - f3 = vqrshlq_s16(f3, round_bits_vec); - - f0 = vmaxq_s16(f0, zero); - f1 = vmaxq_s16(f1, zero); - f2 = vmaxq_s16(f2, zero); - f3 = vmaxq_s16(f3, zero); - - *t0 = vqmovn_u16(vreinterpretq_u16_s16(f0)); - *t1 = vqmovn_u16(vreinterpretq_u16_s16(f1)); - *t2 = vqmovn_u16(vreinterpretq_u16_s16(f2)); - *t3 = vqmovn_u16(vreinterpretq_u16_s16(f3)); - } -} - -static INLINE void jnt_convolve_2d_horiz_neon( - const uint8_t *src, int src_stride, int16_t *im_block, const int im_stride, - int16_t *x_filter_tmp, const int im_h, int w, const int round_0) { - const int bd = 8; - const uint8_t *s; - int16_t *dst_ptr; - int dst_stride; - int width, height; - - dst_ptr = im_block; - dst_stride = im_stride; - height = im_h; - width = w; - - if (w == 4) { - int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0; - int16x8_t tt0; - uint8x8_t t0; - - const int16x4_t horiz_const = vdup_n_s16((1 << (bd + FILTER_BITS - 2))); - const int16x4_t shift_round_0 = vdup_n_s16(-(round_0)); - -#if defined(__aarch64__) - int16x4_t s8, s9, s10, d1, d2, d3; - int16x8_t tt1, tt2, tt3; - uint8x8_t t1, t2, t3; -#endif - do { - s = src; - __builtin_prefetch(s + 0 * src_stride); -#if defined(__aarch64__) - __builtin_prefetch(s + 1 * src_stride); - __builtin_prefetch(s + 2 * src_stride); - __builtin_prefetch(s + 3 * src_stride); - - load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); - transpose_u8_8x4(&t0, &t1, &t2, &t3); - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s0 = vget_low_s16(tt0); - s1 = 
vget_low_s16(tt1); - s2 = vget_low_s16(tt2); - s3 = vget_low_s16(tt3); - s4 = vget_high_s16(tt0); - s5 = vget_high_s16(tt1); - s6 = vget_high_s16(tt2); - __builtin_prefetch(dst_ptr + 0 * dst_stride); - __builtin_prefetch(dst_ptr + 1 * dst_stride); - __builtin_prefetch(dst_ptr + 2 * dst_stride); - __builtin_prefetch(dst_ptr + 3 * dst_stride); - s += 7; - - load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); - transpose_u8_8x4(&t0, &t1, &t2, &t3); - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s7 = vget_low_s16(tt0); - s8 = vget_low_s16(tt1); - s9 = vget_low_s16(tt2); - s10 = vget_low_s16(tt3); - - d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - horiz_const, shift_round_0); - d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp, - horiz_const, shift_round_0); - d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp, - horiz_const, shift_round_0); - d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp, - horiz_const, shift_round_0); - - transpose_s16_4x4d(&d0, &d1, &d2, &d3); - - vst1_s16((dst_ptr + 0 * dst_stride), d0); - vst1_s16((dst_ptr + 1 * dst_stride), d1); - vst1_s16((dst_ptr + 2 * dst_stride), d2); - vst1_s16((dst_ptr + 3 * dst_stride), d3); - - src += 4 * src_stride; - dst_ptr += 4 * dst_stride; - height -= 4; -#else - t0 = vld1_u8(s); // a0 a1 a2 a3 a4 a5 a6 a7 - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7 - s0 = vget_low_s16(tt0); // a0 a1 a2 a3 - s4 = vget_high_s16(tt0); // a4 a5 a6 a7 - __builtin_prefetch(dst_ptr); - s += 8; - t0 = vld1_u8(s); // a8 a9 a10 a11 - - // a8 a9 a10 a11 - s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); - - s1 = vext_s16(s0, s4, 1); // a1 a2 a3 a4 - s2 = vext_s16(s0, s4, 2); // a2 a3 a4 a5 - s3 = vext_s16(s0, s4, 3); // a3 a4 a5 a6 - s5 = vext_s16(s4, s7, 1); // a5 a6 a7 a8 - s6 = vext_s16(s4, s7, 
2); // a6 a7 a8 a9 - s7 = vext_s16(s4, s7, 3); // a7 a8 a9 a10 - - d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - horiz_const, shift_round_0); - - vst1_s16(dst_ptr, d0); - - src += src_stride; - dst_ptr += dst_stride; - height -= 1; -#endif - } while (height > 0); - } else { - int16_t *d_tmp; - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; - int16x8_t res0; - uint8x8_t t0; - - const int16x8_t horiz_const = vdupq_n_s16((1 << (bd + FILTER_BITS - 2))); - const int16x8_t shift_round_0 = vdupq_n_s16(-(round_0)); - do { -#if defined(__aarch64__) - uint8x8_t t1, t2, t3, t4, t5, t6, t7; - int16x8_t s8, s9, s10, s11, s12, s13, s14; - int16x8_t res1, res2, res3, res4, res5, res6, res7; - __builtin_prefetch(src + 0 * src_stride); - __builtin_prefetch(src + 1 * src_stride); - __builtin_prefetch(src + 2 * src_stride); - __builtin_prefetch(src + 3 * src_stride); - __builtin_prefetch(src + 4 * src_stride); - __builtin_prefetch(src + 5 * src_stride); - __builtin_prefetch(src + 6 * src_stride); - __builtin_prefetch(src + 7 * src_stride); - load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); - - width = w; - s = src + 7; - d_tmp = dst_ptr; - __builtin_prefetch(dst_ptr + 0 * dst_stride); - __builtin_prefetch(dst_ptr + 1 * dst_stride); - __builtin_prefetch(dst_ptr + 2 * dst_stride); - __builtin_prefetch(dst_ptr + 3 * dst_stride); - __builtin_prefetch(dst_ptr + 4 * dst_stride); - __builtin_prefetch(dst_ptr + 5 * dst_stride); - __builtin_prefetch(dst_ptr + 6 * dst_stride); - __builtin_prefetch(dst_ptr + 7 * dst_stride); - - do { - load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, 
&t6, &t7); - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s11 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s12 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s13 = vreinterpretq_s16_u16(vmovl_u8(t6)); - s14 = vreinterpretq_s16_u16(vmovl_u8(t7)); - - res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - horiz_const, shift_round_0); - res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp, - horiz_const, shift_round_0); - res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp, - horiz_const, shift_round_0); - res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp, - horiz_const, shift_round_0); - res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp, - horiz_const, shift_round_0); - res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12, - x_filter_tmp, horiz_const, shift_round_0); - res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13, - x_filter_tmp, horiz_const, shift_round_0); - res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14, - x_filter_tmp, horiz_const, shift_round_0); - - transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6, - &res7); - - store_s16_8x8(d_tmp, dst_stride, res0, res1, res2, res3, res4, res5, - res6, res7); - s0 = s8; - s1 = s9; - s2 = s10; - s3 = s11; - s4 = s12; - s5 = s13; - s6 = s14; - s += 8; - d_tmp += 8; - width -= 8; - } while (width > 0); - src += 8 * src_stride; - dst_ptr += 8 * dst_stride; - height -= 8; -#else - int16x8_t temp_0; - t0 = vld1_u8(src); - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7 - - width = w; - s = src + 8; - d_tmp = dst_ptr; - __builtin_prefetch(dst_ptr); - - do { - t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15 - s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); - temp_0 = s0; - s0 = s7; - - s1 = 
vextq_s16(temp_0, s7, 1); // a1 a2 a3 a4 a5 a6 a7 a8 - s2 = vextq_s16(temp_0, s7, 2); // a2 a3 a4 a5 a6 a7 a8 a9 - s3 = vextq_s16(temp_0, s7, 3); // a3 a4 a5 a6 a7 a8 a9 a10 - s4 = vextq_s16(temp_0, s7, 4); // a4 a5 a6 a7 a8 a9 a10 a11 - s5 = vextq_s16(temp_0, s7, 5); // a5 a6 a7 a8 a9 a10 a11 a12 - s6 = vextq_s16(temp_0, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13 - s7 = vextq_s16(temp_0, s7, 7); // a7 a8 a9 a10 a11 a12 a13 a14 - - res0 = convolve8_8x8_s16(temp_0, s1, s2, s3, s4, s5, s6, s7, - x_filter_tmp, horiz_const, shift_round_0); - vst1q_s16(d_tmp, res0); - - s += 8; - d_tmp += 8; - width -= 8; - } while (width > 0); - src += src_stride; - dst_ptr += dst_stride; - height -= 1; -#endif - } while (height > 0); - } -} - -static INLINE void jnt_convolve_2d_vert_neon( - int16_t *im_block, const int im_stride, uint8_t *dst8, int dst8_stride, - ConvolveParams *conv_params, const int16_t *y_filter, int h, int w) { - uint8_t *dst_u8_ptr, *d_u8; - CONV_BUF_TYPE *dst_ptr, *dst; - int16_t *src_ptr, *s; - uint16_t *d; - - const int bd = 8; - int height; - int dst_stride = conv_params->dst_stride; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int16_t sub_const = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - - const int16_t round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = bd + 2 * FILTER_BITS - conv_params->round_0; - const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1)); - const int32x4_t offset_const = vdupq_n_s32(1 << offset); - const int16x4_t sub_const_vec = vdup_n_s16(sub_const); - const uint16_t fwd_offset = conv_params->fwd_offset; - const uint16_t bck_offset = conv_params->bck_offset; - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - - int16x4_t s0, s1, s2, s3, s4, s5, s6, s7; - uint16x4_t res4, d0; - uint8x8_t t0; - -#if defined(__aarch64__) - int16x4_t 
s8, s9, s10; - uint16x4_t res5, res6, res7, d1, d2, d3; - uint8x8_t t1; -#endif - - dst = conv_params->dst; - src_ptr = im_block; - dst_u8_ptr = dst8; - dst_ptr = dst; - height = h; - - do { - d = dst_ptr; - d_u8 = dst_u8_ptr; - s = src_ptr; - height = h; - - __builtin_prefetch(s + 0 * im_stride); - __builtin_prefetch(s + 1 * im_stride); - __builtin_prefetch(s + 2 * im_stride); - __builtin_prefetch(s + 3 * im_stride); - __builtin_prefetch(s + 4 * im_stride); - __builtin_prefetch(s + 5 * im_stride); - __builtin_prefetch(s + 6 * im_stride); - __builtin_prefetch(s + 7 * im_stride); - - load_s16_4x8(s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); - s += (7 * im_stride); - - do { -#if defined(__aarch64__) - load_s16_4x4(s, im_stride, &s7, &s8, &s9, &s10); - s += (im_stride << 2); - - __builtin_prefetch(d + 0 * dst_stride); - __builtin_prefetch(d + 1 * dst_stride); - __builtin_prefetch(d + 2 * dst_stride); - __builtin_prefetch(d + 3 * dst_stride); - - __builtin_prefetch(d_u8 + 4 * dst8_stride); - __builtin_prefetch(d_u8 + 5 * dst8_stride); - __builtin_prefetch(d_u8 + 6 * dst8_stride); - __builtin_prefetch(d_u8 + 7 * dst8_stride); - - d0 = convolve8_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter, - round_shift_vec, offset_const); - d1 = convolve8_4x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, y_filter, - round_shift_vec, offset_const); - d2 = convolve8_4x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, y_filter, - round_shift_vec, offset_const); - d3 = convolve8_4x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, y_filter, - round_shift_vec, offset_const); - - if (do_average) { - load_u16_4x4(d, dst_stride, &res4, &res5, &res6, &res7); - d += (dst_stride << 2); - - compute_avg_4x4(res4, res5, res6, res7, d0, d1, d2, d3, fwd_offset, - bck_offset, sub_const_vec, round_bits, use_jnt_comp_avg, - &t0, &t1); - - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 0); - d_u8 += dst8_stride; - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 1); - d_u8 += dst8_stride; - 
vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1), 0); - d_u8 += dst8_stride; - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1), 1); - d_u8 += dst8_stride; - - } else { - store_u16_4x4(d, dst_stride, d0, d1, d2, d3); - d += (dst_stride << 2); - } - s0 = s4; - s1 = s5; - s2 = s6; - s3 = s7; - s4 = s8; - s5 = s9; - s6 = s10; - height -= 4; -#else - s7 = vld1_s16(s); - s += (im_stride); - - __builtin_prefetch(d + 0 * dst_stride); - __builtin_prefetch(d_u8 + 0 * dst8_stride); - - d0 = convolve8_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter, - round_shift_vec, offset_const); - - if (do_average) { - res4 = vld1_u16(d); - d += (dst_stride); - - compute_avg_4x1(res4, d0, fwd_offset, bck_offset, sub_const_vec, - round_bits, use_jnt_comp_avg, &t0); - - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 0); - d_u8 += dst8_stride; - - } else { - vst1_u16(d, d0); - d += (dst_stride); - } - s0 = s1; - s1 = s2; - s2 = s3; - s3 = s4; - s4 = s5; - s5 = s6; - s6 = s7; - height--; -#endif - } while (height > 0); - src_ptr += 4; - dst_ptr += 4; - dst_u8_ptr += 4; - w -= 4; - } while (w > 0); -} - -void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst8, - int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - assert(!(w % 4)); - assert(!(h % 4)); - - DECLARE_ALIGNED(16, int16_t, - im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]); - - const int im_h = h + filter_params_y->taps - 1; - const int im_stride = MAX_SB_SIZE; - const int vert_offset = filter_params_y->taps / 2 - 1; - const int horiz_offset = filter_params_x->taps / 2 - 1; - const int round_0 = conv_params->round_0 - 1; - const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset; - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - const int16_t 
*y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - - int16_t x_filter_tmp[8]; - int16x8_t filter_x_coef = vld1q_s16(x_filter); - - // filter coeffs are even, so downshifting by 1 to reduce intermediate - // precision requirements. - filter_x_coef = vshrq_n_s16(filter_x_coef, 1); - vst1q_s16(&x_filter_tmp[0], filter_x_coef); - - jnt_convolve_2d_horiz_neon(src_ptr, src_stride, im_block, im_stride, - x_filter_tmp, im_h, w, round_0); - - jnt_convolve_2d_vert_neon(im_block, im_stride, dst8, dst8_stride, conv_params, - y_filter, h, w); -} - -void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride, - uint8_t *dst8, int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - uint8x8_t res0_8, res1_8, res2_8, res3_8, tmp_shift0, tmp_shift1, tmp_shift2, - tmp_shift3; - uint16x8_t res_q0, res_q1, res_q2, res_q3, tmp_q0, tmp_q1, tmp_q2, tmp_q3; - uint16x4_t tmp4, tmp5, tmp6, tmp7, res4, res5, res6, res7; - const uint8_t *src1, *src2; - uint8_t *dst8_1; - CONV_BUF_TYPE *dst = conv_params->dst, *dst_1, *dst_2; - const int dst_stride = conv_params->dst_stride; - int x, y; - const int16_t bits = - FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; - const int bd = 8; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int round_offset = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - const int16x4_t sub_const_vec = vdup_n_s16((int16_t)round_offset); - const uint16x8_t dup_round_offset16x8 = vdupq_n_u16((uint16_t)round_offset); - const int16x4_t dup_bits16x4 = vdup_n_s16(bits); - const int16x8_t dup_bits16x8 = vdupq_n_s16(bits); - - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - - if (!(w & 0x07)) { - for (y = 0; y < (h >> 2); ++y) { - src1 = src; 
- dst8_1 = dst8; - dst_1 = dst; - for (x = 0; x < (w >> 3); ++x) { - src2 = src1; - load_u8_8x4(src2, src_stride, &res0_8, &res1_8, &res2_8, &res3_8); - - res_q0 = vaddq_u16(vshlq_u16(vmovl_u8(res0_8), dup_bits16x8), - dup_round_offset16x8); - res_q1 = vaddq_u16(vshlq_u16(vmovl_u8(res1_8), dup_bits16x8), - dup_round_offset16x8); - res_q2 = vaddq_u16(vshlq_u16(vmovl_u8(res2_8), dup_bits16x8), - dup_round_offset16x8); - res_q3 = vaddq_u16(vshlq_u16(vmovl_u8(res3_8), dup_bits16x8), - dup_round_offset16x8); - - if (conv_params->do_average) { - dst_2 = dst_1; - load_u16_8x4(dst_2, dst_stride, &tmp_q0, &tmp_q1, &tmp_q2, &tmp_q3); - - compute_avg_8x4(tmp_q0, tmp_q1, tmp_q2, tmp_q3, res_q0, res_q1, - res_q2, res_q3, conv_params->fwd_offset, - conv_params->bck_offset, sub_const_vec, bits, - conv_params->use_jnt_comp_avg, &tmp_shift0, - &tmp_shift1, &tmp_shift2, &tmp_shift3); - - vst1_u8(dst8_1 + (0 * dst8_stride), tmp_shift0); - vst1_u8(dst8_1 + (1 * dst8_stride), tmp_shift1); - vst1_u8(dst8_1 + (2 * dst8_stride), tmp_shift2); - vst1_u8(dst8_1 + (3 * dst8_stride), tmp_shift3); - - } else { - vst1q_u16(dst_1 + (0 * dst_stride), res_q0); - vst1q_u16(dst_1 + (1 * dst_stride), res_q1); - vst1q_u16(dst_1 + (2 * dst_stride), res_q2); - vst1q_u16(dst_1 + (3 * dst_stride), res_q3); - } - src1 = src1 + 8; - dst_1 = dst_1 + 8; - dst8_1 = dst8_1 + 8; - } - src += src_stride * 4; - dst8 += dst8_stride * 4; - dst += dst_stride * 4; - } - } else if (!(w & 0x03)) { - for (y = 0; y < (h >> 2); ++y) { - src1 = src; - dst8_1 = dst8; - dst_1 = dst; - - load_u8_8x4(src1, src_stride, &res0_8, &res1_8, &res2_8, &res3_8); - - res4 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res0_8)), dup_bits16x4), - vreinterpret_u16_s16(sub_const_vec)); - res5 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res1_8)), dup_bits16x4), - vreinterpret_u16_s16(sub_const_vec)); - res6 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res2_8)), dup_bits16x4), - vreinterpret_u16_s16(sub_const_vec)); - res7 = 
vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res3_8)), dup_bits16x4), - vreinterpret_u16_s16(sub_const_vec)); - if (conv_params->do_average) { - load_u16_4x4(dst_1, dst_stride, &tmp4, &tmp5, &tmp6, &tmp7); - - compute_avg_4x4(tmp4, tmp5, tmp6, tmp7, res4, res5, res6, res7, - conv_params->fwd_offset, conv_params->bck_offset, - sub_const_vec, bits, conv_params->use_jnt_comp_avg, - &tmp_shift0, &tmp_shift1); - - vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift0), 0); - dst8_1 += dst8_stride; - vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift0), 1); - dst8_1 += dst8_stride; - vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift1), 0); - dst8_1 += dst8_stride; - vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift1), 1); - - } else { - vst1_u16(dst_1, res4); - dst_1 += dst_stride; - vst1_u16(dst_1, res5); - dst_1 += dst_stride; - vst1_u16(dst_1, res6); - dst_1 += dst_stride; - vst1_u16(dst_1, res7); - } - src += src_stride * 4; - dst += dst_stride * 4; - dst8 += dst8_stride * 4; - } - } -} - -void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst8, - int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - assert(!(w % 4)); - assert(!(h % 4)); - - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int horiz_offset = filter_params_x->taps / 2 - 1; - const int bits = FILTER_BITS - conv_params->round_1; - const int bd = 8; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int round_offset = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const uint16_t fwd_offset = conv_params->fwd_offset; - const uint16_t bck_offset = conv_params->bck_offset; - 
const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - - (void)filter_params_y; - (void)subpel_y_q4; - - // horizontal filter - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - - const uint8_t *src_ptr = src - horiz_offset; - - int16_t x_filter_tmp[8]; - int16x8_t filter_x_coef = vld1q_s16(x_filter); - - // filter coeffs are even, so downshifting by 1 to reduce intermediate - // precision requirements. - filter_x_coef = vshrq_n_s16(filter_x_coef, 1); - vst1q_s16(&x_filter_tmp[0], filter_x_coef); - - const uint8_t *s; - uint8_t *d_u8; - uint8_t *dst_u8_ptr; - CONV_BUF_TYPE *d, *dst_ptr; - int width, height; - uint8x8_t t0; -#if defined(__aarch64__) - uint8x8_t t1, t2, t3, t4, t5, t6, t7; -#endif - s = src_ptr; - dst_ptr = dst; - dst_u8_ptr = dst8; - width = w; - height = h; - - if ((w == 4) || (h == 4)) { - int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0; - int16x8_t tt0; - uint16x4_t res4; -#if defined(__aarch64__) - int16x4_t s8, s9, s10, d1, d2, d3; - int16x8_t tt1, tt2, tt3; - uint16x4_t res5, res6, res7; - uint32x2_t tu0 = vdup_n_u32(0), tu1 = vdup_n_u32(0); - int16x8_t u0, u1; -#else - int16x4_t temp_0; -#endif - const int16x4_t zero = vdup_n_s16(0); - const int16x4_t round_offset_vec = vdup_n_s16(round_offset); - const int16x4_t shift_round_0 = vdup_n_s16(-conv_params->round_0 + 1); - const int16x4_t horiz_const = vdup_n_s16(bits); - do { - s = src_ptr; - d = dst_ptr; - d_u8 = dst_u8_ptr; - width = w; - __builtin_prefetch(s + 0 * src_stride); -#if defined(__aarch64__) - __builtin_prefetch(s + 1 * src_stride); - __builtin_prefetch(s + 2 * src_stride); - __builtin_prefetch(s + 3 * src_stride); - - load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); - transpose_u8_8x4(&t0, &t1, &t2, &t3); - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s0 = vget_low_s16(tt0); - s1 = 
vget_low_s16(tt1); - s2 = vget_low_s16(tt2); - s3 = vget_low_s16(tt3); - s4 = vget_high_s16(tt0); - s5 = vget_high_s16(tt1); - s6 = vget_high_s16(tt2); - __builtin_prefetch(d + 0 * dst_stride); - __builtin_prefetch(d + 1 * dst_stride); - __builtin_prefetch(d + 2 * dst_stride); - __builtin_prefetch(d + 3 * dst_stride); - s += 7; - do { - load_unaligned_u8_4x4(s, src_stride, &tu0, &tu1); - t0 = vreinterpret_u8_u32(tu0); - t1 = vreinterpret_u8_u32(tu1); - - transpose_u8_4x4(&t0, &t1); - u0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - u1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - - s7 = vget_low_s16(u0); - s8 = vget_low_s16(u1); - s9 = vget_high_s16(u0); - s10 = vget_high_s16(u1); - - d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - zero, shift_round_0); - d0 = vrshl_s16(d0, horiz_const); - d0 = vadd_s16(d0, round_offset_vec); - d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp, - zero, shift_round_0); - d1 = vrshl_s16(d1, horiz_const); - d1 = vadd_s16(d1, round_offset_vec); - d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp, - zero, shift_round_0); - d2 = vrshl_s16(d2, horiz_const); - d2 = vadd_s16(d2, round_offset_vec); - d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp, - zero, shift_round_0); - d3 = vrshl_s16(d3, horiz_const); - d3 = vadd_s16(d3, round_offset_vec); - - transpose_s16_4x4d(&d0, &d1, &d2, &d3); - - if (conv_params->do_average) { - __builtin_prefetch(d + 0 * dst_stride); - __builtin_prefetch(d + 1 * dst_stride); - __builtin_prefetch(d + 2 * dst_stride); - __builtin_prefetch(d + 3 * dst_stride); - - __builtin_prefetch(d_u8 + 0 * dst8_stride); - __builtin_prefetch(d_u8 + 1 * dst8_stride); - __builtin_prefetch(d_u8 + 2 * dst8_stride); - __builtin_prefetch(d_u8 + 3 * dst8_stride); - - load_u16_4x4(d, dst_stride, &res4, &res5, &res6, &res7); - - compute_avg_4x4(res4, res5, res6, res7, vreinterpret_u16_s16(d0), - vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2), - 
vreinterpret_u16_s16(d3), fwd_offset, bck_offset, - round_offset_vec, round_bits, use_jnt_comp_avg, &t0, - &t1); - - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), - 0); // 00 01 02 03 - vst1_lane_u32((uint32_t *)(d_u8 + dst8_stride), - vreinterpret_u32_u8(t0), - 1); // 10 11 12 13 - vst1_lane_u32((uint32_t *)(d_u8 + 2 * dst8_stride), - vreinterpret_u32_u8(t1), - 0); // 20 21 22 23 - vst1_lane_u32((uint32_t *)(d_u8 + 3 * dst8_stride), - vreinterpret_u32_u8(t1), - 1); // 30 31 32 33 - } else { - store_u16_4x4(d, dst_stride, vreinterpret_u16_s16(d0), - vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2), - vreinterpret_u16_s16(d3)); - } - - s0 = s4; - s1 = s5; - s2 = s6; - s3 = s7; - s4 = s8; - s5 = s9; - s6 = s10; - - s += 4; - width -= 4; - d += 4; - d_u8 += 4; - } while (width > 0); - src_ptr += (src_stride << 2); - dst_ptr += (dst_stride << 2); - dst_u8_ptr += (dst8_stride << 2); - height -= 4; -#else - t0 = vld1_u8(s); // a0 a1 a2 a3 a4 a5 a6 a7 - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7 - s0 = vget_low_s16(tt0); // a0 a1 a2 a3 - s4 = vget_high_s16(tt0); // a4 a5 a6 a7 - __builtin_prefetch(d); - - s += 8; - do { - t0 = vld1_u8(s); // a8 a9 a10 a11 - - // a8 a9 a10 a11 - s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); - temp_0 = s7; - s1 = vext_s16(s0, s4, 1); // a1 a2 a3 a4 - s2 = vext_s16(s0, s4, 2); // a2 a3 a4 a5 - s3 = vext_s16(s0, s4, 3); // a3 a4 a5 a6 - s5 = vext_s16(s4, s7, 1); // a5 a6 a7 a8 - s6 = vext_s16(s4, s7, 2); // a6 a7 a8 a9 - s7 = vext_s16(s4, s7, 3); // a7 a8 a9 a10 - - d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - zero, shift_round_0); - d0 = vrshl_s16(d0, horiz_const); - d0 = vadd_s16(d0, round_offset_vec); - s0 = s4; - s4 = temp_0; - if (conv_params->do_average) { - __builtin_prefetch(d); - __builtin_prefetch(d_u8); - - res4 = vld1_u16(d); - - compute_avg_4x1(res4, vreinterpret_u16_s16(d0), fwd_offset, - bck_offset, round_offset_vec, round_bits, - use_jnt_comp_avg, 
&t0); - - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), - 0); // 00 01 02 03 - } else { - vst1_u16(d, vreinterpret_u16_s16(d0)); - } - - s += 4; - width -= 4; - d += 4; - d_u8 += 4; - } while (width > 0); - src_ptr += (src_stride); - dst_ptr += (dst_stride); - dst_u8_ptr += (dst8_stride); - height--; -#endif - } while (height > 0); - } else { - CONV_BUF_TYPE *d_tmp; - uint8_t *d_u8_tmp; - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; - int16x8_t res0; - uint16x8_t res8; - const int16x8_t round_offset128 = vdupq_n_s16(round_offset); - const int16x4_t round_offset64 = vdup_n_s16(round_offset); - const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0 + 1); - const int16x8_t horiz_const = vdupq_n_s16(bits); - const int16x8_t zero = vdupq_n_s16(0); - - d = dst_ptr = dst; - d_u8 = dst_u8_ptr = dst8; - do { -#if defined(__aarch64__) - int16x8_t s11, s12, s13, s14; - int16x8_t s8, s9, s10; - int16x8_t res1, res2, res3, res4, res5, res6, res7; - uint16x8_t res9, res10, res11; - __builtin_prefetch(src_ptr + 0 * src_stride); - __builtin_prefetch(src_ptr + 1 * src_stride); - __builtin_prefetch(src_ptr + 2 * src_stride); - __builtin_prefetch(src_ptr + 3 * src_stride); - __builtin_prefetch(src_ptr + 4 * src_stride); - __builtin_prefetch(src_ptr + 5 * src_stride); - __builtin_prefetch(src_ptr + 6 * src_stride); - __builtin_prefetch(src_ptr + 7 * src_stride); - load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); - - width = w; - s = src_ptr + 7; - d = dst_ptr; - d_u8_tmp = dst_u8_ptr; - - __builtin_prefetch(dst_ptr + 0 * dst_stride); - __builtin_prefetch(dst_ptr + 1 * dst_stride); 
- __builtin_prefetch(dst_ptr + 2 * dst_stride); - __builtin_prefetch(dst_ptr + 3 * dst_stride); - __builtin_prefetch(dst_ptr + 4 * dst_stride); - __builtin_prefetch(dst_ptr + 5 * dst_stride); - __builtin_prefetch(dst_ptr + 6 * dst_stride); - __builtin_prefetch(dst_ptr + 7 * dst_stride); - - do { - d_u8 = d_u8_tmp; - d_tmp = d; - - load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s11 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s12 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s13 = vreinterpretq_s16_u16(vmovl_u8(t6)); - s14 = vreinterpretq_s16_u16(vmovl_u8(t7)); - - res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp, - zero, shift_round_0); - - res0 = vrshlq_s16(res0, horiz_const); - res0 = vaddq_s16(res0, round_offset128); - - res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp, - zero, shift_round_0); - res1 = vrshlq_s16(res1, horiz_const); - res1 = vaddq_s16(res1, round_offset128); - res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp, - zero, shift_round_0); - res2 = vrshlq_s16(res2, horiz_const); - res2 = vaddq_s16(res2, round_offset128); - res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp, - zero, shift_round_0); - res3 = vrshlq_s16(res3, horiz_const); - res3 = vaddq_s16(res3, round_offset128); - res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp, - zero, shift_round_0); - res4 = vrshlq_s16(res4, horiz_const); - res4 = vaddq_s16(res4, round_offset128); - res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12, - x_filter_tmp, zero, shift_round_0); - res5 = vrshlq_s16(res5, horiz_const); - res5 = vaddq_s16(res5, round_offset128); - res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13, - x_filter_tmp, zero, 
shift_round_0); - res6 = vrshlq_s16(res6, horiz_const); - res6 = vaddq_s16(res6, round_offset128); - res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14, - x_filter_tmp, zero, shift_round_0); - res7 = vrshlq_s16(res7, horiz_const); - res7 = vaddq_s16(res7, round_offset128); - - transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6, - &res7); - - if (conv_params->do_average) { - load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11); - d_tmp += (dst_stride << 2); - - compute_avg_8x4( - res8, res9, res10, res11, vreinterpretq_u16_s16(res0), - vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2), - vreinterpretq_u16_s16(res3), fwd_offset, bck_offset, - round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3); - - store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3); - d_u8 += (dst8_stride << 2); - - load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11); - d_tmp += (dst_stride << 2); - - compute_avg_8x4( - res8, res9, res10, res11, vreinterpretq_u16_s16(res4), - vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6), - vreinterpretq_u16_s16(res7), fwd_offset, bck_offset, - round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3); - - store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3); - d_u8 += (dst8_stride << 2); - } else { - store_u16_8x8( - d_tmp, dst_stride, vreinterpretq_u16_s16(res0), - vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2), - vreinterpretq_u16_s16(res3), vreinterpretq_u16_s16(res4), - vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6), - vreinterpretq_u16_s16(res7)); - d_tmp += (dst_stride << 3); - } - - s0 = s8; - s1 = s9; - s2 = s10; - s3 = s11; - s4 = s12; - s5 = s13; - s6 = s14; - s += 8; - d += 8; - width -= 8; - d_u8_tmp += 8; - } while (width > 0); - src_ptr += 8 * src_stride; - dst_ptr += 8 * dst_stride; - dst_u8_ptr += 8 * dst8_stride; - height -= 8; -#else - int16x8_t temp_0; - __builtin_prefetch(src_ptr); - t0 = vld1_u8(src_ptr); - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // 
a0 a1 a2 a3 a4 a5 a6 a7 - - width = w; - s = src_ptr + 8; - d = dst_ptr; - d_u8_tmp = dst_u8_ptr; - - __builtin_prefetch(dst_ptr); - - do { - d_u8 = d_u8_tmp; - d_tmp = d; - - t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15 - s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); - temp_0 = s0; - s0 = s7; - - s1 = vextq_s16(temp_0, s7, 1); // a1 a2 a3 a4 a5 a6 a7 a8 - s2 = vextq_s16(temp_0, s7, 2); // a2 a3 a4 a5 a6 a7 a8 a9 - s3 = vextq_s16(temp_0, s7, 3); // a3 a4 a5 a6 a7 a8 a9 a10 - s4 = vextq_s16(temp_0, s7, 4); // a4 a5 a6 a7 a8 a9 a10 a11 - s5 = vextq_s16(temp_0, s7, 5); // a5 a6 a7 a8 a9 a10 a11 a12 - s6 = vextq_s16(temp_0, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13 - s7 = vextq_s16(temp_0, s7, 7); // a7 a8 a9 a10 a11 a12 a13 a14 - - res0 = convolve8_8x8_s16(temp_0, s1, s2, s3, s4, s5, s6, s7, - x_filter_tmp, zero, shift_round_0); - - res0 = vrshlq_s16(res0, horiz_const); - res0 = vaddq_s16(res0, round_offset128); - - if (conv_params->do_average) { - res8 = vld1q_u16(d_tmp); - d_tmp += (dst_stride); - - compute_avg_8x1(res8, vreinterpretq_u16_s16(res0), fwd_offset, - bck_offset, round_offset64, round_bits, - use_jnt_comp_avg, &t0); - - vst1_u8(d_u8, t0); - d_u8 += (dst8_stride); - } else { - vst1q_u16(d_tmp, vreinterpretq_u16_s16(res0)); - d_tmp += (dst_stride); - } - - s += 8; - d += 8; - width -= 8; - d_u8_tmp += 8; - } while (width > 0); - src_ptr += src_stride; - dst_ptr += dst_stride; - dst_u8_ptr += dst8_stride; - height--; -#endif - } while (height > 0); - } -} - -void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst8, - int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - assert(!(w % 4)); - assert(!(h % 4)); - - CONV_BUF_TYPE *dst = conv_params->dst; - const int dst_stride = conv_params->dst_stride; - const int vert_offset = filter_params_y->taps / 2 - 1; - const int bits = FILTER_BITS - 
conv_params->round_0; - const int bd = 8; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int round_offset = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const uint16_t fwd_offset = conv_params->fwd_offset; - const uint16_t bck_offset = conv_params->bck_offset; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int shift_value = (conv_params->round_1 - 1 - bits); - - (void)filter_params_x; - (void)subpel_x_q4; - - // vertical filter - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - - const uint8_t *src_ptr = src - (vert_offset * src_stride); - - int16_t y_filter_tmp[8]; - int16x8_t filter_y_coef = vld1q_s16(y_filter); - - // filter coeffs are even, so downshifting by 1 to reduce intermediate - // precision requirements. - filter_y_coef = vshrq_n_s16(filter_y_coef, 1); - vst1q_s16(&y_filter_tmp[0], filter_y_coef); - - const uint8_t *s; - uint8_t *d_u8; - uint8_t *dst_u8_ptr; - CONV_BUF_TYPE *d, *dst_ptr; - int width, height; - - s = src_ptr; - dst_ptr = dst; - dst_u8_ptr = dst8; - width = w; - height = h; - - // used to get rid of multiplication = (vertical filter output sum) * - // (1<<bits). 
- assert((conv_params->round_1 - 2) >= bits); - - if ((w == 4) || (h == 4)) { - int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0; - uint16x4_t res4; - uint32x2_t tu0 = vdup_n_u32(0), tu1 = vdup_n_u32(0), tu2 = vdup_n_u32(0), - tu3 = vdup_n_u32(0); - int16x8_t u0, u1, u2, u3; - uint8x8_t t0; - -#if defined(__aarch64__) - int16x4_t s8, s9, s10, d1, d2, d3; - uint16x4_t res5, res6, res7; - uint8x8_t t1; -#endif - const int16x4_t round_offset64 = vdup_n_s16(round_offset); - const int16x4_t shift_vec = vdup_n_s16(-shift_value); - const int16x4_t zero = vdup_n_s16(0); - - do { - s = src_ptr; - d = dst_ptr; - d_u8 = dst_u8_ptr; - height = h; - __builtin_prefetch(s + 0 * src_stride); - __builtin_prefetch(s + 1 * src_stride); - __builtin_prefetch(s + 2 * src_stride); - __builtin_prefetch(s + 3 * src_stride); - - load_unaligned_u8_4x8(s, src_stride, &tu0, &tu1, &tu2, &tu3); - - u0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0))); - u1 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu1))); - u2 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu2))); - u3 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu3))); - - s0 = vget_low_s16(u0); - s1 = vget_high_s16(u0); - s2 = vget_low_s16(u1); - s3 = vget_high_s16(u1); - s4 = vget_low_s16(u2); - s5 = vget_high_s16(u2); - s6 = vget_low_s16(u3); - - __builtin_prefetch(d + 0 * dst_stride); - __builtin_prefetch(d + 1 * dst_stride); - __builtin_prefetch(d + 2 * dst_stride); - __builtin_prefetch(d + 3 * dst_stride); - - s += (7 * src_stride); - do { -#if defined(__aarch64__) - load_unaligned_u8_4x4(s, src_stride, &tu0, &tu1); - - u0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0))); - u1 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu1))); - - s7 = vget_low_s16(u0); - s8 = vget_high_s16(u0); - s9 = vget_low_s16(u1); - s10 = vget_high_s16(u1); - - d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp, - zero, shift_vec); - d0 = vadd_s16(d0, round_offset64); - d1 = convolve8_4x4_s16(s1, 
s2, s3, s4, s5, s6, s7, s8, y_filter_tmp, - zero, shift_vec); - d1 = vadd_s16(d1, round_offset64); - d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter_tmp, - zero, shift_vec); - d2 = vadd_s16(d2, round_offset64); - d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter_tmp, - zero, shift_vec); - d3 = vadd_s16(d3, round_offset64); - - if (conv_params->do_average) { - __builtin_prefetch(d + 0 * dst_stride); - __builtin_prefetch(d + 1 * dst_stride); - __builtin_prefetch(d + 2 * dst_stride); - __builtin_prefetch(d + 3 * dst_stride); - - __builtin_prefetch(d_u8 + 0 * dst8_stride); - __builtin_prefetch(d_u8 + 1 * dst8_stride); - __builtin_prefetch(d_u8 + 2 * dst8_stride); - __builtin_prefetch(d_u8 + 3 * dst8_stride); - - load_u16_4x4(d, dst_stride, &res4, &res5, &res6, &res7); - d += (dst_stride << 2); - - compute_avg_4x4(res4, res5, res6, res7, vreinterpret_u16_s16(d0), - vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2), - vreinterpret_u16_s16(d3), fwd_offset, bck_offset, - round_offset64, round_bits, use_jnt_comp_avg, &t0, - &t1); - - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 0); - d_u8 += dst8_stride; - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 1); - d_u8 += dst8_stride; - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1), 0); - d_u8 += dst8_stride; - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1), 1); - d_u8 += dst8_stride; - } else { - store_u16_4x4(d, dst_stride, vreinterpret_u16_s16(d0), - vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2), - vreinterpret_u16_s16(d3)); - d += (dst_stride << 2); - } - - s0 = s4; - s1 = s5; - s2 = s6; - s3 = s7; - s4 = s8; - s5 = s9; - s6 = s10; - - s += (src_stride << 2); - height -= 4; -#else - load_unaligned_u8_4x1(s, src_stride, &tu0); - u0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0))); - s7 = vget_low_s16(u0); - - d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp, - zero, shift_vec); - - d0 = vadd_s16(d0, round_offset64); - - 
if (conv_params->do_average) { - __builtin_prefetch(d); - - res4 = vld1_u16(d); - d += (dst_stride); - - compute_avg_4x1(res4, vreinterpret_u16_s16(d0), fwd_offset, - bck_offset, round_offset64, round_bits, - use_jnt_comp_avg, &t0); - - vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 0); - d_u8 += dst8_stride; - } else { - vst1_u16(d, vreinterpret_u16_s16(d0)); - d += (dst_stride); - } - - s0 = s1; - s1 = s2; - s2 = s3; - s3 = s4; - s4 = s5; - s5 = s6; - s6 = s7; - - s += (src_stride); - height--; -#endif - } while (height > 0); - src_ptr += 4; - dst_ptr += 4; - dst_u8_ptr += 4; - width -= 4; - } while (width > 0); - } else { - CONV_BUF_TYPE *d_tmp; - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; - int16x8_t res0; - uint16x8_t res8; - uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7; - const int16x8_t round_offset128 = vdupq_n_s16(round_offset); - const int16x8_t shift_vec = vdupq_n_s16(-shift_value); - const int16x4_t round_offset64 = vdup_n_s16(round_offset); - const int16x8_t zero = vdupq_n_s16(0); -#if defined(__aarch64__) - int16x8_t s8, s9, s10, s11, s12, s13, s14; - int16x8_t res1, res2, res3, res4, res5, res6, res7; - uint16x8_t res10, res11, res9; -#endif - dst_ptr = dst; - dst_u8_ptr = dst8; - do { - __builtin_prefetch(src_ptr + 0 * src_stride); - __builtin_prefetch(src_ptr + 1 * src_stride); - __builtin_prefetch(src_ptr + 2 * src_stride); - __builtin_prefetch(src_ptr + 3 * src_stride); - __builtin_prefetch(src_ptr + 4 * src_stride); - __builtin_prefetch(src_ptr + 5 * src_stride); - __builtin_prefetch(src_ptr + 6 * src_stride); - __builtin_prefetch(src_ptr + 7 * src_stride); - load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - - s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); - - 
height = h; - s = src_ptr + (7 * src_stride); - d_tmp = dst_ptr; - d_u8 = dst_u8_ptr; - - do { -#if defined(__aarch64__) - load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - - s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); - s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); - s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); - s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); - s11 = vreinterpretq_s16_u16(vmovl_u8(t4)); - s12 = vreinterpretq_s16_u16(vmovl_u8(t5)); - s13 = vreinterpretq_s16_u16(vmovl_u8(t6)); - s14 = vreinterpretq_s16_u16(vmovl_u8(t7)); - - __builtin_prefetch(dst_ptr + 0 * dst_stride); - __builtin_prefetch(dst_ptr + 1 * dst_stride); - __builtin_prefetch(dst_ptr + 2 * dst_stride); - __builtin_prefetch(dst_ptr + 3 * dst_stride); - - res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp, - zero, shift_vec); - res0 = vaddq_s16(res0, round_offset128); - res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, y_filter_tmp, - zero, shift_vec); - res1 = vaddq_s16(res1, round_offset128); - res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter_tmp, - zero, shift_vec); - res2 = vaddq_s16(res2, round_offset128); - res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter_tmp, - zero, shift_vec); - res3 = vaddq_s16(res3, round_offset128); - res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, y_filter_tmp, - zero, shift_vec); - res4 = vaddq_s16(res4, round_offset128); - res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12, - y_filter_tmp, zero, shift_vec); - res5 = vaddq_s16(res5, round_offset128); - res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13, - y_filter_tmp, zero, shift_vec); - res6 = vaddq_s16(res6, round_offset128); - res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14, - y_filter_tmp, zero, shift_vec); - res7 = vaddq_s16(res7, round_offset128); - - if (conv_params->do_average) { - __builtin_prefetch(d_tmp + 0 * dst8_stride); - __builtin_prefetch(d_tmp + 1 * dst8_stride); - 
__builtin_prefetch(d_tmp + 2 * dst8_stride); - __builtin_prefetch(d_tmp + 3 * dst8_stride); - - load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11); - d_tmp += (dst_stride << 2); - - compute_avg_8x4( - res8, res9, res10, res11, vreinterpretq_u16_s16(res0), - vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2), - vreinterpretq_u16_s16(res3), fwd_offset, bck_offset, - round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3); - - store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3); - d_u8 += (dst8_stride << 2); - - load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11); - d_tmp += (dst_stride << 2); - - compute_avg_8x4( - res8, res9, res10, res11, vreinterpretq_u16_s16(res4), - vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6), - vreinterpretq_u16_s16(res7), fwd_offset, bck_offset, - round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3); - - store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3); - d_u8 += (dst8_stride << 2); - } else { - store_u16_8x8( - d_tmp, dst_stride, vreinterpretq_u16_s16(res0), - vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2), - vreinterpretq_u16_s16(res3), vreinterpretq_u16_s16(res4), - vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6), - vreinterpretq_u16_s16(res7)); - d_tmp += (dst_stride << 3); - } - - s0 = s8; - s1 = s9; - s2 = s10; - s3 = s11; - s4 = s12; - s5 = s13; - s6 = s14; - s += (8 * src_stride); - height -= 8; -#else - s7 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); - - __builtin_prefetch(dst_ptr); - - res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp, - zero, shift_vec); - res0 = vaddq_s16(res0, round_offset128); - - s0 = s1; - s1 = s2; - s2 = s3; - s3 = s4; - s4 = s5; - s5 = s6; - s6 = s7; - - if (conv_params->do_average) { - __builtin_prefetch(d_tmp); - - res8 = vld1q_u16(d_tmp); - d_tmp += (dst_stride); - - compute_avg_8x1(res8, vreinterpretq_u16_s16(res0), fwd_offset, - bck_offset, round_offset64, round_bits, - use_jnt_comp_avg, &t0); - - 
vst1_u8(d_u8, t0); - d_u8 += (dst8_stride); - } else { - vst1q_u16(d_tmp, vreinterpretq_u16_s16(res0)); - d_tmp += dst_stride; - } - - s += (src_stride); - height--; -#endif - } while (height > 0); - src_ptr += 8; - dst_ptr += 8; - dst_u8_ptr += 8; - width -= 8; - } while (width > 0); - } -} diff --git a/third_party/aom/av1/common/arm/mem_neon.h b/third_party/aom/av1/common/arm/mem_neon.h deleted file mode 100644 index c4ae2e784..000000000 --- a/third_party/aom/av1/common/arm/mem_neon.h +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef AOM_AV1_COMMON_ARM_MEM_NEON_H_ -#define AOM_AV1_COMMON_ARM_MEM_NEON_H_ - -#include <arm_neon.h> -#include <string.h> - -static INLINE void store_row2_u8_8x8(uint8_t *s, int p, const uint8x8_t s0, - const uint8x8_t s1) { - vst1_u8(s, s0); - s += p; - vst1_u8(s, s1); - s += p; -} - -/* These intrinsics require immediate values, so we must use #defines - to enforce that. 
*/ -#define load_u8_4x1(s, s0, lane) \ - do { \ - *(s0) = vreinterpret_u8_u32( \ - vld1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(*(s0)), lane)); \ - } while (0) - -static INLINE void load_u8_8x8(const uint8_t *s, ptrdiff_t p, - uint8x8_t *const s0, uint8x8_t *const s1, - uint8x8_t *const s2, uint8x8_t *const s3, - uint8x8_t *const s4, uint8x8_t *const s5, - uint8x8_t *const s6, uint8x8_t *const s7) { - *s0 = vld1_u8(s); - s += p; - *s1 = vld1_u8(s); - s += p; - *s2 = vld1_u8(s); - s += p; - *s3 = vld1_u8(s); - s += p; - *s4 = vld1_u8(s); - s += p; - *s5 = vld1_u8(s); - s += p; - *s6 = vld1_u8(s); - s += p; - *s7 = vld1_u8(s); -} - -static INLINE void load_u8_8x16(const uint8_t *s, ptrdiff_t p, - uint8x16_t *const s0, uint8x16_t *const s1, - uint8x16_t *const s2, uint8x16_t *const s3) { - *s0 = vld1q_u8(s); - s += p; - *s1 = vld1q_u8(s); - s += p; - *s2 = vld1q_u8(s); - s += p; - *s3 = vld1q_u8(s); -} - -static INLINE void load_u8_8x4(const uint8_t *s, const ptrdiff_t p, - uint8x8_t *const s0, uint8x8_t *const s1, - uint8x8_t *const s2, uint8x8_t *const s3) { - *s0 = vld1_u8(s); - s += p; - *s1 = vld1_u8(s); - s += p; - *s2 = vld1_u8(s); - s += p; - *s3 = vld1_u8(s); -} - -static INLINE void load_u16_4x4(const uint16_t *s, const ptrdiff_t p, - uint16x4_t *const s0, uint16x4_t *const s1, - uint16x4_t *const s2, uint16x4_t *const s3) { - *s0 = vld1_u16(s); - s += p; - *s1 = vld1_u16(s); - s += p; - *s2 = vld1_u16(s); - s += p; - *s3 = vld1_u16(s); - s += p; -} - -static INLINE void load_u16_8x4(const uint16_t *s, const ptrdiff_t p, - uint16x8_t *const s0, uint16x8_t *const s1, - uint16x8_t *const s2, uint16x8_t *const s3) { - *s0 = vld1q_u16(s); - s += p; - *s1 = vld1q_u16(s); - s += p; - *s2 = vld1q_u16(s); - s += p; - *s3 = vld1q_u16(s); - s += p; -} - -static INLINE void load_s16_4x8(const int16_t *s, ptrdiff_t p, - int16x4_t *const s0, int16x4_t *const s1, - int16x4_t *const s2, int16x4_t *const s3, - int16x4_t *const s4, int16x4_t *const s5, - int16x4_t 
*const s6, int16x4_t *const s7) { - *s0 = vld1_s16(s); - s += p; - *s1 = vld1_s16(s); - s += p; - *s2 = vld1_s16(s); - s += p; - *s3 = vld1_s16(s); - s += p; - *s4 = vld1_s16(s); - s += p; - *s5 = vld1_s16(s); - s += p; - *s6 = vld1_s16(s); - s += p; - *s7 = vld1_s16(s); -} - -static INLINE void load_s16_4x4(const int16_t *s, ptrdiff_t p, - int16x4_t *const s0, int16x4_t *const s1, - int16x4_t *const s2, int16x4_t *const s3) { - *s0 = vld1_s16(s); - s += p; - *s1 = vld1_s16(s); - s += p; - *s2 = vld1_s16(s); - s += p; - *s3 = vld1_s16(s); -} - -/* These intrinsics require immediate values, so we must use #defines - to enforce that. */ -#define store_u8_4x1(s, s0, lane) \ - do { \ - vst1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(s0), lane); \ - } while (0) - -static INLINE void store_u8_8x8(uint8_t *s, ptrdiff_t p, const uint8x8_t s0, - const uint8x8_t s1, const uint8x8_t s2, - const uint8x8_t s3, const uint8x8_t s4, - const uint8x8_t s5, const uint8x8_t s6, - const uint8x8_t s7) { - vst1_u8(s, s0); - s += p; - vst1_u8(s, s1); - s += p; - vst1_u8(s, s2); - s += p; - vst1_u8(s, s3); - s += p; - vst1_u8(s, s4); - s += p; - vst1_u8(s, s5); - s += p; - vst1_u8(s, s6); - s += p; - vst1_u8(s, s7); -} - -static INLINE void store_u8_8x4(uint8_t *s, ptrdiff_t p, const uint8x8_t s0, - const uint8x8_t s1, const uint8x8_t s2, - const uint8x8_t s3) { - vst1_u8(s, s0); - s += p; - vst1_u8(s, s1); - s += p; - vst1_u8(s, s2); - s += p; - vst1_u8(s, s3); -} - -static INLINE void store_u8_8x16(uint8_t *s, ptrdiff_t p, const uint8x16_t s0, - const uint8x16_t s1, const uint8x16_t s2, - const uint8x16_t s3) { - vst1q_u8(s, s0); - s += p; - vst1q_u8(s, s1); - s += p; - vst1q_u8(s, s2); - s += p; - vst1q_u8(s, s3); -} - -static INLINE void store_u16_8x8(uint16_t *s, ptrdiff_t dst_stride, - const uint16x8_t s0, const uint16x8_t s1, - const uint16x8_t s2, const uint16x8_t s3, - const uint16x8_t s4, const uint16x8_t s5, - const uint16x8_t s6, const uint16x8_t s7) { - vst1q_u16(s, s0); - 
s += dst_stride; - vst1q_u16(s, s1); - s += dst_stride; - vst1q_u16(s, s2); - s += dst_stride; - vst1q_u16(s, s3); - s += dst_stride; - vst1q_u16(s, s4); - s += dst_stride; - vst1q_u16(s, s5); - s += dst_stride; - vst1q_u16(s, s6); - s += dst_stride; - vst1q_u16(s, s7); -} - -static INLINE void store_u16_4x4(uint16_t *s, ptrdiff_t dst_stride, - const uint16x4_t s0, const uint16x4_t s1, - const uint16x4_t s2, const uint16x4_t s3) { - vst1_u16(s, s0); - s += dst_stride; - vst1_u16(s, s1); - s += dst_stride; - vst1_u16(s, s2); - s += dst_stride; - vst1_u16(s, s3); -} - -static INLINE void store_u16_8x4(uint16_t *s, ptrdiff_t dst_stride, - const uint16x8_t s0, const uint16x8_t s1, - const uint16x8_t s2, const uint16x8_t s3) { - vst1q_u16(s, s0); - s += dst_stride; - vst1q_u16(s, s1); - s += dst_stride; - vst1q_u16(s, s2); - s += dst_stride; - vst1q_u16(s, s3); -} - -static INLINE void store_s16_8x8(int16_t *s, ptrdiff_t dst_stride, - const int16x8_t s0, const int16x8_t s1, - const int16x8_t s2, const int16x8_t s3, - const int16x8_t s4, const int16x8_t s5, - const int16x8_t s6, const int16x8_t s7) { - vst1q_s16(s, s0); - s += dst_stride; - vst1q_s16(s, s1); - s += dst_stride; - vst1q_s16(s, s2); - s += dst_stride; - vst1q_s16(s, s3); - s += dst_stride; - vst1q_s16(s, s4); - s += dst_stride; - vst1q_s16(s, s5); - s += dst_stride; - vst1q_s16(s, s6); - s += dst_stride; - vst1q_s16(s, s7); -} - -static INLINE void store_s16_4x4(int16_t *s, ptrdiff_t dst_stride, - const int16x4_t s0, const int16x4_t s1, - const int16x4_t s2, const int16x4_t s3) { - vst1_s16(s, s0); - s += dst_stride; - vst1_s16(s, s1); - s += dst_stride; - vst1_s16(s, s2); - s += dst_stride; - vst1_s16(s, s3); -} - -static INLINE void store_s16_8x4(int16_t *s, ptrdiff_t dst_stride, - const int16x8_t s0, const int16x8_t s1, - const int16x8_t s2, const int16x8_t s3) { - vst1q_s16(s, s0); - s += dst_stride; - vst1q_s16(s, s1); - s += dst_stride; - vst1q_s16(s, s2); - s += dst_stride; - vst1q_s16(s, s3); -} - 
-static INLINE void load_s16_8x8(const int16_t *s, ptrdiff_t p, - int16x8_t *const s0, int16x8_t *const s1, - int16x8_t *const s2, int16x8_t *const s3, - int16x8_t *const s4, int16x8_t *const s5, - int16x8_t *const s6, int16x8_t *const s7) { - *s0 = vld1q_s16(s); - s += p; - *s1 = vld1q_s16(s); - s += p; - *s2 = vld1q_s16(s); - s += p; - *s3 = vld1q_s16(s); - s += p; - *s4 = vld1q_s16(s); - s += p; - *s5 = vld1q_s16(s); - s += p; - *s6 = vld1q_s16(s); - s += p; - *s7 = vld1q_s16(s); -} - -static INLINE void load_s16_8x4(const int16_t *s, ptrdiff_t p, - int16x8_t *const s0, int16x8_t *const s1, - int16x8_t *const s2, int16x8_t *const s3) { - *s0 = vld1q_s16(s); - s += p; - *s1 = vld1q_s16(s); - s += p; - *s2 = vld1q_s16(s); - s += p; - *s3 = vld1q_s16(s); -} - -static INLINE void load_unaligned_u8_4x8(const uint8_t *buf, int stride, - uint32x2_t *tu0, uint32x2_t *tu1, - uint32x2_t *tu2, uint32x2_t *tu3) { - uint32_t a; - - memcpy(&a, buf, 4); - buf += stride; - *tu0 = vset_lane_u32(a, *tu0, 0); - memcpy(&a, buf, 4); - buf += stride; - *tu0 = vset_lane_u32(a, *tu0, 1); - memcpy(&a, buf, 4); - buf += stride; - *tu1 = vset_lane_u32(a, *tu1, 0); - memcpy(&a, buf, 4); - buf += stride; - *tu1 = vset_lane_u32(a, *tu1, 1); - memcpy(&a, buf, 4); - buf += stride; - *tu2 = vset_lane_u32(a, *tu2, 0); - memcpy(&a, buf, 4); - buf += stride; - *tu2 = vset_lane_u32(a, *tu2, 1); - memcpy(&a, buf, 4); - buf += stride; - *tu3 = vset_lane_u32(a, *tu3, 0); - memcpy(&a, buf, 4); - *tu3 = vset_lane_u32(a, *tu3, 1); -} - -static INLINE void load_unaligned_u8_4x4(const uint8_t *buf, int stride, - uint32x2_t *tu0, uint32x2_t *tu1) { - uint32_t a; - - memcpy(&a, buf, 4); - buf += stride; - *tu0 = vset_lane_u32(a, *tu0, 0); - memcpy(&a, buf, 4); - buf += stride; - *tu0 = vset_lane_u32(a, *tu0, 1); - memcpy(&a, buf, 4); - buf += stride; - *tu1 = vset_lane_u32(a, *tu1, 0); - memcpy(&a, buf, 4); - *tu1 = vset_lane_u32(a, *tu1, 1); -} - -static INLINE void load_unaligned_u8_4x1(const uint8_t *buf, 
int stride, - uint32x2_t *tu0) { - uint32_t a; - - memcpy(&a, buf, 4); - buf += stride; - *tu0 = vset_lane_u32(a, *tu0, 0); -} - -static INLINE void load_unaligned_u8_4x2(const uint8_t *buf, int stride, - uint32x2_t *tu0) { - uint32_t a; - - memcpy(&a, buf, 4); - buf += stride; - *tu0 = vset_lane_u32(a, *tu0, 0); - memcpy(&a, buf, 4); - buf += stride; - *tu0 = vset_lane_u32(a, *tu0, 1); -} - -static INLINE void load_unaligned_u8_2x2(const uint8_t *buf, int stride, - uint16x4_t *tu0) { - uint16_t a; - - memcpy(&a, buf, 2); - buf += stride; - *tu0 = vset_lane_u16(a, *tu0, 0); - memcpy(&a, buf, 2); - buf += stride; - *tu0 = vset_lane_u16(a, *tu0, 1); -} - -static INLINE void load_u8_16x8(const uint8_t *s, ptrdiff_t p, - uint8x16_t *const s0, uint8x16_t *const s1, - uint8x16_t *const s2, uint8x16_t *const s3, - uint8x16_t *const s4, uint8x16_t *const s5, - uint8x16_t *const s6, uint8x16_t *const s7) { - *s0 = vld1q_u8(s); - s += p; - *s1 = vld1q_u8(s); - s += p; - *s2 = vld1q_u8(s); - s += p; - *s3 = vld1q_u8(s); - s += p; - *s4 = vld1q_u8(s); - s += p; - *s5 = vld1q_u8(s); - s += p; - *s6 = vld1q_u8(s); - s += p; - *s7 = vld1q_u8(s); -} - -static INLINE void load_u8_16x4(const uint8_t *s, ptrdiff_t p, - uint8x16_t *const s0, uint8x16_t *const s1, - uint8x16_t *const s2, uint8x16_t *const s3) { - *s0 = vld1q_u8(s); - s += p; - *s1 = vld1q_u8(s); - s += p; - *s2 = vld1q_u8(s); - s += p; - *s3 = vld1q_u8(s); -} - -static INLINE void load_unaligned_u16_4x4(const uint16_t *buf, uint32_t stride, - uint64x2_t *tu0, uint64x2_t *tu1) { - uint64_t a; - - memcpy(&a, buf, 8); - buf += stride; - *tu0 = vsetq_lane_u64(a, *tu0, 0); - memcpy(&a, buf, 8); - buf += stride; - *tu0 = vsetq_lane_u64(a, *tu0, 1); - memcpy(&a, buf, 8); - buf += stride; - *tu1 = vsetq_lane_u64(a, *tu1, 0); - memcpy(&a, buf, 8); - *tu1 = vsetq_lane_u64(a, *tu1, 1); -} - -static INLINE void load_s32_4x4(int32_t *s, int32_t p, int32x4_t *s1, - int32x4_t *s2, int32x4_t *s3, int32x4_t *s4) { - *s1 = vld1q_s32(s); 
- s += p; - *s2 = vld1q_s32(s); - s += p; - *s3 = vld1q_s32(s); - s += p; - *s4 = vld1q_s32(s); -} - -static INLINE void store_s32_4x4(int32_t *s, int32_t p, int32x4_t s1, - int32x4_t s2, int32x4_t s3, int32x4_t s4) { - vst1q_s32(s, s1); - s += p; - vst1q_s32(s, s2); - s += p; - vst1q_s32(s, s3); - s += p; - vst1q_s32(s, s4); -} - -static INLINE void load_u32_4x4(uint32_t *s, int32_t p, uint32x4_t *s1, - uint32x4_t *s2, uint32x4_t *s3, - uint32x4_t *s4) { - *s1 = vld1q_u32(s); - s += p; - *s2 = vld1q_u32(s); - s += p; - *s3 = vld1q_u32(s); - s += p; - *s4 = vld1q_u32(s); -} - -static INLINE void store_u32_4x4(uint32_t *s, int32_t p, uint32x4_t s1, - uint32x4_t s2, uint32x4_t s3, uint32x4_t s4) { - vst1q_u32(s, s1); - s += p; - vst1q_u32(s, s2); - s += p; - vst1q_u32(s, s3); - s += p; - vst1q_u32(s, s4); -} - -#endif // AOM_AV1_COMMON_ARM_MEM_NEON_H_ diff --git a/third_party/aom/av1/common/arm/reconinter_neon.c b/third_party/aom/av1/common/arm/reconinter_neon.c deleted file mode 100644 index 44e064195..000000000 --- a/third_party/aom/av1/common/arm/reconinter_neon.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <arm_neon.h> -#include <assert.h> - -#include "aom/aom_integer.h" -#include "aom_dsp/blend.h" -#include "aom_ports/mem.h" -#include "av1/common/arm/mem_neon.h" -#include "av1/common/blockd.h" -#include "config/av1_rtcd.h" - -void av1_build_compound_diffwtd_mask_d16_neon( - uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, - int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, - ConvolveParams *conv_params, int bd) { - assert(h >= 4); - assert(w >= 4); - assert((mask_type == DIFFWTD_38_INV) || (mask_type == DIFFWTD_38)); - const int round = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8); - uint16x8_t diff_q, tmp0, tmp1; - uint8x8_t diff_d, diff_select; - const CONV_BUF_TYPE *src0_1, *src1_1; - const int16x8_t dup_round = vdupq_n_s16((int16_t)(-round)); - const uint8x8_t dup_38 = vdup_n_u8(38); - const uint8x8_t dup_64 = vdup_n_u8(AOM_BLEND_A64_MAX_ALPHA); - if (mask_type == DIFFWTD_38) { - diff_select = vdup_n_u8(255); - } else { - diff_select = vdup_n_u8(0); - } - if (w >= 8) { - for (int i = 0; i < h; ++i) { - src0_1 = src0; - src1_1 = src1; - for (int j = 0; j < w; j += 8) { - __builtin_prefetch(src0_1); - __builtin_prefetch(src1_1); - diff_q = vabdq_u16(vld1q_u16(src0_1), vld1q_u16(src1_1)); - diff_q = vrshlq_u16(diff_q, dup_round); - diff_d = vshrn_n_u16(diff_q, DIFF_FACTOR_LOG2); - diff_d = vmin_u8(vadd_u8(diff_d, dup_38), dup_64); - diff_d = vbsl_u8(diff_select, diff_d, vsub_u8(dup_64, diff_d)); - vst1_u8(mask, diff_d); - src0_1 += 8; - src1_1 += 8; - mask += 8; - } - src0 += src0_stride; - src1 += src1_stride; - } - } else if (w == 4) { - for (int i = 0; i < h; i += 2) { - src0_1 = src0; - src1_1 = src1; - __builtin_prefetch(src0_1 + 0 * src0_stride); - __builtin_prefetch(src0_1 + 1 * src0_stride); - __builtin_prefetch(src1_1 + 0 * src1_stride); - __builtin_prefetch(src1_1 + 1 * src1_stride); - tmp0 = vcombine_u16(vld1_u16(src0_1 + (0 * src0_stride)), - vld1_u16(src0_1 + 
(1 * src0_stride))); - tmp1 = vcombine_u16(vld1_u16(src1_1 + (0 * src1_stride)), - vld1_u16(src1_1 + (1 * src1_stride))); - diff_q = vabdq_u16(tmp0, tmp1); - diff_q = vrshlq_u16(diff_q, dup_round); - diff_d = vshrn_n_u16(diff_q, DIFF_FACTOR_LOG2); - diff_d = vmin_u8(vadd_u8(diff_d, dup_38), dup_64); - diff_d = vbsl_u8(diff_select, diff_d, vsub_u8(dup_64, diff_d)); - vst1_u8(mask, diff_d); - src0 += src0_stride * 2; - src1 += src1_stride * 2; - mask += w * 2; - } - } -} diff --git a/third_party/aom/av1/common/arm/selfguided_neon.c b/third_party/aom/av1/common/arm/selfguided_neon.c deleted file mode 100644 index b3a37c4cb..000000000 --- a/third_party/aom/av1/common/arm/selfguided_neon.c +++ /dev/null @@ -1,1508 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <arm_neon.h> -#include <assert.h> - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/txfm_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" -#include "av1/common/common.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/resize.h" -#include "av1/common/restoration.h" -#include "av1/common/arm/mem_neon.h" -#include "av1/common/arm/transpose_neon.h" - -// Constants used for right shift in final_filter calculation. 
-#define NB_EVEN 5 -#define NB_ODD 4 - -static INLINE void calc_ab_fast_internal_common( - uint32x4_t s0, uint32x4_t s1, uint32x4_t s2, uint32x4_t s3, uint32x4_t s4, - uint32x4_t s5, uint32x4_t s6, uint32x4_t s7, int32x4_t sr4, int32x4_t sr5, - int32x4_t sr6, int32x4_t sr7, uint32x4_t const_n_val, uint32x4_t s_vec, - uint32x4_t const_val, uint32x4_t one_by_n_minus_1_vec, - uint16x4_t sgrproj_sgr, int32_t *src1, uint16_t *dst_A16, int32_t *src2, - const int buf_stride) { - uint32x4_t q0, q1, q2, q3; - uint32x4_t p0, p1, p2, p3; - uint16x4_t d0, d1, d2, d3; - - s0 = vmulq_u32(s0, const_n_val); - s1 = vmulq_u32(s1, const_n_val); - s2 = vmulq_u32(s2, const_n_val); - s3 = vmulq_u32(s3, const_n_val); - - q0 = vmulq_u32(s4, s4); - q1 = vmulq_u32(s5, s5); - q2 = vmulq_u32(s6, s6); - q3 = vmulq_u32(s7, s7); - - p0 = vcleq_u32(q0, s0); - p1 = vcleq_u32(q1, s1); - p2 = vcleq_u32(q2, s2); - p3 = vcleq_u32(q3, s3); - - q0 = vsubq_u32(s0, q0); - q1 = vsubq_u32(s1, q1); - q2 = vsubq_u32(s2, q2); - q3 = vsubq_u32(s3, q3); - - p0 = vandq_u32(p0, q0); - p1 = vandq_u32(p1, q1); - p2 = vandq_u32(p2, q2); - p3 = vandq_u32(p3, q3); - - p0 = vmulq_u32(p0, s_vec); - p1 = vmulq_u32(p1, s_vec); - p2 = vmulq_u32(p2, s_vec); - p3 = vmulq_u32(p3, s_vec); - - p0 = vrshrq_n_u32(p0, SGRPROJ_MTABLE_BITS); - p1 = vrshrq_n_u32(p1, SGRPROJ_MTABLE_BITS); - p2 = vrshrq_n_u32(p2, SGRPROJ_MTABLE_BITS); - p3 = vrshrq_n_u32(p3, SGRPROJ_MTABLE_BITS); - - p0 = vminq_u32(p0, const_val); - p1 = vminq_u32(p1, const_val); - p2 = vminq_u32(p2, const_val); - p3 = vminq_u32(p3, const_val); - - { - store_u32_4x4((uint32_t *)src1, buf_stride, p0, p1, p2, p3); - - for (int x = 0; x < 4; x++) { - for (int y = 0; y < 4; y++) { - dst_A16[x * buf_stride + y] = x_by_xplus1[src1[x * buf_stride + y]]; - } - } - load_u16_4x4(dst_A16, buf_stride, &d0, &d1, &d2, &d3); - } - p0 = vsubl_u16(sgrproj_sgr, d0); - p1 = vsubl_u16(sgrproj_sgr, d1); - p2 = vsubl_u16(sgrproj_sgr, d2); - p3 = vsubl_u16(sgrproj_sgr, d3); - - s4 = 
vmulq_u32(vreinterpretq_u32_s32(sr4), one_by_n_minus_1_vec); - s5 = vmulq_u32(vreinterpretq_u32_s32(sr5), one_by_n_minus_1_vec); - s6 = vmulq_u32(vreinterpretq_u32_s32(sr6), one_by_n_minus_1_vec); - s7 = vmulq_u32(vreinterpretq_u32_s32(sr7), one_by_n_minus_1_vec); - - s4 = vmulq_u32(s4, p0); - s5 = vmulq_u32(s5, p1); - s6 = vmulq_u32(s6, p2); - s7 = vmulq_u32(s7, p3); - - p0 = vrshrq_n_u32(s4, SGRPROJ_RECIP_BITS); - p1 = vrshrq_n_u32(s5, SGRPROJ_RECIP_BITS); - p2 = vrshrq_n_u32(s6, SGRPROJ_RECIP_BITS); - p3 = vrshrq_n_u32(s7, SGRPROJ_RECIP_BITS); - - store_s32_4x4(src2, buf_stride, vreinterpretq_s32_u32(p0), - vreinterpretq_s32_u32(p1), vreinterpretq_s32_u32(p2), - vreinterpretq_s32_u32(p3)); -} -static INLINE void calc_ab_internal_common( - uint32x4_t s0, uint32x4_t s1, uint32x4_t s2, uint32x4_t s3, uint32x4_t s4, - uint32x4_t s5, uint32x4_t s6, uint32x4_t s7, uint16x8_t s16_0, - uint16x8_t s16_1, uint16x8_t s16_2, uint16x8_t s16_3, uint16x8_t s16_4, - uint16x8_t s16_5, uint16x8_t s16_6, uint16x8_t s16_7, - uint32x4_t const_n_val, uint32x4_t s_vec, uint32x4_t const_val, - uint16x4_t one_by_n_minus_1_vec, uint16x8_t sgrproj_sgr, int32_t *src1, - uint16_t *dst_A16, int32_t *dst2, const int buf_stride) { - uint16x4_t d0, d1, d2, d3, d4, d5, d6, d7; - uint32x4_t q0, q1, q2, q3, q4, q5, q6, q7; - uint32x4_t p0, p1, p2, p3, p4, p5, p6, p7; - - s0 = vmulq_u32(s0, const_n_val); - s1 = vmulq_u32(s1, const_n_val); - s2 = vmulq_u32(s2, const_n_val); - s3 = vmulq_u32(s3, const_n_val); - s4 = vmulq_u32(s4, const_n_val); - s5 = vmulq_u32(s5, const_n_val); - s6 = vmulq_u32(s6, const_n_val); - s7 = vmulq_u32(s7, const_n_val); - - d0 = vget_low_u16(s16_4); - d1 = vget_low_u16(s16_5); - d2 = vget_low_u16(s16_6); - d3 = vget_low_u16(s16_7); - d4 = vget_high_u16(s16_4); - d5 = vget_high_u16(s16_5); - d6 = vget_high_u16(s16_6); - d7 = vget_high_u16(s16_7); - - q0 = vmull_u16(d0, d0); - q1 = vmull_u16(d1, d1); - q2 = vmull_u16(d2, d2); - q3 = vmull_u16(d3, d3); - q4 = vmull_u16(d4, 
d4); - q5 = vmull_u16(d5, d5); - q6 = vmull_u16(d6, d6); - q7 = vmull_u16(d7, d7); - - p0 = vcleq_u32(q0, s0); - p1 = vcleq_u32(q1, s1); - p2 = vcleq_u32(q2, s2); - p3 = vcleq_u32(q3, s3); - p4 = vcleq_u32(q4, s4); - p5 = vcleq_u32(q5, s5); - p6 = vcleq_u32(q6, s6); - p7 = vcleq_u32(q7, s7); - - q0 = vsubq_u32(s0, q0); - q1 = vsubq_u32(s1, q1); - q2 = vsubq_u32(s2, q2); - q3 = vsubq_u32(s3, q3); - q4 = vsubq_u32(s4, q4); - q5 = vsubq_u32(s5, q5); - q6 = vsubq_u32(s6, q6); - q7 = vsubq_u32(s7, q7); - - p0 = vandq_u32(p0, q0); - p1 = vandq_u32(p1, q1); - p2 = vandq_u32(p2, q2); - p3 = vandq_u32(p3, q3); - p4 = vandq_u32(p4, q4); - p5 = vandq_u32(p5, q5); - p6 = vandq_u32(p6, q6); - p7 = vandq_u32(p7, q7); - - p0 = vmulq_u32(p0, s_vec); - p1 = vmulq_u32(p1, s_vec); - p2 = vmulq_u32(p2, s_vec); - p3 = vmulq_u32(p3, s_vec); - p4 = vmulq_u32(p4, s_vec); - p5 = vmulq_u32(p5, s_vec); - p6 = vmulq_u32(p6, s_vec); - p7 = vmulq_u32(p7, s_vec); - - p0 = vrshrq_n_u32(p0, SGRPROJ_MTABLE_BITS); - p1 = vrshrq_n_u32(p1, SGRPROJ_MTABLE_BITS); - p2 = vrshrq_n_u32(p2, SGRPROJ_MTABLE_BITS); - p3 = vrshrq_n_u32(p3, SGRPROJ_MTABLE_BITS); - p4 = vrshrq_n_u32(p4, SGRPROJ_MTABLE_BITS); - p5 = vrshrq_n_u32(p5, SGRPROJ_MTABLE_BITS); - p6 = vrshrq_n_u32(p6, SGRPROJ_MTABLE_BITS); - p7 = vrshrq_n_u32(p7, SGRPROJ_MTABLE_BITS); - - p0 = vminq_u32(p0, const_val); - p1 = vminq_u32(p1, const_val); - p2 = vminq_u32(p2, const_val); - p3 = vminq_u32(p3, const_val); - p4 = vminq_u32(p4, const_val); - p5 = vminq_u32(p5, const_val); - p6 = vminq_u32(p6, const_val); - p7 = vminq_u32(p7, const_val); - - { - store_u32_4x4((uint32_t *)src1, buf_stride, p0, p1, p2, p3); - store_u32_4x4((uint32_t *)src1 + 4, buf_stride, p4, p5, p6, p7); - - for (int x = 0; x < 4; x++) { - for (int y = 0; y < 8; y++) { - dst_A16[x * buf_stride + y] = x_by_xplus1[src1[x * buf_stride + y]]; - } - } - load_u16_8x4(dst_A16, buf_stride, &s16_4, &s16_5, &s16_6, &s16_7); - } - - s16_4 = vsubq_u16(sgrproj_sgr, s16_4); - s16_5 = 
vsubq_u16(sgrproj_sgr, s16_5); - s16_6 = vsubq_u16(sgrproj_sgr, s16_6); - s16_7 = vsubq_u16(sgrproj_sgr, s16_7); - - s0 = vmull_u16(vget_low_u16(s16_0), one_by_n_minus_1_vec); - s1 = vmull_u16(vget_low_u16(s16_1), one_by_n_minus_1_vec); - s2 = vmull_u16(vget_low_u16(s16_2), one_by_n_minus_1_vec); - s3 = vmull_u16(vget_low_u16(s16_3), one_by_n_minus_1_vec); - s4 = vmull_u16(vget_high_u16(s16_0), one_by_n_minus_1_vec); - s5 = vmull_u16(vget_high_u16(s16_1), one_by_n_minus_1_vec); - s6 = vmull_u16(vget_high_u16(s16_2), one_by_n_minus_1_vec); - s7 = vmull_u16(vget_high_u16(s16_3), one_by_n_minus_1_vec); - - s0 = vmulq_u32(s0, vmovl_u16(vget_low_u16(s16_4))); - s1 = vmulq_u32(s1, vmovl_u16(vget_low_u16(s16_5))); - s2 = vmulq_u32(s2, vmovl_u16(vget_low_u16(s16_6))); - s3 = vmulq_u32(s3, vmovl_u16(vget_low_u16(s16_7))); - s4 = vmulq_u32(s4, vmovl_u16(vget_high_u16(s16_4))); - s5 = vmulq_u32(s5, vmovl_u16(vget_high_u16(s16_5))); - s6 = vmulq_u32(s6, vmovl_u16(vget_high_u16(s16_6))); - s7 = vmulq_u32(s7, vmovl_u16(vget_high_u16(s16_7))); - - p0 = vrshrq_n_u32(s0, SGRPROJ_RECIP_BITS); - p1 = vrshrq_n_u32(s1, SGRPROJ_RECIP_BITS); - p2 = vrshrq_n_u32(s2, SGRPROJ_RECIP_BITS); - p3 = vrshrq_n_u32(s3, SGRPROJ_RECIP_BITS); - p4 = vrshrq_n_u32(s4, SGRPROJ_RECIP_BITS); - p5 = vrshrq_n_u32(s5, SGRPROJ_RECIP_BITS); - p6 = vrshrq_n_u32(s6, SGRPROJ_RECIP_BITS); - p7 = vrshrq_n_u32(s7, SGRPROJ_RECIP_BITS); - - store_s32_4x4(dst2, buf_stride, vreinterpretq_s32_u32(p0), - vreinterpretq_s32_u32(p1), vreinterpretq_s32_u32(p2), - vreinterpretq_s32_u32(p3)); - store_s32_4x4(dst2 + 4, buf_stride, vreinterpretq_s32_u32(p4), - vreinterpretq_s32_u32(p5), vreinterpretq_s32_u32(p6), - vreinterpretq_s32_u32(p7)); -} - -static INLINE void boxsum2_square_sum_calc( - int16x4_t t1, int16x4_t t2, int16x4_t t3, int16x4_t t4, int16x4_t t5, - int16x4_t t6, int16x4_t t7, int16x4_t t8, int16x4_t t9, int16x4_t t10, - int16x4_t t11, int32x4_t *r0, int32x4_t *r1, int32x4_t *r2, int32x4_t *r3) { - int32x4_t d1, 
d2, d3, d4, d5, d6, d7, d8, d9, d10, d11; - int32x4_t r12, r34, r67, r89, r1011; - int32x4_t r345, r6789, r789; - - d1 = vmull_s16(t1, t1); - d2 = vmull_s16(t2, t2); - d3 = vmull_s16(t3, t3); - d4 = vmull_s16(t4, t4); - d5 = vmull_s16(t5, t5); - d6 = vmull_s16(t6, t6); - d7 = vmull_s16(t7, t7); - d8 = vmull_s16(t8, t8); - d9 = vmull_s16(t9, t9); - d10 = vmull_s16(t10, t10); - d11 = vmull_s16(t11, t11); - - r12 = vaddq_s32(d1, d2); - r34 = vaddq_s32(d3, d4); - r67 = vaddq_s32(d6, d7); - r89 = vaddq_s32(d8, d9); - r1011 = vaddq_s32(d10, d11); - r345 = vaddq_s32(r34, d5); - r6789 = vaddq_s32(r67, r89); - r789 = vsubq_s32(r6789, d6); - *r0 = vaddq_s32(r12, r345); - *r1 = vaddq_s32(r67, r345); - *r2 = vaddq_s32(d5, r6789); - *r3 = vaddq_s32(r789, r1011); -} - -static INLINE void boxsum2(int16_t *src, const int src_stride, int16_t *dst16, - int32_t *dst32, int32_t *dst2, const int dst_stride, - const int width, const int height) { - assert(width > 2 * SGRPROJ_BORDER_HORZ); - assert(height > 2 * SGRPROJ_BORDER_VERT); - - int16_t *dst1_16_ptr, *src_ptr; - int32_t *dst2_ptr; - int h, w, count = 0; - const int dst_stride_2 = (dst_stride << 1); - const int dst_stride_8 = (dst_stride << 3); - - dst1_16_ptr = dst16; - dst2_ptr = dst2; - src_ptr = src; - w = width; - { - int16x8_t t1, t2, t3, t4, t5, t6, t7; - int16x8_t t8, t9, t10, t11, t12; - - int16x8_t q12345, q56789, q34567, q7891011; - int16x8_t q12, q34, q67, q89, q1011; - int16x8_t q345, q6789, q789; - - int32x4_t r12345, r56789, r34567, r7891011; - - do { - h = height; - dst1_16_ptr = dst16 + (count << 3); - dst2_ptr = dst2 + (count << 3); - src_ptr = src + (count << 3); - - dst1_16_ptr += dst_stride_2; - dst2_ptr += dst_stride_2; - do { - load_s16_8x4(src_ptr, src_stride, &t1, &t2, &t3, &t4); - src_ptr += 4 * src_stride; - load_s16_8x4(src_ptr, src_stride, &t5, &t6, &t7, &t8); - src_ptr += 4 * src_stride; - load_s16_8x4(src_ptr, src_stride, &t9, &t10, &t11, &t12); - - q12 = vaddq_s16(t1, t2); - q34 = vaddq_s16(t3, t4); 
- q67 = vaddq_s16(t6, t7); - q89 = vaddq_s16(t8, t9); - q1011 = vaddq_s16(t10, t11); - q345 = vaddq_s16(q34, t5); - q6789 = vaddq_s16(q67, q89); - q789 = vaddq_s16(q89, t7); - q12345 = vaddq_s16(q12, q345); - q34567 = vaddq_s16(q67, q345); - q56789 = vaddq_s16(t5, q6789); - q7891011 = vaddq_s16(q789, q1011); - - store_s16_8x4(dst1_16_ptr, dst_stride_2, q12345, q34567, q56789, - q7891011); - dst1_16_ptr += dst_stride_8; - - boxsum2_square_sum_calc( - vget_low_s16(t1), vget_low_s16(t2), vget_low_s16(t3), - vget_low_s16(t4), vget_low_s16(t5), vget_low_s16(t6), - vget_low_s16(t7), vget_low_s16(t8), vget_low_s16(t9), - vget_low_s16(t10), vget_low_s16(t11), &r12345, &r34567, &r56789, - &r7891011); - - store_s32_4x4(dst2_ptr, dst_stride_2, r12345, r34567, r56789, r7891011); - - boxsum2_square_sum_calc( - vget_high_s16(t1), vget_high_s16(t2), vget_high_s16(t3), - vget_high_s16(t4), vget_high_s16(t5), vget_high_s16(t6), - vget_high_s16(t7), vget_high_s16(t8), vget_high_s16(t9), - vget_high_s16(t10), vget_high_s16(t11), &r12345, &r34567, &r56789, - &r7891011); - - store_s32_4x4(dst2_ptr + 4, dst_stride_2, r12345, r34567, r56789, - r7891011); - dst2_ptr += (dst_stride_8); - h -= 8; - } while (h > 0); - w -= 8; - count++; - } while (w > 0); - } - - { - int16x4_t s1, s2, s3, s4, s5, s6, s7, s8; - int32x4_t d1, d2, d3, d4, d5, d6, d7, d8; - int32x4_t q12345, q34567, q23456, q45678; - int32x4_t q23, q45, q67; - int32x4_t q2345, q4567; - - int32x4_t r12345, r34567, r23456, r45678; - int32x4_t r23, r45, r67; - int32x4_t r2345, r4567; - - int32_t *src2_ptr, *dst1_32_ptr; - int16_t *src1_ptr; - count = 0; - h = height; - do { - dst1_32_ptr = dst32 + count * dst_stride_8 + (dst_stride_2); - dst2_ptr = dst2 + count * dst_stride_8 + (dst_stride_2); - src1_ptr = dst16 + count * dst_stride_8 + (dst_stride_2); - src2_ptr = dst2 + count * dst_stride_8 + (dst_stride_2); - w = width; - - dst1_32_ptr += 2; - dst2_ptr += 2; - load_s16_4x4(src1_ptr, dst_stride_2, &s1, &s2, &s3, &s4); - 
transpose_s16_4x4d(&s1, &s2, &s3, &s4); - load_s32_4x4(src2_ptr, dst_stride_2, &d1, &d2, &d3, &d4); - transpose_s32_4x4(&d1, &d2, &d3, &d4); - do { - src1_ptr += 4; - src2_ptr += 4; - load_s16_4x4(src1_ptr, dst_stride_2, &s5, &s6, &s7, &s8); - transpose_s16_4x4d(&s5, &s6, &s7, &s8); - load_s32_4x4(src2_ptr, dst_stride_2, &d5, &d6, &d7, &d8); - transpose_s32_4x4(&d5, &d6, &d7, &d8); - q23 = vaddl_s16(s2, s3); - q45 = vaddl_s16(s4, s5); - q67 = vaddl_s16(s6, s7); - q2345 = vaddq_s32(q23, q45); - q4567 = vaddq_s32(q45, q67); - q12345 = vaddq_s32(vmovl_s16(s1), q2345); - q23456 = vaddq_s32(q2345, vmovl_s16(s6)); - q34567 = vaddq_s32(q4567, vmovl_s16(s3)); - q45678 = vaddq_s32(q4567, vmovl_s16(s8)); - - transpose_s32_4x4(&q12345, &q23456, &q34567, &q45678); - store_s32_4x4(dst1_32_ptr, dst_stride_2, q12345, q23456, q34567, - q45678); - dst1_32_ptr += 4; - s1 = s5; - s2 = s6; - s3 = s7; - s4 = s8; - - r23 = vaddq_s32(d2, d3); - r45 = vaddq_s32(d4, d5); - r67 = vaddq_s32(d6, d7); - r2345 = vaddq_s32(r23, r45); - r4567 = vaddq_s32(r45, r67); - r12345 = vaddq_s32(d1, r2345); - r23456 = vaddq_s32(r2345, d6); - r34567 = vaddq_s32(r4567, d3); - r45678 = vaddq_s32(r4567, d8); - - transpose_s32_4x4(&r12345, &r23456, &r34567, &r45678); - store_s32_4x4(dst2_ptr, dst_stride_2, r12345, r23456, r34567, r45678); - dst2_ptr += 4; - d1 = d5; - d2 = d6; - d3 = d7; - d4 = d8; - w -= 4; - } while (w > 0); - h -= 8; - count++; - } while (h > 0); - } -} - -static INLINE void calc_ab_internal_lbd(int32_t *A, uint16_t *A16, - uint16_t *B16, int32_t *B, - const int buf_stride, const int width, - const int height, const int r, - const int s, const int ht_inc) { - int32_t *src1, *dst2, count = 0; - uint16_t *dst_A16, *src2; - const uint32_t n = (2 * r + 1) * (2 * r + 1); - const uint32x4_t const_n_val = vdupq_n_u32(n); - const uint16x8_t sgrproj_sgr = vdupq_n_u16(SGRPROJ_SGR); - const uint16x4_t one_by_n_minus_1_vec = vdup_n_u16(one_by_x[n - 1]); - const uint32x4_t const_val = vdupq_n_u32(255); - 
- uint16x8_t s16_0, s16_1, s16_2, s16_3, s16_4, s16_5, s16_6, s16_7; - - uint32x4_t s0, s1, s2, s3, s4, s5, s6, s7; - - const uint32x4_t s_vec = vdupq_n_u32(s); - int w, h = height; - - do { - dst_A16 = A16 + (count << 2) * buf_stride; - src1 = A + (count << 2) * buf_stride; - src2 = B16 + (count << 2) * buf_stride; - dst2 = B + (count << 2) * buf_stride; - w = width; - do { - load_u32_4x4((uint32_t *)src1, buf_stride, &s0, &s1, &s2, &s3); - load_u32_4x4((uint32_t *)src1 + 4, buf_stride, &s4, &s5, &s6, &s7); - load_u16_8x4(src2, buf_stride, &s16_0, &s16_1, &s16_2, &s16_3); - - s16_4 = s16_0; - s16_5 = s16_1; - s16_6 = s16_2; - s16_7 = s16_3; - - calc_ab_internal_common( - s0, s1, s2, s3, s4, s5, s6, s7, s16_0, s16_1, s16_2, s16_3, s16_4, - s16_5, s16_6, s16_7, const_n_val, s_vec, const_val, - one_by_n_minus_1_vec, sgrproj_sgr, src1, dst_A16, dst2, buf_stride); - - w -= 8; - dst2 += 8; - src1 += 8; - src2 += 8; - dst_A16 += 8; - } while (w > 0); - count++; - h -= (ht_inc * 4); - } while (h > 0); -} - -static INLINE void calc_ab_internal_hbd(int32_t *A, uint16_t *A16, - uint16_t *B16, int32_t *B, - const int buf_stride, const int width, - const int height, const int bit_depth, - const int r, const int s, - const int ht_inc) { - int32_t *src1, *dst2, count = 0; - uint16_t *dst_A16, *src2; - const uint32_t n = (2 * r + 1) * (2 * r + 1); - const int16x8_t bd_min_2_vec = vdupq_n_s16(-(bit_depth - 8)); - const int32x4_t bd_min_1_vec = vdupq_n_s32(-((bit_depth - 8) << 1)); - const uint32x4_t const_n_val = vdupq_n_u32(n); - const uint16x8_t sgrproj_sgr = vdupq_n_u16(SGRPROJ_SGR); - const uint16x4_t one_by_n_minus_1_vec = vdup_n_u16(one_by_x[n - 1]); - const uint32x4_t const_val = vdupq_n_u32(255); - - int32x4_t sr0, sr1, sr2, sr3, sr4, sr5, sr6, sr7; - uint16x8_t s16_0, s16_1, s16_2, s16_3; - uint16x8_t s16_4, s16_5, s16_6, s16_7; - uint32x4_t s0, s1, s2, s3, s4, s5, s6, s7; - - const uint32x4_t s_vec = vdupq_n_u32(s); - int w, h = height; - - do { - src1 = A + (count << 2) 
* buf_stride; - src2 = B16 + (count << 2) * buf_stride; - dst2 = B + (count << 2) * buf_stride; - dst_A16 = A16 + (count << 2) * buf_stride; - w = width; - do { - load_s32_4x4(src1, buf_stride, &sr0, &sr1, &sr2, &sr3); - load_s32_4x4(src1 + 4, buf_stride, &sr4, &sr5, &sr6, &sr7); - load_u16_8x4(src2, buf_stride, &s16_0, &s16_1, &s16_2, &s16_3); - - s0 = vrshlq_u32(vreinterpretq_u32_s32(sr0), bd_min_1_vec); - s1 = vrshlq_u32(vreinterpretq_u32_s32(sr1), bd_min_1_vec); - s2 = vrshlq_u32(vreinterpretq_u32_s32(sr2), bd_min_1_vec); - s3 = vrshlq_u32(vreinterpretq_u32_s32(sr3), bd_min_1_vec); - s4 = vrshlq_u32(vreinterpretq_u32_s32(sr4), bd_min_1_vec); - s5 = vrshlq_u32(vreinterpretq_u32_s32(sr5), bd_min_1_vec); - s6 = vrshlq_u32(vreinterpretq_u32_s32(sr6), bd_min_1_vec); - s7 = vrshlq_u32(vreinterpretq_u32_s32(sr7), bd_min_1_vec); - - s16_4 = vrshlq_u16(s16_0, bd_min_2_vec); - s16_5 = vrshlq_u16(s16_1, bd_min_2_vec); - s16_6 = vrshlq_u16(s16_2, bd_min_2_vec); - s16_7 = vrshlq_u16(s16_3, bd_min_2_vec); - - calc_ab_internal_common( - s0, s1, s2, s3, s4, s5, s6, s7, s16_0, s16_1, s16_2, s16_3, s16_4, - s16_5, s16_6, s16_7, const_n_val, s_vec, const_val, - one_by_n_minus_1_vec, sgrproj_sgr, src1, dst_A16, dst2, buf_stride); - - w -= 8; - dst2 += 8; - src1 += 8; - src2 += 8; - dst_A16 += 8; - } while (w > 0); - count++; - h -= (ht_inc * 4); - } while (h > 0); -} - -static INLINE void calc_ab_fast_internal_lbd(int32_t *A, uint16_t *A16, - int32_t *B, const int buf_stride, - const int width, const int height, - const int r, const int s, - const int ht_inc) { - int32_t *src1, *src2, count = 0; - uint16_t *dst_A16; - const uint32_t n = (2 * r + 1) * (2 * r + 1); - const uint32x4_t const_n_val = vdupq_n_u32(n); - const uint16x4_t sgrproj_sgr = vdup_n_u16(SGRPROJ_SGR); - const uint32x4_t one_by_n_minus_1_vec = vdupq_n_u32(one_by_x[n - 1]); - const uint32x4_t const_val = vdupq_n_u32(255); - - int32x4_t sr0, sr1, sr2, sr3, sr4, sr5, sr6, sr7; - uint32x4_t s0, s1, s2, s3, s4, s5, s6, 
s7; - - const uint32x4_t s_vec = vdupq_n_u32(s); - int w, h = height; - - do { - src1 = A + (count << 2) * buf_stride; - src2 = B + (count << 2) * buf_stride; - dst_A16 = A16 + (count << 2) * buf_stride; - w = width; - do { - load_s32_4x4(src1, buf_stride, &sr0, &sr1, &sr2, &sr3); - load_s32_4x4(src2, buf_stride, &sr4, &sr5, &sr6, &sr7); - - s0 = vreinterpretq_u32_s32(sr0); - s1 = vreinterpretq_u32_s32(sr1); - s2 = vreinterpretq_u32_s32(sr2); - s3 = vreinterpretq_u32_s32(sr3); - s4 = vreinterpretq_u32_s32(sr4); - s5 = vreinterpretq_u32_s32(sr5); - s6 = vreinterpretq_u32_s32(sr6); - s7 = vreinterpretq_u32_s32(sr7); - - calc_ab_fast_internal_common(s0, s1, s2, s3, s4, s5, s6, s7, sr4, sr5, - sr6, sr7, const_n_val, s_vec, const_val, - one_by_n_minus_1_vec, sgrproj_sgr, src1, - dst_A16, src2, buf_stride); - - w -= 4; - src1 += 4; - src2 += 4; - dst_A16 += 4; - } while (w > 0); - count++; - h -= (ht_inc * 4); - } while (h > 0); -} - -static INLINE void calc_ab_fast_internal_hbd(int32_t *A, uint16_t *A16, - int32_t *B, const int buf_stride, - const int width, const int height, - const int bit_depth, const int r, - const int s, const int ht_inc) { - int32_t *src1, *src2, count = 0; - uint16_t *dst_A16; - const uint32_t n = (2 * r + 1) * (2 * r + 1); - const int32x4_t bd_min_2_vec = vdupq_n_s32(-(bit_depth - 8)); - const int32x4_t bd_min_1_vec = vdupq_n_s32(-((bit_depth - 8) << 1)); - const uint32x4_t const_n_val = vdupq_n_u32(n); - const uint16x4_t sgrproj_sgr = vdup_n_u16(SGRPROJ_SGR); - const uint32x4_t one_by_n_minus_1_vec = vdupq_n_u32(one_by_x[n - 1]); - const uint32x4_t const_val = vdupq_n_u32(255); - - int32x4_t sr0, sr1, sr2, sr3, sr4, sr5, sr6, sr7; - uint32x4_t s0, s1, s2, s3, s4, s5, s6, s7; - - const uint32x4_t s_vec = vdupq_n_u32(s); - int w, h = height; - - do { - src1 = A + (count << 2) * buf_stride; - src2 = B + (count << 2) * buf_stride; - dst_A16 = A16 + (count << 2) * buf_stride; - w = width; - do { - load_s32_4x4(src1, buf_stride, &sr0, &sr1, &sr2, 
&sr3); - load_s32_4x4(src2, buf_stride, &sr4, &sr5, &sr6, &sr7); - - s0 = vrshlq_u32(vreinterpretq_u32_s32(sr0), bd_min_1_vec); - s1 = vrshlq_u32(vreinterpretq_u32_s32(sr1), bd_min_1_vec); - s2 = vrshlq_u32(vreinterpretq_u32_s32(sr2), bd_min_1_vec); - s3 = vrshlq_u32(vreinterpretq_u32_s32(sr3), bd_min_1_vec); - s4 = vrshlq_u32(vreinterpretq_u32_s32(sr4), bd_min_2_vec); - s5 = vrshlq_u32(vreinterpretq_u32_s32(sr5), bd_min_2_vec); - s6 = vrshlq_u32(vreinterpretq_u32_s32(sr6), bd_min_2_vec); - s7 = vrshlq_u32(vreinterpretq_u32_s32(sr7), bd_min_2_vec); - - calc_ab_fast_internal_common(s0, s1, s2, s3, s4, s5, s6, s7, sr4, sr5, - sr6, sr7, const_n_val, s_vec, const_val, - one_by_n_minus_1_vec, sgrproj_sgr, src1, - dst_A16, src2, buf_stride); - - w -= 4; - src1 += 4; - src2 += 4; - dst_A16 += 4; - } while (w > 0); - count++; - h -= (ht_inc * 4); - } while (h > 0); -} - -static INLINE void boxsum1(int16_t *src, const int src_stride, uint16_t *dst1, - int32_t *dst2, const int dst_stride, const int width, - const int height) { - assert(width > 2 * SGRPROJ_BORDER_HORZ); - assert(height > 2 * SGRPROJ_BORDER_VERT); - - int16_t *src_ptr; - int32_t *dst2_ptr; - uint16_t *dst1_ptr; - int h, w, count = 0; - - w = width; - { - int16x8_t s1, s2, s3, s4, s5, s6, s7, s8; - int16x8_t q23, q34, q56, q234, q345, q456, q567; - int32x4_t r23, r56, r345, r456, r567, r78, r678; - int32x4_t r4_low, r4_high, r34_low, r34_high, r234_low, r234_high; - int32x4_t r2, r3, r5, r6, r7, r8; - int16x8_t q678, q78; - - do { - dst1_ptr = dst1 + (count << 3); - dst2_ptr = dst2 + (count << 3); - src_ptr = src + (count << 3); - h = height; - - load_s16_8x4(src_ptr, src_stride, &s1, &s2, &s3, &s4); - src_ptr += 4 * src_stride; - - q23 = vaddq_s16(s2, s3); - q234 = vaddq_s16(q23, s4); - q34 = vaddq_s16(s3, s4); - dst1_ptr += (dst_stride << 1); - - r2 = vmull_s16(vget_low_s16(s2), vget_low_s16(s2)); - r3 = vmull_s16(vget_low_s16(s3), vget_low_s16(s3)); - r4_low = vmull_s16(vget_low_s16(s4), vget_low_s16(s4)); - 
r23 = vaddq_s32(r2, r3); - r234_low = vaddq_s32(r23, r4_low); - r34_low = vaddq_s32(r3, r4_low); - - r2 = vmull_s16(vget_high_s16(s2), vget_high_s16(s2)); - r3 = vmull_s16(vget_high_s16(s3), vget_high_s16(s3)); - r4_high = vmull_s16(vget_high_s16(s4), vget_high_s16(s4)); - r23 = vaddq_s32(r2, r3); - r234_high = vaddq_s32(r23, r4_high); - r34_high = vaddq_s32(r3, r4_high); - - dst2_ptr += (dst_stride << 1); - - do { - load_s16_8x4(src_ptr, src_stride, &s5, &s6, &s7, &s8); - src_ptr += 4 * src_stride; - - q345 = vaddq_s16(s5, q34); - q56 = vaddq_s16(s5, s6); - q456 = vaddq_s16(s4, q56); - q567 = vaddq_s16(s7, q56); - q78 = vaddq_s16(s7, s8); - q678 = vaddq_s16(s6, q78); - - store_s16_8x4((int16_t *)dst1_ptr, dst_stride, q234, q345, q456, q567); - dst1_ptr += (dst_stride << 2); - - s4 = s8; - q34 = q78; - q234 = q678; - - r5 = vmull_s16(vget_low_s16(s5), vget_low_s16(s5)); - r6 = vmull_s16(vget_low_s16(s6), vget_low_s16(s6)); - r7 = vmull_s16(vget_low_s16(s7), vget_low_s16(s7)); - r8 = vmull_s16(vget_low_s16(s8), vget_low_s16(s8)); - - r345 = vaddq_s32(r5, r34_low); - r56 = vaddq_s32(r5, r6); - r456 = vaddq_s32(r4_low, r56); - r567 = vaddq_s32(r7, r56); - r78 = vaddq_s32(r7, r8); - r678 = vaddq_s32(r6, r78); - store_s32_4x4(dst2_ptr, dst_stride, r234_low, r345, r456, r567); - - r4_low = r8; - r34_low = r78; - r234_low = r678; - - r5 = vmull_s16(vget_high_s16(s5), vget_high_s16(s5)); - r6 = vmull_s16(vget_high_s16(s6), vget_high_s16(s6)); - r7 = vmull_s16(vget_high_s16(s7), vget_high_s16(s7)); - r8 = vmull_s16(vget_high_s16(s8), vget_high_s16(s8)); - - r345 = vaddq_s32(r5, r34_high); - r56 = vaddq_s32(r5, r6); - r456 = vaddq_s32(r4_high, r56); - r567 = vaddq_s32(r7, r56); - r78 = vaddq_s32(r7, r8); - r678 = vaddq_s32(r6, r78); - store_s32_4x4((dst2_ptr + 4), dst_stride, r234_high, r345, r456, r567); - dst2_ptr += (dst_stride << 2); - - r4_high = r8; - r34_high = r78; - r234_high = r678; - - h -= 4; - } while (h > 0); - w -= 8; - count++; - } while (w > 0); - } - - { - 
int16x4_t d1, d2, d3, d4, d5, d6, d7, d8; - int16x4_t q23, q34, q56, q234, q345, q456, q567; - int32x4_t r23, r56, r234, r345, r456, r567, r34, r78, r678; - int32x4_t r1, r2, r3, r4, r5, r6, r7, r8; - int16x4_t q678, q78; - - int32_t *src2_ptr; - uint16_t *src1_ptr; - count = 0; - h = height; - w = width; - do { - dst1_ptr = dst1 + (count << 2) * dst_stride; - dst2_ptr = dst2 + (count << 2) * dst_stride; - src1_ptr = dst1 + (count << 2) * dst_stride; - src2_ptr = dst2 + (count << 2) * dst_stride; - w = width; - - load_s16_4x4((int16_t *)src1_ptr, dst_stride, &d1, &d2, &d3, &d4); - transpose_s16_4x4d(&d1, &d2, &d3, &d4); - load_s32_4x4(src2_ptr, dst_stride, &r1, &r2, &r3, &r4); - transpose_s32_4x4(&r1, &r2, &r3, &r4); - src1_ptr += 4; - src2_ptr += 4; - - q23 = vadd_s16(d2, d3); - q234 = vadd_s16(q23, d4); - q34 = vadd_s16(d3, d4); - dst1_ptr += 2; - r23 = vaddq_s32(r2, r3); - r234 = vaddq_s32(r23, r4); - r34 = vaddq_s32(r3, r4); - dst2_ptr += 2; - - do { - load_s16_4x4((int16_t *)src1_ptr, dst_stride, &d5, &d6, &d7, &d8); - transpose_s16_4x4d(&d5, &d6, &d7, &d8); - load_s32_4x4(src2_ptr, dst_stride, &r5, &r6, &r7, &r8); - transpose_s32_4x4(&r5, &r6, &r7, &r8); - src1_ptr += 4; - src2_ptr += 4; - - q345 = vadd_s16(d5, q34); - q56 = vadd_s16(d5, d6); - q456 = vadd_s16(d4, q56); - q567 = vadd_s16(d7, q56); - q78 = vadd_s16(d7, d8); - q678 = vadd_s16(d6, q78); - transpose_s16_4x4d(&q234, &q345, &q456, &q567); - store_s16_4x4((int16_t *)dst1_ptr, dst_stride, q234, q345, q456, q567); - dst1_ptr += 4; - - d4 = d8; - q34 = q78; - q234 = q678; - - r345 = vaddq_s32(r5, r34); - r56 = vaddq_s32(r5, r6); - r456 = vaddq_s32(r4, r56); - r567 = vaddq_s32(r7, r56); - r78 = vaddq_s32(r7, r8); - r678 = vaddq_s32(r6, r78); - transpose_s32_4x4(&r234, &r345, &r456, &r567); - store_s32_4x4(dst2_ptr, dst_stride, r234, r345, r456, r567); - dst2_ptr += 4; - - r4 = r8; - r34 = r78; - r234 = r678; - w -= 4; - } while (w > 0); - h -= 4; - count++; - } while (h > 0); - } -} - -static INLINE 
int32x4_t cross_sum_inp_s32(int32_t *buf, int buf_stride) { - int32x4_t xtr, xt, xtl, xl, x, xr, xbr, xb, xbl; - int32x4_t fours, threes, res; - - xtl = vld1q_s32(buf - buf_stride - 1); - xt = vld1q_s32(buf - buf_stride); - xtr = vld1q_s32(buf - buf_stride + 1); - xl = vld1q_s32(buf - 1); - x = vld1q_s32(buf); - xr = vld1q_s32(buf + 1); - xbl = vld1q_s32(buf + buf_stride - 1); - xb = vld1q_s32(buf + buf_stride); - xbr = vld1q_s32(buf + buf_stride + 1); - - fours = vaddq_s32(xl, vaddq_s32(xt, vaddq_s32(xr, vaddq_s32(xb, x)))); - threes = vaddq_s32(xtl, vaddq_s32(xtr, vaddq_s32(xbr, xbl))); - res = vsubq_s32(vshlq_n_s32(vaddq_s32(fours, threes), 2), threes); - return res; -} - -static INLINE void cross_sum_inp_u16(uint16_t *buf, int buf_stride, - int32x4_t *a0, int32x4_t *a1) { - uint16x8_t xtr, xt, xtl, xl, x, xr, xbr, xb, xbl; - uint16x8_t r0, r1; - - xtl = vld1q_u16(buf - buf_stride - 1); - xt = vld1q_u16(buf - buf_stride); - xtr = vld1q_u16(buf - buf_stride + 1); - xl = vld1q_u16(buf - 1); - x = vld1q_u16(buf); - xr = vld1q_u16(buf + 1); - xbl = vld1q_u16(buf + buf_stride - 1); - xb = vld1q_u16(buf + buf_stride); - xbr = vld1q_u16(buf + buf_stride + 1); - - xb = vaddq_u16(xb, x); - xt = vaddq_u16(xt, xr); - xl = vaddq_u16(xl, xb); - xl = vaddq_u16(xl, xt); - - r0 = vshlq_n_u16(xl, 2); - - xbl = vaddq_u16(xbl, xbr); - xtl = vaddq_u16(xtl, xtr); - xtl = vaddq_u16(xtl, xbl); - - r1 = vshlq_n_u16(xtl, 2); - r1 = vsubq_u16(r1, xtl); - - *a0 = vreinterpretq_s32_u32( - vaddq_u32(vmovl_u16(vget_low_u16(r0)), vmovl_u16(vget_low_u16(r1)))); - *a1 = vreinterpretq_s32_u32( - vaddq_u32(vmovl_u16(vget_high_u16(r0)), vmovl_u16(vget_high_u16(r1)))); -} - -static INLINE int32x4_t cross_sum_fast_even_row(int32_t *buf, int buf_stride) { - int32x4_t xtr, xt, xtl, xbr, xb, xbl; - int32x4_t fives, sixes, fives_plus_sixes; - - xtl = vld1q_s32(buf - buf_stride - 1); - xt = vld1q_s32(buf - buf_stride); - xtr = vld1q_s32(buf - buf_stride + 1); - xbl = vld1q_s32(buf + buf_stride - 1); - xb 
= vld1q_s32(buf + buf_stride); - xbr = vld1q_s32(buf + buf_stride + 1); - - fives = vaddq_s32(xtl, vaddq_s32(xtr, vaddq_s32(xbr, xbl))); - sixes = vaddq_s32(xt, xb); - fives_plus_sixes = vaddq_s32(fives, sixes); - - return vaddq_s32( - vaddq_s32(vshlq_n_s32(fives_plus_sixes, 2), fives_plus_sixes), sixes); -} - -static INLINE void cross_sum_fast_even_row_inp16(uint16_t *buf, int buf_stride, - int32x4_t *a0, int32x4_t *a1) { - uint16x8_t xtr, xt, xtl, xbr, xb, xbl, xb0; - - xtl = vld1q_u16(buf - buf_stride - 1); - xt = vld1q_u16(buf - buf_stride); - xtr = vld1q_u16(buf - buf_stride + 1); - xbl = vld1q_u16(buf + buf_stride - 1); - xb = vld1q_u16(buf + buf_stride); - xbr = vld1q_u16(buf + buf_stride + 1); - - xbr = vaddq_u16(xbr, xbl); - xtr = vaddq_u16(xtr, xtl); - xbr = vaddq_u16(xbr, xtr); - xtl = vshlq_n_u16(xbr, 2); - xbr = vaddq_u16(xtl, xbr); - - xb = vaddq_u16(xb, xt); - xb0 = vshlq_n_u16(xb, 1); - xb = vshlq_n_u16(xb, 2); - xb = vaddq_u16(xb, xb0); - - *a0 = vreinterpretq_s32_u32( - vaddq_u32(vmovl_u16(vget_low_u16(xbr)), vmovl_u16(vget_low_u16(xb)))); - *a1 = vreinterpretq_s32_u32( - vaddq_u32(vmovl_u16(vget_high_u16(xbr)), vmovl_u16(vget_high_u16(xb)))); -} - -static INLINE int32x4_t cross_sum_fast_odd_row(int32_t *buf) { - int32x4_t xl, x, xr; - int32x4_t fives, sixes, fives_plus_sixes; - - xl = vld1q_s32(buf - 1); - x = vld1q_s32(buf); - xr = vld1q_s32(buf + 1); - fives = vaddq_s32(xl, xr); - sixes = x; - fives_plus_sixes = vaddq_s32(fives, sixes); - - return vaddq_s32( - vaddq_s32(vshlq_n_s32(fives_plus_sixes, 2), fives_plus_sixes), sixes); -} - -static INLINE void cross_sum_fast_odd_row_inp16(uint16_t *buf, int32x4_t *a0, - int32x4_t *a1) { - uint16x8_t xl, x, xr; - uint16x8_t x0; - - xl = vld1q_u16(buf - 1); - x = vld1q_u16(buf); - xr = vld1q_u16(buf + 1); - xl = vaddq_u16(xl, xr); - x0 = vshlq_n_u16(xl, 2); - xl = vaddq_u16(xl, x0); - - x0 = vshlq_n_u16(x, 1); - x = vshlq_n_u16(x, 2); - x = vaddq_u16(x, x0); - - *a0 = vreinterpretq_s32_u32( - 
vaddq_u32(vmovl_u16(vget_low_u16(xl)), vmovl_u16(vget_low_u16(x)))); - *a1 = vreinterpretq_s32_u32( - vaddq_u32(vmovl_u16(vget_high_u16(xl)), vmovl_u16(vget_high_u16(x)))); -} - -static void final_filter_fast_internal(uint16_t *A, int32_t *B, - const int buf_stride, int16_t *src, - const int src_stride, int32_t *dst, - const int dst_stride, const int width, - const int height) { - int16x8_t s0; - int32_t *B_tmp, *dst_ptr; - uint16_t *A_tmp; - int16_t *src_ptr; - int32x4_t a_res0, a_res1, b_res0, b_res1; - int w, h, count = 0; - assert(SGRPROJ_SGR_BITS == 8); - assert(SGRPROJ_RST_BITS == 4); - - A_tmp = A; - B_tmp = B; - src_ptr = src; - dst_ptr = dst; - h = height; - do { - A_tmp = (A + count * buf_stride); - B_tmp = (B + count * buf_stride); - src_ptr = (src + count * src_stride); - dst_ptr = (dst + count * dst_stride); - w = width; - if (!(count & 1)) { - do { - s0 = vld1q_s16(src_ptr); - cross_sum_fast_even_row_inp16(A_tmp, buf_stride, &a_res0, &a_res1); - a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0); - a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1); - - b_res0 = cross_sum_fast_even_row(B_tmp, buf_stride); - b_res1 = cross_sum_fast_even_row(B_tmp + 4, buf_stride); - a_res0 = vaddq_s32(a_res0, b_res0); - a_res1 = vaddq_s32(a_res1, b_res1); - - a_res0 = - vrshrq_n_s32(a_res0, SGRPROJ_SGR_BITS + NB_EVEN - SGRPROJ_RST_BITS); - a_res1 = - vrshrq_n_s32(a_res1, SGRPROJ_SGR_BITS + NB_EVEN - SGRPROJ_RST_BITS); - - vst1q_s32(dst_ptr, a_res0); - vst1q_s32(dst_ptr + 4, a_res1); - - A_tmp += 8; - B_tmp += 8; - src_ptr += 8; - dst_ptr += 8; - w -= 8; - } while (w > 0); - } else { - do { - s0 = vld1q_s16(src_ptr); - cross_sum_fast_odd_row_inp16(A_tmp, &a_res0, &a_res1); - a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0); - a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1); - - b_res0 = cross_sum_fast_odd_row(B_tmp); - b_res1 = cross_sum_fast_odd_row(B_tmp + 4); - a_res0 = vaddq_s32(a_res0, b_res0); - a_res1 = vaddq_s32(a_res1, b_res1); - - 
a_res0 = - vrshrq_n_s32(a_res0, SGRPROJ_SGR_BITS + NB_ODD - SGRPROJ_RST_BITS); - a_res1 = - vrshrq_n_s32(a_res1, SGRPROJ_SGR_BITS + NB_ODD - SGRPROJ_RST_BITS); - - vst1q_s32(dst_ptr, a_res0); - vst1q_s32(dst_ptr + 4, a_res1); - - A_tmp += 8; - B_tmp += 8; - src_ptr += 8; - dst_ptr += 8; - w -= 8; - } while (w > 0); - } - count++; - h -= 1; - } while (h > 0); -} - -void final_filter_internal(uint16_t *A, int32_t *B, const int buf_stride, - int16_t *src, const int src_stride, int32_t *dst, - const int dst_stride, const int width, - const int height) { - int16x8_t s0; - int32_t *B_tmp, *dst_ptr; - uint16_t *A_tmp; - int16_t *src_ptr; - int32x4_t a_res0, a_res1, b_res0, b_res1; - int w, h, count = 0; - - assert(SGRPROJ_SGR_BITS == 8); - assert(SGRPROJ_RST_BITS == 4); - h = height; - - do { - A_tmp = (A + count * buf_stride); - B_tmp = (B + count * buf_stride); - src_ptr = (src + count * src_stride); - dst_ptr = (dst + count * dst_stride); - w = width; - do { - s0 = vld1q_s16(src_ptr); - cross_sum_inp_u16(A_tmp, buf_stride, &a_res0, &a_res1); - a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0); - a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1); - - b_res0 = cross_sum_inp_s32(B_tmp, buf_stride); - b_res1 = cross_sum_inp_s32(B_tmp + 4, buf_stride); - a_res0 = vaddq_s32(a_res0, b_res0); - a_res1 = vaddq_s32(a_res1, b_res1); - - a_res0 = - vrshrq_n_s32(a_res0, SGRPROJ_SGR_BITS + NB_EVEN - SGRPROJ_RST_BITS); - a_res1 = - vrshrq_n_s32(a_res1, SGRPROJ_SGR_BITS + NB_EVEN - SGRPROJ_RST_BITS); - vst1q_s32(dst_ptr, a_res0); - vst1q_s32(dst_ptr + 4, a_res1); - - A_tmp += 8; - B_tmp += 8; - src_ptr += 8; - dst_ptr += 8; - w -= 8; - } while (w > 0); - count++; - h -= 1; - } while (h > 0); -} - -static INLINE void restoration_fast_internal(uint16_t *dgd16, int width, - int height, int dgd_stride, - int32_t *dst, int dst_stride, - int bit_depth, int sgr_params_idx, - int radius_idx) { - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - const int r = 
params->r[radius_idx]; - const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ; - const int height_ext = height + 2 * SGRPROJ_BORDER_VERT; - - const int buf_stride = ((width_ext + 3) & ~3) + 16; - int32_t A_[RESTORATION_PROC_UNIT_PELS]; - uint16_t A16_[RESTORATION_PROC_UNIT_PELS]; - int32_t B_[RESTORATION_PROC_UNIT_PELS]; - int32_t *square_sum_buf = A_; - int32_t *sum_buf = B_; - uint16_t *tmp16_buf = A16_; - - assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r"); - assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 && - "Need SGRPROJ_BORDER_* >= r+1"); - - assert(radius_idx == 0); - assert(r == 2); - - // input(dgd16) is 16bit. - // sum of pixels 1st stage output will be in 16bit(tmp16_buf). End output is - // kept in 32bit [sum_buf]. sum of squares output is kept in 32bit - // buffer(square_sum_buf). - boxsum2((int16_t *)(dgd16 - dgd_stride * SGRPROJ_BORDER_VERT - - SGRPROJ_BORDER_HORZ), - dgd_stride, (int16_t *)tmp16_buf, sum_buf, square_sum_buf, buf_stride, - width_ext, height_ext); - - square_sum_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - sum_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - tmp16_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - - // Calculation of a, b. a output is in 16bit tmp_buf which is in range of - // [1, 256] for all bit depths. b output is kept in 32bit buffer. 
- - if (8 == bit_depth) { - calc_ab_fast_internal_lbd( - (square_sum_buf - buf_stride - 1), (tmp16_buf - buf_stride - 1), - (sum_buf - buf_stride - 1), buf_stride * 2, width + 2, height + 2, r, - params->s[radius_idx], 2); - } else { - calc_ab_fast_internal_hbd( - (square_sum_buf - buf_stride - 1), (tmp16_buf - buf_stride - 1), - (sum_buf - buf_stride - 1), buf_stride * 2, width + 2, height + 2, - bit_depth, r, params->s[radius_idx], 2); - } - final_filter_fast_internal(tmp16_buf, sum_buf, buf_stride, (int16_t *)dgd16, - dgd_stride, dst, dst_stride, width, height); -} - -static INLINE void restoration_internal(uint16_t *dgd16, int width, int height, - int dgd_stride, int32_t *dst, - int dst_stride, int bit_depth, - int sgr_params_idx, int radius_idx) { - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - const int r = params->r[radius_idx]; - const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ; - const int height_ext = height + 2 * SGRPROJ_BORDER_VERT; - - int buf_stride = ((width_ext + 3) & ~3) + 16; - int32_t A_[RESTORATION_PROC_UNIT_PELS]; - uint16_t A16_[RESTORATION_PROC_UNIT_PELS]; - uint16_t B16_[RESTORATION_PROC_UNIT_PELS]; - int32_t B_[RESTORATION_PROC_UNIT_PELS]; - int32_t *square_sum_buf = A_; - uint16_t *sum_buf = B16_; - uint16_t *A16 = A16_; - int32_t *B = B_; - - assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r"); - assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 && - "Need SGRPROJ_BORDER_* >= r+1"); - - assert(radius_idx == 1); - assert(r == 1); - - // input(dgd16) is 16bit. - // sum of pixels output will be in 16bit(sum_buf). - // sum of squares output is kept in 32bit buffer(square_sum_buf). 
- boxsum1((int16_t *)(dgd16 - dgd_stride * SGRPROJ_BORDER_VERT - - SGRPROJ_BORDER_HORZ), - dgd_stride, sum_buf, square_sum_buf, buf_stride, width_ext, - height_ext); - - square_sum_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - A16 += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - sum_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - - // Calculation of a, b. a output is in 16bit tmp_buf which is in range of - // [1, 256] for all bit depths. b output is kept in 32bit buffer. - if (8 == bit_depth) { - calc_ab_internal_lbd((square_sum_buf - buf_stride - 1), - (A16 - buf_stride - 1), (sum_buf - buf_stride - 1), - (B - buf_stride - 1), buf_stride, width + 2, - height + 2, r, params->s[radius_idx], 1); - } else { - calc_ab_internal_hbd((square_sum_buf - buf_stride - 1), - (A16 - buf_stride - 1), (sum_buf - buf_stride - 1), - (B - buf_stride - 1), buf_stride, width + 2, - height + 2, bit_depth, r, params->s[radius_idx], 1); - } - final_filter_internal(A16, B, buf_stride, (int16_t *)dgd16, dgd_stride, dst, - dst_stride, width, height); -} - -static INLINE void src_convert_u8_to_u16(const uint8_t *src, - const int src_stride, uint16_t *dst, - const int dst_stride, const int width, - const int height) { - const uint8_t *src_ptr; - uint16_t *dst_ptr; - int h, w, count = 0; - - uint8x8_t t1, t2, t3, t4; - uint16x8_t s1, s2, s3, s4; - h = height; - do { - src_ptr = src + (count << 2) * src_stride; - dst_ptr = dst + (count << 2) * dst_stride; - w = width; - if (w >= 7) { - do { - load_u8_8x4(src_ptr, src_stride, &t1, &t2, &t3, &t4); - s1 = vmovl_u8(t1); - s2 = vmovl_u8(t2); - s3 = vmovl_u8(t3); - s4 = vmovl_u8(t4); - store_u16_8x4(dst_ptr, dst_stride, s1, s2, s3, s4); - - src_ptr += 8; - dst_ptr += 8; - w -= 8; - } while (w > 7); - } - - for (int y = 0; y < w; y++) { - dst_ptr[y] = src_ptr[y]; - dst_ptr[y + 1 * dst_stride] = src_ptr[y + 1 * src_stride]; - dst_ptr[y + 2 * 
dst_stride] = src_ptr[y + 2 * src_stride]; - dst_ptr[y + 3 * dst_stride] = src_ptr[y + 3 * src_stride]; - } - count++; - h -= 4; - } while (h > 3); - - src_ptr = src + (count << 2) * src_stride; - dst_ptr = dst + (count << 2) * dst_stride; - for (int x = 0; x < h; x++) { - for (int y = 0; y < width; y++) { - dst_ptr[y + x * dst_stride] = src_ptr[y + x * src_stride]; - } - } -} - -static INLINE void src_convert_hbd_copy(const uint16_t *src, int src_stride, - uint16_t *dst, const int dst_stride, - int width, int height) { - const uint16_t *src_ptr; - uint16_t *dst_ptr; - int h, w, count = 0; - uint16x8_t s1, s2, s3, s4; - - h = height; - do { - src_ptr = src + (count << 2) * src_stride; - dst_ptr = dst + (count << 2) * dst_stride; - w = width; - do { - load_u16_8x4(src_ptr, src_stride, &s1, &s2, &s3, &s4); - store_u16_8x4(dst_ptr, dst_stride, s1, s2, s3, s4); - src_ptr += 8; - dst_ptr += 8; - w -= 8; - } while (w > 7); - - for (int y = 0; y < w; y++) { - dst_ptr[y] = src_ptr[y]; - dst_ptr[y + 1 * dst_stride] = src_ptr[y + 1 * src_stride]; - dst_ptr[y + 2 * dst_stride] = src_ptr[y + 2 * src_stride]; - dst_ptr[y + 3 * dst_stride] = src_ptr[y + 3 * src_stride]; - } - count++; - h -= 4; - } while (h > 3); - - src_ptr = src + (count << 2) * src_stride; - dst_ptr = dst + (count << 2) * dst_stride; - - for (int x = 0; x < h; x++) { - memcpy((dst_ptr + x * dst_stride), (src_ptr + x * src_stride), - sizeof(uint16_t) * width); - } -} - -int av1_selfguided_restoration_neon(const uint8_t *dat8, int width, int height, - int stride, int32_t *flt0, int32_t *flt1, - int flt_stride, int sgr_params_idx, - int bit_depth, int highbd) { - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - assert(!(params->r[0] == 0 && params->r[1] == 0)); - - uint16_t dgd16_[RESTORATION_PROC_UNIT_PELS]; - const int dgd16_stride = width + 2 * SGRPROJ_BORDER_HORZ; - uint16_t *dgd16 = - dgd16_ + dgd16_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ; - const int width_ext = width + 2 * 
SGRPROJ_BORDER_HORZ; - const int height_ext = height + 2 * SGRPROJ_BORDER_VERT; - const int dgd_stride = stride; - - if (highbd) { - const uint16_t *dgd16_tmp = CONVERT_TO_SHORTPTR(dat8); - src_convert_hbd_copy( - dgd16_tmp - SGRPROJ_BORDER_VERT * dgd_stride - SGRPROJ_BORDER_HORZ, - dgd_stride, - dgd16 - SGRPROJ_BORDER_VERT * dgd16_stride - SGRPROJ_BORDER_HORZ, - dgd16_stride, width_ext, height_ext); - } else { - src_convert_u8_to_u16( - dat8 - SGRPROJ_BORDER_VERT * dgd_stride - SGRPROJ_BORDER_HORZ, - dgd_stride, - dgd16 - SGRPROJ_BORDER_VERT * dgd16_stride - SGRPROJ_BORDER_HORZ, - dgd16_stride, width_ext, height_ext); - } - - if (params->r[0] > 0) - restoration_fast_internal(dgd16, width, height, dgd16_stride, flt0, - flt_stride, bit_depth, sgr_params_idx, 0); - if (params->r[1] > 0) - restoration_internal(dgd16, width, height, dgd16_stride, flt1, flt_stride, - bit_depth, sgr_params_idx, 1); - return 0; -} - -void apply_selfguided_restoration_neon(const uint8_t *dat8, int width, - int height, int stride, int eps, - const int *xqd, uint8_t *dst8, - int dst_stride, int32_t *tmpbuf, - int bit_depth, int highbd) { - int32_t *flt0 = tmpbuf; - int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX; - assert(width * height <= RESTORATION_UNITPELS_MAX); - uint16_t dgd16_[RESTORATION_PROC_UNIT_PELS]; - const int dgd16_stride = width + 2 * SGRPROJ_BORDER_HORZ; - uint16_t *dgd16 = - dgd16_ + dgd16_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ; - const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ; - const int height_ext = height + 2 * SGRPROJ_BORDER_VERT; - const int dgd_stride = stride; - const sgr_params_type *const params = &sgr_params[eps]; - int xq[2]; - - assert(!(params->r[0] == 0 && params->r[1] == 0)); - - if (highbd) { - const uint16_t *dgd16_tmp = CONVERT_TO_SHORTPTR(dat8); - src_convert_hbd_copy( - dgd16_tmp - SGRPROJ_BORDER_VERT * dgd_stride - SGRPROJ_BORDER_HORZ, - dgd_stride, - dgd16 - SGRPROJ_BORDER_VERT * dgd16_stride - SGRPROJ_BORDER_HORZ, - dgd16_stride, 
width_ext, height_ext); - } else { - src_convert_u8_to_u16( - dat8 - SGRPROJ_BORDER_VERT * dgd_stride - SGRPROJ_BORDER_HORZ, - dgd_stride, - dgd16 - SGRPROJ_BORDER_VERT * dgd16_stride - SGRPROJ_BORDER_HORZ, - dgd16_stride, width_ext, height_ext); - } - - if (params->r[0] > 0) - restoration_fast_internal(dgd16, width, height, dgd16_stride, flt0, width, - bit_depth, eps, 0); - if (params->r[1] > 0) - restoration_internal(dgd16, width, height, dgd16_stride, flt1, width, - bit_depth, eps, 1); - - decode_xq(xqd, xq, params); - - { - int16_t *src_ptr; - uint8_t *dst_ptr; - uint16_t *dst16_ptr; - int16x4_t d0, d4; - int16x8_t r0, s0; - uint16x8_t r4; - int32x4_t u0, u4, v0, v4, f00, f10; - uint8x8_t t0; - int count = 0, w = width, h = height, rc = 0; - - const int32x4_t xq0_vec = vdupq_n_s32(xq[0]); - const int32x4_t xq1_vec = vdupq_n_s32(xq[1]); - const int16x8_t zero = vdupq_n_s16(0); - const uint16x8_t max = vdupq_n_u16((1 << bit_depth) - 1); - uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst8); - dst_ptr = dst8; - src_ptr = (int16_t *)dgd16; - do { - w = width; - count = 0; - dst_ptr = dst8 + rc * dst_stride; - dst16_ptr = dst16 + rc * dst_stride; - do { - s0 = vld1q_s16(src_ptr + count); - - u0 = vshll_n_s16(vget_low_s16(s0), SGRPROJ_RST_BITS); - u4 = vshll_n_s16(vget_high_s16(s0), SGRPROJ_RST_BITS); - - v0 = vshlq_n_s32(u0, SGRPROJ_PRJ_BITS); - v4 = vshlq_n_s32(u4, SGRPROJ_PRJ_BITS); - - if (params->r[0] > 0) { - f00 = vld1q_s32(flt0 + count); - f10 = vld1q_s32(flt0 + count + 4); - - f00 = vsubq_s32(f00, u0); - f10 = vsubq_s32(f10, u4); - - v0 = vmlaq_s32(v0, xq0_vec, f00); - v4 = vmlaq_s32(v4, xq0_vec, f10); - } - - if (params->r[1] > 0) { - f00 = vld1q_s32(flt1 + count); - f10 = vld1q_s32(flt1 + count + 4); - - f00 = vsubq_s32(f00, u0); - f10 = vsubq_s32(f10, u4); - - v0 = vmlaq_s32(v0, xq1_vec, f00); - v4 = vmlaq_s32(v4, xq1_vec, f10); - } - - d0 = vqrshrn_n_s32(v0, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - d4 = vqrshrn_n_s32(v4, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - - 
r0 = vcombine_s16(d0, d4); - - r4 = vreinterpretq_u16_s16(vmaxq_s16(r0, zero)); - - if (highbd) { - r4 = vminq_u16(r4, max); - vst1q_u16(dst16_ptr, r4); - } else { - t0 = vqmovn_u16(r4); - vst1_u8(dst_ptr, t0); - } - w -= 8; - count += 8; - dst_ptr += 8; - dst16_ptr += 8; - } while (w > 0); - - src_ptr += dgd16_stride; - flt1 += width; - flt0 += width; - rc++; - h--; - } while (h > 0); - } -} diff --git a/third_party/aom/av1/common/arm/transpose_neon.h b/third_party/aom/av1/common/arm/transpose_neon.h deleted file mode 100644 index 8a3d9f07f..000000000 --- a/third_party/aom/av1/common/arm/transpose_neon.h +++ /dev/null @@ -1,537 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef AOM_AV1_COMMON_ARM_TRANSPOSE_NEON_H_ -#define AOM_AV1_COMMON_ARM_TRANSPOSE_NEON_H_ - -#include <arm_neon.h> - -static INLINE void transpose_u8_8x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2, - uint8x8_t *a3, uint8x8_t *a4, uint8x8_t *a5, - uint8x8_t *a6, uint8x8_t *a7) { - // Swap 8 bit elements. 
Goes from: - // a0: 00 01 02 03 04 05 06 07 - // a1: 10 11 12 13 14 15 16 17 - // a2: 20 21 22 23 24 25 26 27 - // a3: 30 31 32 33 34 35 36 37 - // a4: 40 41 42 43 44 45 46 47 - // a5: 50 51 52 53 54 55 56 57 - // a6: 60 61 62 63 64 65 66 67 - // a7: 70 71 72 73 74 75 76 77 - // to: - // b0.val[0]: 00 10 02 12 04 14 06 16 40 50 42 52 44 54 46 56 - // b0.val[1]: 01 11 03 13 05 15 07 17 41 51 43 53 45 55 47 57 - // b1.val[0]: 20 30 22 32 24 34 26 36 60 70 62 72 64 74 66 76 - // b1.val[1]: 21 31 23 33 25 35 27 37 61 71 63 73 65 75 67 77 - - const uint8x16x2_t b0 = - vtrnq_u8(vcombine_u8(*a0, *a4), vcombine_u8(*a1, *a5)); - const uint8x16x2_t b1 = - vtrnq_u8(vcombine_u8(*a2, *a6), vcombine_u8(*a3, *a7)); - - // Swap 16 bit elements resulting in: - // c0.val[0]: 00 10 20 30 04 14 24 34 40 50 60 70 44 54 64 74 - // c0.val[1]: 02 12 22 32 06 16 26 36 42 52 62 72 46 56 66 76 - // c1.val[0]: 01 11 21 31 05 15 25 35 41 51 61 71 45 55 65 75 - // c1.val[1]: 03 13 23 33 07 17 27 37 43 53 63 73 47 57 67 77 - - const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]), - vreinterpretq_u16_u8(b1.val[0])); - const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]), - vreinterpretq_u16_u8(b1.val[1])); - - // Unzip 32 bit elements resulting in: - // d0.val[0]: 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 - // d0.val[1]: 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 - // d1.val[0]: 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 - // d1.val[1]: 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 - const uint32x4x2_t d0 = vuzpq_u32(vreinterpretq_u32_u16(c0.val[0]), - vreinterpretq_u32_u16(c1.val[0])); - const uint32x4x2_t d1 = vuzpq_u32(vreinterpretq_u32_u16(c0.val[1]), - vreinterpretq_u32_u16(c1.val[1])); - - *a0 = vreinterpret_u8_u32(vget_low_u32(d0.val[0])); - *a1 = vreinterpret_u8_u32(vget_high_u32(d0.val[0])); - *a2 = vreinterpret_u8_u32(vget_low_u32(d1.val[0])); - *a3 = vreinterpret_u8_u32(vget_high_u32(d1.val[0])); - *a4 = 
vreinterpret_u8_u32(vget_low_u32(d0.val[1])); - *a5 = vreinterpret_u8_u32(vget_high_u32(d0.val[1])); - *a6 = vreinterpret_u8_u32(vget_low_u32(d1.val[1])); - *a7 = vreinterpret_u8_u32(vget_high_u32(d1.val[1])); -} - -static INLINE void transpose_u8_8x4(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2, - uint8x8_t *a3) { - // Swap 8 bit elements. Goes from: - // a0: 00 01 02 03 04 05 06 07 - // a1: 10 11 12 13 14 15 16 17 - // a2: 20 21 22 23 24 25 26 27 - // a3: 30 31 32 33 34 35 36 37 - // to: - // b0.val[0]: 00 10 02 12 04 14 06 16 - // b0.val[1]: 01 11 03 13 05 15 07 17 - // b1.val[0]: 20 30 22 32 24 34 26 36 - // b1.val[1]: 21 31 23 33 25 35 27 37 - - const uint8x8x2_t b0 = vtrn_u8(*a0, *a1); - const uint8x8x2_t b1 = vtrn_u8(*a2, *a3); - - // Swap 16 bit elements resulting in: - // c0.val[0]: 00 10 20 30 04 14 24 34 - // c0.val[1]: 02 12 22 32 06 16 26 36 - // c1.val[0]: 01 11 21 31 05 15 25 35 - // c1.val[1]: 03 13 23 33 07 17 27 37 - - const uint16x4x2_t c0 = - vtrn_u16(vreinterpret_u16_u8(b0.val[0]), vreinterpret_u16_u8(b1.val[0])); - const uint16x4x2_t c1 = - vtrn_u16(vreinterpret_u16_u8(b0.val[1]), vreinterpret_u16_u8(b1.val[1])); - - *a0 = vreinterpret_u8_u16(c0.val[0]); - *a1 = vreinterpret_u8_u16(c1.val[0]); - *a2 = vreinterpret_u8_u16(c0.val[1]); - *a3 = vreinterpret_u8_u16(c1.val[1]); -} - -static INLINE void transpose_u8_4x4(uint8x8_t *a0, uint8x8_t *a1) { - // Swap 16 bit elements. 
Goes from: - // a0: 00 01 02 03 10 11 12 13 - // a1: 20 21 22 23 30 31 32 33 - // to: - // b0.val[0]: 00 01 20 21 10 11 30 31 - // b0.val[1]: 02 03 22 23 12 13 32 33 - - const uint16x4x2_t b0 = - vtrn_u16(vreinterpret_u16_u8(*a0), vreinterpret_u16_u8(*a1)); - - // Swap 32 bit elements resulting in: - // c0.val[0]: 00 01 20 21 02 03 22 23 - // c0.val[1]: 10 11 30 31 12 13 32 33 - - const uint32x2x2_t c0 = vtrn_u32(vreinterpret_u32_u16(b0.val[0]), - vreinterpret_u32_u16(b0.val[1])); - - // Swap 8 bit elements resulting in: - // d0.val[0]: 00 10 20 30 02 12 22 32 - // d0.val[1]: 01 11 21 31 03 13 23 33 - - const uint8x8x2_t d0 = - vtrn_u8(vreinterpret_u8_u32(c0.val[0]), vreinterpret_u8_u32(c0.val[1])); - - *a0 = d0.val[0]; - *a1 = d0.val[1]; -} - -static INLINE void transpose_u8_4x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2, - uint8x8_t *a3, const uint8x8_t a4, - const uint8x8_t a5, const uint8x8_t a6, - const uint8x8_t a7) { - // Swap 32 bit elements. Goes from: - // a0: 00 01 02 03 XX XX XX XX - // a1: 10 11 12 13 XX XX XX XX - // a2: 20 21 22 23 XX XX XX XX - // a3; 30 31 32 33 XX XX XX XX - // a4: 40 41 42 43 XX XX XX XX - // a5: 50 51 52 53 XX XX XX XX - // a6: 60 61 62 63 XX XX XX XX - // a7: 70 71 72 73 XX XX XX XX - // to: - // b0.val[0]: 00 01 02 03 40 41 42 43 - // b1.val[0]: 10 11 12 13 50 51 52 53 - // b2.val[0]: 20 21 22 23 60 61 62 63 - // b3.val[0]: 30 31 32 33 70 71 72 73 - - const uint32x2x2_t b0 = - vtrn_u32(vreinterpret_u32_u8(*a0), vreinterpret_u32_u8(a4)); - const uint32x2x2_t b1 = - vtrn_u32(vreinterpret_u32_u8(*a1), vreinterpret_u32_u8(a5)); - const uint32x2x2_t b2 = - vtrn_u32(vreinterpret_u32_u8(*a2), vreinterpret_u32_u8(a6)); - const uint32x2x2_t b3 = - vtrn_u32(vreinterpret_u32_u8(*a3), vreinterpret_u32_u8(a7)); - - // Swap 16 bit elements resulting in: - // c0.val[0]: 00 01 20 21 40 41 60 61 - // c0.val[1]: 02 03 22 23 42 43 62 63 - // c1.val[0]: 10 11 30 31 50 51 70 71 - // c1.val[1]: 12 13 32 33 52 53 72 73 - - const uint16x4x2_t c0 = 
vtrn_u16(vreinterpret_u16_u32(b0.val[0]), - vreinterpret_u16_u32(b2.val[0])); - const uint16x4x2_t c1 = vtrn_u16(vreinterpret_u16_u32(b1.val[0]), - vreinterpret_u16_u32(b3.val[0])); - - // Swap 8 bit elements resulting in: - // d0.val[0]: 00 10 20 30 40 50 60 70 - // d0.val[1]: 01 11 21 31 41 51 61 71 - // d1.val[0]: 02 12 22 32 42 52 62 72 - // d1.val[1]: 03 13 23 33 43 53 63 73 - - const uint8x8x2_t d0 = - vtrn_u8(vreinterpret_u8_u16(c0.val[0]), vreinterpret_u8_u16(c1.val[0])); - const uint8x8x2_t d1 = - vtrn_u8(vreinterpret_u8_u16(c0.val[1]), vreinterpret_u8_u16(c1.val[1])); - - *a0 = d0.val[0]; - *a1 = d0.val[1]; - *a2 = d1.val[0]; - *a3 = d1.val[1]; -} - -static INLINE void transpose_u16_4x8(uint16x4_t *a0, uint16x4_t *a1, - uint16x4_t *a2, uint16x4_t *a3, - uint16x4_t *a4, uint16x4_t *a5, - uint16x4_t *a6, uint16x4_t *a7, - uint16x8_t *o0, uint16x8_t *o1, - uint16x8_t *o2, uint16x8_t *o3) { - // Swap 16 bit elements. Goes from: - // a0: 00 01 02 03 - // a1: 10 11 12 13 - // a2: 20 21 22 23 - // a3: 30 31 32 33 - // a4: 40 41 42 43 - // a5: 50 51 52 53 - // a6: 60 61 62 63 - // a7: 70 71 72 73 - // to: - // b0.val[0]: 00 10 02 12 - // b0.val[1]: 01 11 03 13 - // b1.val[0]: 20 30 22 32 - // b1.val[1]: 21 31 23 33 - // b2.val[0]: 40 50 42 52 - // b2.val[1]: 41 51 43 53 - // b3.val[0]: 60 70 62 72 - // b3.val[1]: 61 71 63 73 - - uint16x4x2_t b0 = vtrn_u16(*a0, *a1); - uint16x4x2_t b1 = vtrn_u16(*a2, *a3); - uint16x4x2_t b2 = vtrn_u16(*a4, *a5); - uint16x4x2_t b3 = vtrn_u16(*a6, *a7); - - // Swap 32 bit elements resulting in: - // c0.val[0]: 00 10 20 30 - // c0.val[1]: 02 12 22 32 - // c1.val[0]: 01 11 21 31 - // c1.val[1]: 03 13 23 33 - // c2.val[0]: 40 50 60 70 - // c2.val[1]: 42 52 62 72 - // c3.val[0]: 41 51 61 71 - // c3.val[1]: 43 53 63 73 - - uint32x2x2_t c0 = vtrn_u32(vreinterpret_u32_u16(b0.val[0]), - vreinterpret_u32_u16(b1.val[0])); - uint32x2x2_t c1 = vtrn_u32(vreinterpret_u32_u16(b0.val[1]), - vreinterpret_u32_u16(b1.val[1])); - uint32x2x2_t c2 = 
vtrn_u32(vreinterpret_u32_u16(b2.val[0]), - vreinterpret_u32_u16(b3.val[0])); - uint32x2x2_t c3 = vtrn_u32(vreinterpret_u32_u16(b2.val[1]), - vreinterpret_u32_u16(b3.val[1])); - - // Swap 64 bit elements resulting in: - // o0: 00 10 20 30 40 50 60 70 - // o1: 01 11 21 31 41 51 61 71 - // o2: 02 12 22 32 42 52 62 72 - // o3: 03 13 23 33 43 53 63 73 - - *o0 = vcombine_u16(vreinterpret_u16_u32(c0.val[0]), - vreinterpret_u16_u32(c2.val[0])); - *o1 = vcombine_u16(vreinterpret_u16_u32(c1.val[0]), - vreinterpret_u16_u32(c3.val[0])); - *o2 = vcombine_u16(vreinterpret_u16_u32(c0.val[1]), - vreinterpret_u16_u32(c2.val[1])); - *o3 = vcombine_u16(vreinterpret_u16_u32(c1.val[1]), - vreinterpret_u16_u32(c3.val[1])); -} - -static INLINE void transpose_u16_8x8(uint16x8_t *a0, uint16x8_t *a1, - uint16x8_t *a2, uint16x8_t *a3, - uint16x8_t *a4, uint16x8_t *a5, - uint16x8_t *a6, uint16x8_t *a7) { - // Swap 16 bit elements. Goes from: - // a0: 00 01 02 03 04 05 06 07 - // a1: 10 11 12 13 14 15 16 17 - // a2: 20 21 22 23 24 25 26 27 - // a3: 30 31 32 33 34 35 36 37 - // a4: 40 41 42 43 44 45 46 47 - // a5: 50 51 52 53 54 55 56 57 - // a6: 60 61 62 63 64 65 66 67 - // a7: 70 71 72 73 74 75 76 77 - // to: - // b0.val[0]: 00 10 02 12 04 14 06 16 - // b0.val[1]: 01 11 03 13 05 15 07 17 - // b1.val[0]: 20 30 22 32 24 34 26 36 - // b1.val[1]: 21 31 23 33 25 35 27 37 - // b2.val[0]: 40 50 42 52 44 54 46 56 - // b2.val[1]: 41 51 43 53 45 55 47 57 - // b3.val[0]: 60 70 62 72 64 74 66 76 - // b3.val[1]: 61 71 63 73 65 75 67 77 - - const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1); - const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3); - const uint16x8x2_t b2 = vtrnq_u16(*a4, *a5); - const uint16x8x2_t b3 = vtrnq_u16(*a6, *a7); - - // Swap 32 bit elements resulting in: - // c0.val[0]: 00 10 20 30 04 14 24 34 - // c0.val[1]: 02 12 22 32 06 16 26 36 - // c1.val[0]: 01 11 21 31 05 15 25 35 - // c1.val[1]: 03 13 23 33 07 17 27 37 - // c2.val[0]: 40 50 60 70 44 54 64 74 - // c2.val[1]: 42 52 62 72 46 56 66 76 - // 
c3.val[0]: 41 51 61 71 45 55 65 75 - // c3.val[1]: 43 53 63 73 47 57 67 77 - - const uint32x4x2_t c0 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[0]), - vreinterpretq_u32_u16(b1.val[0])); - const uint32x4x2_t c1 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[1]), - vreinterpretq_u32_u16(b1.val[1])); - const uint32x4x2_t c2 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[0]), - vreinterpretq_u32_u16(b3.val[0])); - const uint32x4x2_t c3 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[1]), - vreinterpretq_u32_u16(b3.val[1])); - - *a0 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c0.val[0])), - vget_low_u16(vreinterpretq_u16_u32(c2.val[0]))); - *a4 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c0.val[0])), - vget_high_u16(vreinterpretq_u16_u32(c2.val[0]))); - - *a2 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c0.val[1])), - vget_low_u16(vreinterpretq_u16_u32(c2.val[1]))); - *a6 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c0.val[1])), - vget_high_u16(vreinterpretq_u16_u32(c2.val[1]))); - - *a1 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c1.val[0])), - vget_low_u16(vreinterpretq_u16_u32(c3.val[0]))); - *a5 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c1.val[0])), - vget_high_u16(vreinterpretq_u16_u32(c3.val[0]))); - - *a3 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c1.val[1])), - vget_low_u16(vreinterpretq_u16_u32(c3.val[1]))); - *a7 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c1.val[1])), - vget_high_u16(vreinterpretq_u16_u32(c3.val[1]))); -} - -static INLINE void transpose_s16_8x8(int16x8_t *a0, int16x8_t *a1, - int16x8_t *a2, int16x8_t *a3, - int16x8_t *a4, int16x8_t *a5, - int16x8_t *a6, int16x8_t *a7) { - // Swap 16 bit elements. 
Goes from: - // a0: 00 01 02 03 04 05 06 07 - // a1: 10 11 12 13 14 15 16 17 - // a2: 20 21 22 23 24 25 26 27 - // a3: 30 31 32 33 34 35 36 37 - // a4: 40 41 42 43 44 45 46 47 - // a5: 50 51 52 53 54 55 56 57 - // a6: 60 61 62 63 64 65 66 67 - // a7: 70 71 72 73 74 75 76 77 - // to: - // b0.val[0]: 00 10 02 12 04 14 06 16 - // b0.val[1]: 01 11 03 13 05 15 07 17 - // b1.val[0]: 20 30 22 32 24 34 26 36 - // b1.val[1]: 21 31 23 33 25 35 27 37 - // b2.val[0]: 40 50 42 52 44 54 46 56 - // b2.val[1]: 41 51 43 53 45 55 47 57 - // b3.val[0]: 60 70 62 72 64 74 66 76 - // b3.val[1]: 61 71 63 73 65 75 67 77 - - const int16x8x2_t b0 = vtrnq_s16(*a0, *a1); - const int16x8x2_t b1 = vtrnq_s16(*a2, *a3); - const int16x8x2_t b2 = vtrnq_s16(*a4, *a5); - const int16x8x2_t b3 = vtrnq_s16(*a6, *a7); - - // Swap 32 bit elements resulting in: - // c0.val[0]: 00 10 20 30 04 14 24 34 - // c0.val[1]: 02 12 22 32 06 16 26 36 - // c1.val[0]: 01 11 21 31 05 15 25 35 - // c1.val[1]: 03 13 23 33 07 17 27 37 - // c2.val[0]: 40 50 60 70 44 54 64 74 - // c2.val[1]: 42 52 62 72 46 56 66 76 - // c3.val[0]: 41 51 61 71 45 55 65 75 - // c3.val[1]: 43 53 63 73 47 57 67 77 - - const int32x4x2_t c0 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]), - vreinterpretq_s32_s16(b1.val[0])); - const int32x4x2_t c1 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[1]), - vreinterpretq_s32_s16(b1.val[1])); - const int32x4x2_t c2 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[0]), - vreinterpretq_s32_s16(b3.val[0])); - const int32x4x2_t c3 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[1]), - vreinterpretq_s32_s16(b3.val[1])); - - *a0 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c0.val[0])), - vget_low_s16(vreinterpretq_s16_s32(c2.val[0]))); - *a4 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c0.val[0])), - vget_high_s16(vreinterpretq_s16_s32(c2.val[0]))); - - *a2 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c0.val[1])), - vget_low_s16(vreinterpretq_s16_s32(c2.val[1]))); - *a6 = 
vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c0.val[1])), - vget_high_s16(vreinterpretq_s16_s32(c2.val[1]))); - - *a1 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c1.val[0])), - vget_low_s16(vreinterpretq_s16_s32(c3.val[0]))); - *a5 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c1.val[0])), - vget_high_s16(vreinterpretq_s16_s32(c3.val[0]))); - - *a3 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c1.val[1])), - vget_low_s16(vreinterpretq_s16_s32(c3.val[1]))); - *a7 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c1.val[1])), - vget_high_s16(vreinterpretq_s16_s32(c3.val[1]))); -} - -static INLINE int16x8x2_t vpx_vtrnq_s64_to_s16(int32x4_t a0, int32x4_t a1) { - int16x8x2_t b0; - b0.val[0] = vcombine_s16(vreinterpret_s16_s32(vget_low_s32(a0)), - vreinterpret_s16_s32(vget_low_s32(a1))); - b0.val[1] = vcombine_s16(vreinterpret_s16_s32(vget_high_s32(a0)), - vreinterpret_s16_s32(vget_high_s32(a1))); - return b0; -} - -static INLINE void transpose_s16_8x8q(int16x8_t *a0, int16x8_t *out) { - // Swap 16 bit elements. 
Goes from: - // a0: 00 01 02 03 04 05 06 07 - // a1: 10 11 12 13 14 15 16 17 - // a2: 20 21 22 23 24 25 26 27 - // a3: 30 31 32 33 34 35 36 37 - // a4: 40 41 42 43 44 45 46 47 - // a5: 50 51 52 53 54 55 56 57 - // a6: 60 61 62 63 64 65 66 67 - // a7: 70 71 72 73 74 75 76 77 - // to: - // b0.val[0]: 00 10 02 12 04 14 06 16 - // b0.val[1]: 01 11 03 13 05 15 07 17 - // b1.val[0]: 20 30 22 32 24 34 26 36 - // b1.val[1]: 21 31 23 33 25 35 27 37 - // b2.val[0]: 40 50 42 52 44 54 46 56 - // b2.val[1]: 41 51 43 53 45 55 47 57 - // b3.val[0]: 60 70 62 72 64 74 66 76 - // b3.val[1]: 61 71 63 73 65 75 67 77 - - const int16x8x2_t b0 = vtrnq_s16(*a0, *(a0 + 1)); - const int16x8x2_t b1 = vtrnq_s16(*(a0 + 2), *(a0 + 3)); - const int16x8x2_t b2 = vtrnq_s16(*(a0 + 4), *(a0 + 5)); - const int16x8x2_t b3 = vtrnq_s16(*(a0 + 6), *(a0 + 7)); - - // Swap 32 bit elements resulting in: - // c0.val[0]: 00 10 20 30 04 14 24 34 - // c0.val[1]: 02 12 22 32 06 16 26 36 - // c1.val[0]: 01 11 21 31 05 15 25 35 - // c1.val[1]: 03 13 23 33 07 17 27 37 - // c2.val[0]: 40 50 60 70 44 54 64 74 - // c2.val[1]: 42 52 62 72 46 56 66 76 - // c3.val[0]: 41 51 61 71 45 55 65 75 - // c3.val[1]: 43 53 63 73 47 57 67 77 - - const int32x4x2_t c0 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]), - vreinterpretq_s32_s16(b1.val[0])); - const int32x4x2_t c1 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[1]), - vreinterpretq_s32_s16(b1.val[1])); - const int32x4x2_t c2 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[0]), - vreinterpretq_s32_s16(b3.val[0])); - const int32x4x2_t c3 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[1]), - vreinterpretq_s32_s16(b3.val[1])); - - // Swap 64 bit elements resulting in: - // d0.val[0]: 00 10 20 30 40 50 60 70 - // d0.val[1]: 04 14 24 34 44 54 64 74 - // d1.val[0]: 01 11 21 31 41 51 61 71 - // d1.val[1]: 05 15 25 35 45 55 65 75 - // d2.val[0]: 02 12 22 32 42 52 62 72 - // d2.val[1]: 06 16 26 36 46 56 66 76 - // d3.val[0]: 03 13 23 33 43 53 63 73 - // d3.val[1]: 07 17 27 37 47 57 67 77 - const 
int16x8x2_t d0 = vpx_vtrnq_s64_to_s16(c0.val[0], c2.val[0]); - const int16x8x2_t d1 = vpx_vtrnq_s64_to_s16(c1.val[0], c3.val[0]); - const int16x8x2_t d2 = vpx_vtrnq_s64_to_s16(c0.val[1], c2.val[1]); - const int16x8x2_t d3 = vpx_vtrnq_s64_to_s16(c1.val[1], c3.val[1]); - - *out = d0.val[0]; - *(out + 1) = d1.val[0]; - *(out + 2) = d2.val[0]; - *(out + 3) = d3.val[0]; - *(out + 4) = d0.val[1]; - *(out + 5) = d1.val[1]; - *(out + 6) = d2.val[1]; - *(out + 7) = d3.val[1]; -} - -static INLINE void transpose_s16_4x4d(int16x4_t *a0, int16x4_t *a1, - int16x4_t *a2, int16x4_t *a3) { - // Swap 16 bit elements. Goes from: - // a0: 00 01 02 03 - // a1: 10 11 12 13 - // a2: 20 21 22 23 - // a3: 30 31 32 33 - // to: - // b0.val[0]: 00 10 02 12 - // b0.val[1]: 01 11 03 13 - // b1.val[0]: 20 30 22 32 - // b1.val[1]: 21 31 23 33 - - const int16x4x2_t b0 = vtrn_s16(*a0, *a1); - const int16x4x2_t b1 = vtrn_s16(*a2, *a3); - - // Swap 32 bit elements resulting in: - // c0.val[0]: 00 10 20 30 - // c0.val[1]: 02 12 22 32 - // c1.val[0]: 01 11 21 31 - // c1.val[1]: 03 13 23 33 - - const int32x2x2_t c0 = vtrn_s32(vreinterpret_s32_s16(b0.val[0]), - vreinterpret_s32_s16(b1.val[0])); - const int32x2x2_t c1 = vtrn_s32(vreinterpret_s32_s16(b0.val[1]), - vreinterpret_s32_s16(b1.val[1])); - - *a0 = vreinterpret_s16_s32(c0.val[0]); - *a1 = vreinterpret_s16_s32(c1.val[0]); - *a2 = vreinterpret_s16_s32(c0.val[1]); - *a3 = vreinterpret_s16_s32(c1.val[1]); -} - -static INLINE int32x4x2_t aom_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) { - int32x4x2_t b0; - b0.val[0] = vcombine_s32(vget_low_s32(a0), vget_low_s32(a1)); - b0.val[1] = vcombine_s32(vget_high_s32(a0), vget_high_s32(a1)); - return b0; -} - -static INLINE void transpose_s32_4x4(int32x4_t *a0, int32x4_t *a1, - int32x4_t *a2, int32x4_t *a3) { - // Swap 32 bit elements. 
Goes from: - // a0: 00 01 02 03 - // a1: 10 11 12 13 - // a2: 20 21 22 23 - // a3: 30 31 32 33 - // to: - // b0.val[0]: 00 10 02 12 - // b0.val[1]: 01 11 03 13 - // b1.val[0]: 20 30 22 32 - // b1.val[1]: 21 31 23 33 - - const int32x4x2_t b0 = vtrnq_s32(*a0, *a1); - const int32x4x2_t b1 = vtrnq_s32(*a2, *a3); - - // Swap 64 bit elements resulting in: - // c0.val[0]: 00 10 20 30 - // c0.val[1]: 02 12 22 32 - // c1.val[0]: 01 11 21 31 - // c1.val[1]: 03 13 23 33 - - const int32x4x2_t c0 = aom_vtrnq_s64_to_s32(b0.val[0], b1.val[0]); - const int32x4x2_t c1 = aom_vtrnq_s64_to_s32(b0.val[1], b1.val[1]); - - *a0 = c0.val[0]; - *a1 = c1.val[0]; - *a2 = c0.val[1]; - *a3 = c1.val[1]; -} - -#endif // AOM_AV1_COMMON_ARM_TRANSPOSE_NEON_H_ diff --git a/third_party/aom/av1/common/arm/warp_plane_neon.c b/third_party/aom/av1/common/arm/warp_plane_neon.c deleted file mode 100644 index 7f02d42a7..000000000 --- a/third_party/aom/av1/common/arm/warp_plane_neon.c +++ /dev/null @@ -1,714 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <assert.h> -#include <arm_neon.h> -#include <memory.h> -#include <math.h> - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_ports/mem.h" -#include "config/av1_rtcd.h" -#include "av1/common/warped_motion.h" -#include "av1/common/scale.h" - -/* This is a modified version of 'warped_filter' from warped_motion.c: - * Each coefficient is stored in 8 bits instead of 16 bits - * The coefficients are rearranged in the column order 0, 2, 4, 6, 1, 3, 5, 7 - - This is done in order to avoid overflow: Since the tap with the largest - coefficient could be any of taps 2, 3, 4 or 5, we can't use the summation - order ((0 + 1) + (4 + 5)) + ((2 + 3) + (6 + 7)) used in the regular - convolve functions. - - Instead, we use the summation order - ((0 + 2) + (4 + 6)) + ((1 + 3) + (5 + 7)). - The rearrangement of coefficients in this table is so that we can get the - coefficients into the correct order more quickly. -*/ -/* clang-format off */ -DECLARE_ALIGNED(8, static const int8_t, - filter_8bit_neon[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8]) = { -#if WARPEDPIXEL_PREC_BITS == 6 - // [-1, 0) - { 0, 127, 0, 0, 0, 1, 0, 0}, { 0, 127, 0, 0, -1, 2, 0, 0}, - { 1, 127, -1, 0, -3, 4, 0, 0}, { 1, 126, -2, 0, -4, 6, 1, 0}, - { 1, 126, -3, 0, -5, 8, 1, 0}, { 1, 125, -4, 0, -6, 11, 1, 0}, - { 1, 124, -4, 0, -7, 13, 1, 0}, { 2, 123, -5, 0, -8, 15, 1, 0}, - { 2, 122, -6, 0, -9, 18, 1, 0}, { 2, 121, -6, 0, -10, 20, 1, 0}, - { 2, 120, -7, 0, -11, 22, 2, 0}, { 2, 119, -8, 0, -12, 25, 2, 0}, - { 3, 117, -8, 0, -13, 27, 2, 0}, { 3, 116, -9, 0, -13, 29, 2, 0}, - { 3, 114, -10, 0, -14, 32, 3, 0}, { 3, 113, -10, 0, -15, 35, 2, 0}, - { 3, 111, -11, 0, -15, 37, 3, 0}, { 3, 109, -11, 0, -16, 40, 3, 0}, - { 3, 108, -12, 0, -16, 42, 3, 0}, { 4, 106, -13, 0, -17, 45, 3, 0}, - { 4, 104, -13, 0, -17, 47, 3, 0}, { 4, 102, -14, 0, -17, 50, 3, 0}, - { 4, 100, -14, 0, -17, 52, 3, 0}, { 4, 98, -15, 0, -18, 55, 4, 0}, - { 4, 96, -15, 0, -18, 58, 3, 0}, { 4, 94, -16, 0, -18, 60, 4, 0}, - { 4, 91, -16, 0, 
-18, 63, 4, 0}, { 4, 89, -16, 0, -18, 65, 4, 0}, - { 4, 87, -17, 0, -18, 68, 4, 0}, { 4, 85, -17, 0, -18, 70, 4, 0}, - { 4, 82, -17, 0, -18, 73, 4, 0}, { 4, 80, -17, 0, -18, 75, 4, 0}, - { 4, 78, -18, 0, -18, 78, 4, 0}, { 4, 75, -18, 0, -17, 80, 4, 0}, - { 4, 73, -18, 0, -17, 82, 4, 0}, { 4, 70, -18, 0, -17, 85, 4, 0}, - { 4, 68, -18, 0, -17, 87, 4, 0}, { 4, 65, -18, 0, -16, 89, 4, 0}, - { 4, 63, -18, 0, -16, 91, 4, 0}, { 4, 60, -18, 0, -16, 94, 4, 0}, - { 3, 58, -18, 0, -15, 96, 4, 0}, { 4, 55, -18, 0, -15, 98, 4, 0}, - { 3, 52, -17, 0, -14, 100, 4, 0}, { 3, 50, -17, 0, -14, 102, 4, 0}, - { 3, 47, -17, 0, -13, 104, 4, 0}, { 3, 45, -17, 0, -13, 106, 4, 0}, - { 3, 42, -16, 0, -12, 108, 3, 0}, { 3, 40, -16, 0, -11, 109, 3, 0}, - { 3, 37, -15, 0, -11, 111, 3, 0}, { 2, 35, -15, 0, -10, 113, 3, 0}, - { 3, 32, -14, 0, -10, 114, 3, 0}, { 2, 29, -13, 0, -9, 116, 3, 0}, - { 2, 27, -13, 0, -8, 117, 3, 0}, { 2, 25, -12, 0, -8, 119, 2, 0}, - { 2, 22, -11, 0, -7, 120, 2, 0}, { 1, 20, -10, 0, -6, 121, 2, 0}, - { 1, 18, -9, 0, -6, 122, 2, 0}, { 1, 15, -8, 0, -5, 123, 2, 0}, - { 1, 13, -7, 0, -4, 124, 1, 0}, { 1, 11, -6, 0, -4, 125, 1, 0}, - { 1, 8, -5, 0, -3, 126, 1, 0}, { 1, 6, -4, 0, -2, 126, 1, 0}, - { 0, 4, -3, 0, -1, 127, 1, 0}, { 0, 2, -1, 0, 0, 127, 0, 0}, - // [0, 1) - { 0, 0, 1, 0, 0, 127, 0, 0}, { 0, -1, 2, 0, 0, 127, 0, 0}, - { 0, -3, 4, 1, 1, 127, -2, 0}, { 0, -5, 6, 1, 1, 127, -2, 0}, - { 0, -6, 8, 1, 2, 126, -3, 0}, {-1, -7, 11, 2, 2, 126, -4, -1}, - {-1, -8, 13, 2, 3, 125, -5, -1}, {-1, -10, 16, 3, 3, 124, -6, -1}, - {-1, -11, 18, 3, 4, 123, -7, -1}, {-1, -12, 20, 3, 4, 122, -7, -1}, - {-1, -13, 23, 3, 4, 121, -8, -1}, {-2, -14, 25, 4, 5, 120, -9, -1}, - {-1, -15, 27, 4, 5, 119, -10, -1}, {-1, -16, 30, 4, 5, 118, -11, -1}, - {-2, -17, 33, 5, 6, 116, -12, -1}, {-2, -17, 35, 5, 6, 114, -12, -1}, - {-2, -18, 38, 5, 6, 113, -13, -1}, {-2, -19, 41, 6, 7, 111, -14, -2}, - {-2, -19, 43, 6, 7, 110, -15, -2}, {-2, -20, 46, 6, 7, 108, -15, -2}, - {-2, -20, 49, 6, 7, 106, 
-16, -2}, {-2, -21, 51, 7, 7, 104, -16, -2}, - {-2, -21, 54, 7, 7, 102, -17, -2}, {-2, -21, 56, 7, 8, 100, -18, -2}, - {-2, -22, 59, 7, 8, 98, -18, -2}, {-2, -22, 62, 7, 8, 96, -19, -2}, - {-2, -22, 64, 7, 8, 94, -19, -2}, {-2, -22, 67, 8, 8, 91, -20, -2}, - {-2, -22, 69, 8, 8, 89, -20, -2}, {-2, -22, 72, 8, 8, 87, -21, -2}, - {-2, -21, 74, 8, 8, 84, -21, -2}, {-2, -22, 77, 8, 8, 82, -21, -2}, - {-2, -21, 79, 8, 8, 79, -21, -2}, {-2, -21, 82, 8, 8, 77, -22, -2}, - {-2, -21, 84, 8, 8, 74, -21, -2}, {-2, -21, 87, 8, 8, 72, -22, -2}, - {-2, -20, 89, 8, 8, 69, -22, -2}, {-2, -20, 91, 8, 8, 67, -22, -2}, - {-2, -19, 94, 8, 7, 64, -22, -2}, {-2, -19, 96, 8, 7, 62, -22, -2}, - {-2, -18, 98, 8, 7, 59, -22, -2}, {-2, -18, 100, 8, 7, 56, -21, -2}, - {-2, -17, 102, 7, 7, 54, -21, -2}, {-2, -16, 104, 7, 7, 51, -21, -2}, - {-2, -16, 106, 7, 6, 49, -20, -2}, {-2, -15, 108, 7, 6, 46, -20, -2}, - {-2, -15, 110, 7, 6, 43, -19, -2}, {-2, -14, 111, 7, 6, 41, -19, -2}, - {-1, -13, 113, 6, 5, 38, -18, -2}, {-1, -12, 114, 6, 5, 35, -17, -2}, - {-1, -12, 116, 6, 5, 33, -17, -2}, {-1, -11, 118, 5, 4, 30, -16, -1}, - {-1, -10, 119, 5, 4, 27, -15, -1}, {-1, -9, 120, 5, 4, 25, -14, -2}, - {-1, -8, 121, 4, 3, 23, -13, -1}, {-1, -7, 122, 4, 3, 20, -12, -1}, - {-1, -7, 123, 4, 3, 18, -11, -1}, {-1, -6, 124, 3, 3, 16, -10, -1}, - {-1, -5, 125, 3, 2, 13, -8, -1}, {-1, -4, 126, 2, 2, 11, -7, -1}, - { 0, -3, 126, 2, 1, 8, -6, 0}, { 0, -2, 127, 1, 1, 6, -5, 0}, - { 0, -2, 127, 1, 1, 4, -3, 0}, { 0, 0, 127, 0, 0, 2, -1, 0}, - // [1, 2) - { 0, 0, 127, 0, 0, 1, 0, 0}, { 0, 0, 127, 0, 0, -1, 2, 0}, - { 0, 1, 127, -1, 0, -3, 4, 0}, { 0, 1, 126, -2, 0, -4, 6, 1}, - { 0, 1, 126, -3, 0, -5, 8, 1}, { 0, 1, 125, -4, 0, -6, 11, 1}, - { 0, 1, 124, -4, 0, -7, 13, 1}, { 0, 2, 123, -5, 0, -8, 15, 1}, - { 0, 2, 122, -6, 0, -9, 18, 1}, { 0, 2, 121, -6, 0, -10, 20, 1}, - { 0, 2, 120, -7, 0, -11, 22, 2}, { 0, 2, 119, -8, 0, -12, 25, 2}, - { 0, 3, 117, -8, 0, -13, 27, 2}, { 0, 3, 116, -9, 0, -13, 29, 2}, - { 0, 3, 114, 
-10, 0, -14, 32, 3}, { 0, 3, 113, -10, 0, -15, 35, 2}, - { 0, 3, 111, -11, 0, -15, 37, 3}, { 0, 3, 109, -11, 0, -16, 40, 3}, - { 0, 3, 108, -12, 0, -16, 42, 3}, { 0, 4, 106, -13, 0, -17, 45, 3}, - { 0, 4, 104, -13, 0, -17, 47, 3}, { 0, 4, 102, -14, 0, -17, 50, 3}, - { 0, 4, 100, -14, 0, -17, 52, 3}, { 0, 4, 98, -15, 0, -18, 55, 4}, - { 0, 4, 96, -15, 0, -18, 58, 3}, { 0, 4, 94, -16, 0, -18, 60, 4}, - { 0, 4, 91, -16, 0, -18, 63, 4}, { 0, 4, 89, -16, 0, -18, 65, 4}, - { 0, 4, 87, -17, 0, -18, 68, 4}, { 0, 4, 85, -17, 0, -18, 70, 4}, - { 0, 4, 82, -17, 0, -18, 73, 4}, { 0, 4, 80, -17, 0, -18, 75, 4}, - { 0, 4, 78, -18, 0, -18, 78, 4}, { 0, 4, 75, -18, 0, -17, 80, 4}, - { 0, 4, 73, -18, 0, -17, 82, 4}, { 0, 4, 70, -18, 0, -17, 85, 4}, - { 0, 4, 68, -18, 0, -17, 87, 4}, { 0, 4, 65, -18, 0, -16, 89, 4}, - { 0, 4, 63, -18, 0, -16, 91, 4}, { 0, 4, 60, -18, 0, -16, 94, 4}, - { 0, 3, 58, -18, 0, -15, 96, 4}, { 0, 4, 55, -18, 0, -15, 98, 4}, - { 0, 3, 52, -17, 0, -14, 100, 4}, { 0, 3, 50, -17, 0, -14, 102, 4}, - { 0, 3, 47, -17, 0, -13, 104, 4}, { 0, 3, 45, -17, 0, -13, 106, 4}, - { 0, 3, 42, -16, 0, -12, 108, 3}, { 0, 3, 40, -16, 0, -11, 109, 3}, - { 0, 3, 37, -15, 0, -11, 111, 3}, { 0, 2, 35, -15, 0, -10, 113, 3}, - { 0, 3, 32, -14, 0, -10, 114, 3}, { 0, 2, 29, -13, 0, -9, 116, 3}, - { 0, 2, 27, -13, 0, -8, 117, 3}, { 0, 2, 25, -12, 0, -8, 119, 2}, - { 0, 2, 22, -11, 0, -7, 120, 2}, { 0, 1, 20, -10, 0, -6, 121, 2}, - { 0, 1, 18, -9, 0, -6, 122, 2}, { 0, 1, 15, -8, 0, -5, 123, 2}, - { 0, 1, 13, -7, 0, -4, 124, 1}, { 0, 1, 11, -6, 0, -4, 125, 1}, - { 0, 1, 8, -5, 0, -3, 126, 1}, { 0, 1, 6, -4, 0, -2, 126, 1}, - { 0, 0, 4, -3, 0, -1, 127, 1}, { 0, 0, 2, -1, 0, 0, 127, 0}, - // dummy (replicate row index 191) - { 0, 0, 2, -1, 0, 0, 127, 0}, - -#else - // [-1, 0) - { 0, 127, 0, 0, 0, 1, 0, 0}, { 1, 127, -1, 0, -3, 4, 0, 0}, - { 1, 126, -3, 0, -5, 8, 1, 0}, { 1, 124, -4, 0, -7, 13, 1, 0}, - { 2, 122, -6, 0, -9, 18, 1, 0}, { 2, 120, -7, 0, -11, 22, 2, 0}, - { 3, 117, -8, 0, -13, 
27, 2, 0}, { 3, 114, -10, 0, -14, 32, 3, 0}, - { 3, 111, -11, 0, -15, 37, 3, 0}, { 3, 108, -12, 0, -16, 42, 3, 0}, - { 4, 104, -13, 0, -17, 47, 3, 0}, { 4, 100, -14, 0, -17, 52, 3, 0}, - { 4, 96, -15, 0, -18, 58, 3, 0}, { 4, 91, -16, 0, -18, 63, 4, 0}, - { 4, 87, -17, 0, -18, 68, 4, 0}, { 4, 82, -17, 0, -18, 73, 4, 0}, - { 4, 78, -18, 0, -18, 78, 4, 0}, { 4, 73, -18, 0, -17, 82, 4, 0}, - { 4, 68, -18, 0, -17, 87, 4, 0}, { 4, 63, -18, 0, -16, 91, 4, 0}, - { 3, 58, -18, 0, -15, 96, 4, 0}, { 3, 52, -17, 0, -14, 100, 4, 0}, - { 3, 47, -17, 0, -13, 104, 4, 0}, { 3, 42, -16, 0, -12, 108, 3, 0}, - { 3, 37, -15, 0, -11, 111, 3, 0}, { 3, 32, -14, 0, -10, 114, 3, 0}, - { 2, 27, -13, 0, -8, 117, 3, 0}, { 2, 22, -11, 0, -7, 120, 2, 0}, - { 1, 18, -9, 0, -6, 122, 2, 0}, { 1, 13, -7, 0, -4, 124, 1, 0}, - { 1, 8, -5, 0, -3, 126, 1, 0}, { 0, 4, -3, 0, -1, 127, 1, 0}, - // [0, 1) - { 0, 0, 1, 0, 0, 127, 0, 0}, { 0, -3, 4, 1, 1, 127, -2, 0}, - { 0, -6, 8, 1, 2, 126, -3, 0}, {-1, -8, 13, 2, 3, 125, -5, -1}, - {-1, -11, 18, 3, 4, 123, -7, -1}, {-1, -13, 23, 3, 4, 121, -8, -1}, - {-1, -15, 27, 4, 5, 119, -10, -1}, {-2, -17, 33, 5, 6, 116, -12, -1}, - {-2, -18, 38, 5, 6, 113, -13, -1}, {-2, -19, 43, 6, 7, 110, -15, -2}, - {-2, -20, 49, 6, 7, 106, -16, -2}, {-2, -21, 54, 7, 7, 102, -17, -2}, - {-2, -22, 59, 7, 8, 98, -18, -2}, {-2, -22, 64, 7, 8, 94, -19, -2}, - {-2, -22, 69, 8, 8, 89, -20, -2}, {-2, -21, 74, 8, 8, 84, -21, -2}, - {-2, -21, 79, 8, 8, 79, -21, -2}, {-2, -21, 84, 8, 8, 74, -21, -2}, - {-2, -20, 89, 8, 8, 69, -22, -2}, {-2, -19, 94, 8, 7, 64, -22, -2}, - {-2, -18, 98, 8, 7, 59, -22, -2}, {-2, -17, 102, 7, 7, 54, -21, -2}, - {-2, -16, 106, 7, 6, 49, -20, -2}, {-2, -15, 110, 7, 6, 43, -19, -2}, - {-1, -13, 113, 6, 5, 38, -18, -2}, {-1, -12, 116, 6, 5, 33, -17, -2}, - {-1, -10, 119, 5, 4, 27, -15, -1}, {-1, -8, 121, 4, 3, 23, -13, -1}, - {-1, -7, 123, 4, 3, 18, -11, -1}, {-1, -5, 125, 3, 2, 13, -8, -1}, - { 0, -3, 126, 2, 1, 8, -6, 0}, { 0, -2, 127, 1, 1, 4, -3, 0}, - // [1, 
2) - { 0, 0, 127, 0, 0, 1, 0, 0}, { 0, 1, 127, -1, 0, -3, 4, 0}, - { 0, 1, 126, -3, 0, -5, 8, 1}, { 0, 1, 124, -4, 0, -7, 13, 1}, - { 0, 2, 122, -6, 0, -9, 18, 1}, { 0, 2, 120, -7, 0, -11, 22, 2}, - { 0, 3, 117, -8, 0, -13, 27, 2}, { 0, 3, 114, -10, 0, -14, 32, 3}, - { 0, 3, 111, -11, 0, -15, 37, 3}, { 0, 3, 108, -12, 0, -16, 42, 3}, - { 0, 4, 104, -13, 0, -17, 47, 3}, { 0, 4, 100, -14, 0, -17, 52, 3}, - { 0, 4, 96, -15, 0, -18, 58, 3}, { 0, 4, 91, -16, 0, -18, 63, 4}, - { 0, 4, 87, -17, 0, -18, 68, 4}, { 0, 4, 82, -17, 0, -18, 73, 4}, - { 0, 4, 78, -18, 0, -18, 78, 4}, { 0, 4, 73, -18, 0, -17, 82, 4}, - { 0, 4, 68, -18, 0, -17, 87, 4}, { 0, 4, 63, -18, 0, -16, 91, 4}, - { 0, 3, 58, -18, 0, -15, 96, 4}, { 0, 3, 52, -17, 0, -14, 100, 4}, - { 0, 3, 47, -17, 0, -13, 104, 4}, { 0, 3, 42, -16, 0, -12, 108, 3}, - { 0, 3, 37, -15, 0, -11, 111, 3}, { 0, 3, 32, -14, 0, -10, 114, 3}, - { 0, 2, 27, -13, 0, -8, 117, 3}, { 0, 2, 22, -11, 0, -7, 120, 2}, - { 0, 1, 18, -9, 0, -6, 122, 2}, { 0, 1, 13, -7, 0, -4, 124, 1}, - { 0, 1, 8, -5, 0, -3, 126, 1}, { 0, 0, 4, -3, 0, -1, 127, 1}, - // dummy (replicate row index 95) - { 0, 0, 4, -3, 0, -1, 127, 1}, -#endif // WARPEDPIXEL_PREC_BITS == 6 -}; -/* clang-format on */ - -static INLINE void convolve(int32x2x2_t x0, int32x2x2_t x1, uint8x8_t src_0, - uint8x8_t src_1, int16x4_t *res) { - int16x8_t coeff_0, coeff_1; - int16x8_t pix_0, pix_1; - - coeff_0 = vcombine_s16(vreinterpret_s16_s32(x0.val[0]), - vreinterpret_s16_s32(x1.val[0])); - coeff_1 = vcombine_s16(vreinterpret_s16_s32(x0.val[1]), - vreinterpret_s16_s32(x1.val[1])); - - pix_0 = vreinterpretq_s16_u16(vmovl_u8(src_0)); - pix_0 = vmulq_s16(coeff_0, pix_0); - - pix_1 = vreinterpretq_s16_u16(vmovl_u8(src_1)); - pix_0 = vmlaq_s16(pix_0, coeff_1, pix_1); - - *res = vpadd_s16(vget_low_s16(pix_0), vget_high_s16(pix_0)); -} - -static INLINE void horizontal_filter_neon(uint8x16_t src_1, uint8x16_t src_2, - uint8x16_t src_3, uint8x16_t src_4, - int16x8_t *tmp_dst, int sx, int alpha, - 
int k, const int offset_bits_horiz, - const int reduce_bits_horiz) { - const uint8x16_t mask = { 255, 0, 255, 0, 255, 0, 255, 0, - 255, 0, 255, 0, 255, 0, 255, 0 }; - const int32x4_t add_const = vdupq_n_s32((int32_t)(1 << offset_bits_horiz)); - const int16x8_t shift = vdupq_n_s16(-(int16_t)reduce_bits_horiz); - - int16x8_t f0, f1, f2, f3, f4, f5, f6, f7; - int32x2x2_t b0, b1; - uint8x8_t src_1_low, src_2_low, src_3_low, src_4_low, src_5_low, src_6_low; - int32x4_t tmp_res_low, tmp_res_high; - uint16x8_t res; - int16x4_t res_0246_even, res_0246_odd, res_1357_even, res_1357_odd; - - uint8x16_t tmp_0 = vandq_u8(src_1, mask); - uint8x16_t tmp_1 = vandq_u8(src_2, mask); - uint8x16_t tmp_2 = vandq_u8(src_3, mask); - uint8x16_t tmp_3 = vandq_u8(src_4, mask); - - tmp_2 = vextq_u8(tmp_0, tmp_0, 1); - tmp_3 = vextq_u8(tmp_1, tmp_1, 1); - - src_1 = vaddq_u8(tmp_0, tmp_2); - src_2 = vaddq_u8(tmp_1, tmp_3); - - src_1_low = vget_low_u8(src_1); - src_2_low = vget_low_u8(src_2); - src_3_low = vget_low_u8(vextq_u8(src_1, src_1, 4)); - src_4_low = vget_low_u8(vextq_u8(src_2, src_2, 4)); - src_5_low = vget_low_u8(vextq_u8(src_1, src_1, 2)); - src_6_low = vget_low_u8(vextq_u8(src_1, src_1, 6)); - - // Loading the 8 filter taps - f0 = vmovl_s8( - vld1_s8(filter_8bit_neon[(sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS])); - f1 = vmovl_s8( - vld1_s8(filter_8bit_neon[(sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS])); - f2 = vmovl_s8( - vld1_s8(filter_8bit_neon[(sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS])); - f3 = vmovl_s8( - vld1_s8(filter_8bit_neon[(sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS])); - f4 = vmovl_s8( - vld1_s8(filter_8bit_neon[(sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS])); - f5 = vmovl_s8( - vld1_s8(filter_8bit_neon[(sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS])); - f6 = vmovl_s8( - vld1_s8(filter_8bit_neon[(sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS])); - f7 = vmovl_s8( - vld1_s8(filter_8bit_neon[(sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS])); - - b0 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f0)), - 
vreinterpret_s32_s16(vget_low_s16(f2))); - b1 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f4)), - vreinterpret_s32_s16(vget_low_s16(f6))); - convolve(b0, b1, src_1_low, src_3_low, &res_0246_even); - - b0 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f1)), - vreinterpret_s32_s16(vget_low_s16(f3))); - b1 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f5)), - vreinterpret_s32_s16(vget_low_s16(f7))); - convolve(b0, b1, src_2_low, src_4_low, &res_0246_odd); - - b0 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(f0)), - vreinterpret_s32_s16(vget_high_s16(f2))); - b1 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(f4)), - vreinterpret_s32_s16(vget_high_s16(f6))); - convolve(b0, b1, src_2_low, src_4_low, &res_1357_even); - - b0 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(f1)), - vreinterpret_s32_s16(vget_high_s16(f3))); - b1 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(f5)), - vreinterpret_s32_s16(vget_high_s16(f7))); - convolve(b0, b1, src_5_low, src_6_low, &res_1357_odd); - - tmp_res_low = vaddl_s16(res_0246_even, res_1357_even); - tmp_res_high = vaddl_s16(res_0246_odd, res_1357_odd); - - tmp_res_low = vaddq_s32(tmp_res_low, add_const); - tmp_res_high = vaddq_s32(tmp_res_high, add_const); - - res = vcombine_u16(vqmovun_s32(tmp_res_low), vqmovun_s32(tmp_res_high)); - res = vqrshlq_u16(res, shift); - - tmp_dst[k + 7] = vreinterpretq_s16_u16(res); -} - -static INLINE void vertical_filter_neon(const int16x8_t *src, - int32x4_t *res_low, int32x4_t *res_high, - int sy, int gamma) { - int16x4_t src_0, src_1, fltr_0, fltr_1; - int32x4_t res_0, res_1; - int32x2_t res_0_im, res_1_im; - int32x4_t res_even, res_odd, im_res_0, im_res_1; - - int16x8_t f0, f1, f2, f3, f4, f5, f6, f7; - int16x8x2_t b0, b1, b2, b3; - int32x4x2_t c0, c1, c2, c3; - int32x4x2_t d0, d1, d2, d3; - - b0 = vtrnq_s16(src[0], src[1]); - b1 = vtrnq_s16(src[2], src[3]); - b2 = vtrnq_s16(src[4], src[5]); - b3 = vtrnq_s16(src[6], src[7]); - - c0 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]), - 
vreinterpretq_s32_s16(b0.val[1])); - c1 = vtrnq_s32(vreinterpretq_s32_s16(b1.val[0]), - vreinterpretq_s32_s16(b1.val[1])); - c2 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[0]), - vreinterpretq_s32_s16(b2.val[1])); - c3 = vtrnq_s32(vreinterpretq_s32_s16(b3.val[0]), - vreinterpretq_s32_s16(b3.val[1])); - - f0 = vld1q_s16( - (int16_t *)(warped_filter + ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS))); - f1 = vld1q_s16( - (int16_t *)(warped_filter + ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS))); - f2 = vld1q_s16( - (int16_t *)(warped_filter + ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS))); - f3 = vld1q_s16( - (int16_t *)(warped_filter + ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS))); - f4 = vld1q_s16( - (int16_t *)(warped_filter + ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS))); - f5 = vld1q_s16( - (int16_t *)(warped_filter + ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS))); - f6 = vld1q_s16( - (int16_t *)(warped_filter + ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS))); - f7 = vld1q_s16( - (int16_t *)(warped_filter + ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS))); - - d0 = vtrnq_s32(vreinterpretq_s32_s16(f0), vreinterpretq_s32_s16(f2)); - d1 = vtrnq_s32(vreinterpretq_s32_s16(f4), vreinterpretq_s32_s16(f6)); - d2 = vtrnq_s32(vreinterpretq_s32_s16(f1), vreinterpretq_s32_s16(f3)); - d3 = vtrnq_s32(vreinterpretq_s32_s16(f5), vreinterpretq_s32_s16(f7)); - - // row:0,1 even_col:0,2 - src_0 = vget_low_s16(vreinterpretq_s16_s32(c0.val[0])); - fltr_0 = vget_low_s16(vreinterpretq_s16_s32(d0.val[0])); - res_0 = vmull_s16(src_0, fltr_0); - - // row:0,1,2,3 even_col:0,2 - src_0 = vget_low_s16(vreinterpretq_s16_s32(c1.val[0])); - fltr_0 = vget_low_s16(vreinterpretq_s16_s32(d0.val[1])); - res_0 = vmlal_s16(res_0, src_0, fltr_0); - res_0_im = vpadd_s32(vget_low_s32(res_0), vget_high_s32(res_0)); - - // row:0,1 even_col:4,6 - src_1 = vget_low_s16(vreinterpretq_s16_s32(c0.val[1])); - fltr_1 = vget_low_s16(vreinterpretq_s16_s32(d1.val[0])); - res_1 = vmull_s16(src_1, fltr_1); - - // row:0,1,2,3 even_col:4,6 - 
src_1 = vget_low_s16(vreinterpretq_s16_s32(c1.val[1])); - fltr_1 = vget_low_s16(vreinterpretq_s16_s32(d1.val[1])); - res_1 = vmlal_s16(res_1, src_1, fltr_1); - res_1_im = vpadd_s32(vget_low_s32(res_1), vget_high_s32(res_1)); - - // row:0,1,2,3 even_col:0,2,4,6 - im_res_0 = vcombine_s32(res_0_im, res_1_im); - - // row:4,5 even_col:0,2 - src_0 = vget_low_s16(vreinterpretq_s16_s32(c2.val[0])); - fltr_0 = vget_high_s16(vreinterpretq_s16_s32(d0.val[0])); - res_0 = vmull_s16(src_0, fltr_0); - - // row:4,5,6,7 even_col:0,2 - src_0 = vget_low_s16(vreinterpretq_s16_s32(c3.val[0])); - fltr_0 = vget_high_s16(vreinterpretq_s16_s32(d0.val[1])); - res_0 = vmlal_s16(res_0, src_0, fltr_0); - res_0_im = vpadd_s32(vget_low_s32(res_0), vget_high_s32(res_0)); - - // row:4,5 even_col:4,6 - src_1 = vget_low_s16(vreinterpretq_s16_s32(c2.val[1])); - fltr_1 = vget_high_s16(vreinterpretq_s16_s32(d1.val[0])); - res_1 = vmull_s16(src_1, fltr_1); - - // row:4,5,6,7 even_col:4,6 - src_1 = vget_low_s16(vreinterpretq_s16_s32(c3.val[1])); - fltr_1 = vget_high_s16(vreinterpretq_s16_s32(d1.val[1])); - res_1 = vmlal_s16(res_1, src_1, fltr_1); - res_1_im = vpadd_s32(vget_low_s32(res_1), vget_high_s32(res_1)); - - // row:4,5,6,7 even_col:0,2,4,6 - im_res_1 = vcombine_s32(res_0_im, res_1_im); - - // row:0-7 even_col:0,2,4,6 - res_even = vaddq_s32(im_res_0, im_res_1); - - // row:0,1 odd_col:1,3 - src_0 = vget_high_s16(vreinterpretq_s16_s32(c0.val[0])); - fltr_0 = vget_low_s16(vreinterpretq_s16_s32(d2.val[0])); - res_0 = vmull_s16(src_0, fltr_0); - - // row:0,1,2,3 odd_col:1,3 - src_0 = vget_high_s16(vreinterpretq_s16_s32(c1.val[0])); - fltr_0 = vget_low_s16(vreinterpretq_s16_s32(d2.val[1])); - res_0 = vmlal_s16(res_0, src_0, fltr_0); - res_0_im = vpadd_s32(vget_low_s32(res_0), vget_high_s32(res_0)); - - // row:0,1 odd_col:5,7 - src_1 = vget_high_s16(vreinterpretq_s16_s32(c0.val[1])); - fltr_1 = vget_low_s16(vreinterpretq_s16_s32(d3.val[0])); - res_1 = vmull_s16(src_1, fltr_1); - - // row:0,1,2,3 
odd_col:5,7 - src_1 = vget_high_s16(vreinterpretq_s16_s32(c1.val[1])); - fltr_1 = vget_low_s16(vreinterpretq_s16_s32(d3.val[1])); - res_1 = vmlal_s16(res_1, src_1, fltr_1); - res_1_im = vpadd_s32(vget_low_s32(res_1), vget_high_s32(res_1)); - - // row:0,1,2,3 odd_col:1,3,5,7 - im_res_0 = vcombine_s32(res_0_im, res_1_im); - - // row:4,5 odd_col:1,3 - src_0 = vget_high_s16(vreinterpretq_s16_s32(c2.val[0])); - fltr_0 = vget_high_s16(vreinterpretq_s16_s32(d2.val[0])); - res_0 = vmull_s16(src_0, fltr_0); - - // row:4,5,6,7 odd_col:1,3 - src_0 = vget_high_s16(vreinterpretq_s16_s32(c3.val[0])); - fltr_0 = vget_high_s16(vreinterpretq_s16_s32(d2.val[1])); - res_0 = vmlal_s16(res_0, src_0, fltr_0); - res_0_im = vpadd_s32(vget_low_s32(res_0), vget_high_s32(res_0)); - - // row:4,5 odd_col:5,7 - src_1 = vget_high_s16(vreinterpretq_s16_s32(c2.val[1])); - fltr_1 = vget_high_s16(vreinterpretq_s16_s32(d3.val[0])); - res_1 = vmull_s16(src_1, fltr_1); - - // row:4,5,6,7 odd_col:5,7 - src_1 = vget_high_s16(vreinterpretq_s16_s32(c3.val[1])); - fltr_1 = vget_high_s16(vreinterpretq_s16_s32(d3.val[1])); - res_1 = vmlal_s16(res_1, src_1, fltr_1); - res_1_im = vpadd_s32(vget_low_s32(res_1), vget_high_s32(res_1)); - - // row:4,5,6,7 odd_col:1,3,5,7 - im_res_1 = vcombine_s32(res_0_im, res_1_im); - - // row:0-7 odd_col:1,3,5,7 - res_odd = vaddq_s32(im_res_0, im_res_1); - - // reordering as 0 1 2 3 | 4 5 6 7 - c0 = vtrnq_s32(res_even, res_odd); - - // Final store - *res_low = vcombine_s32(vget_low_s32(c0.val[0]), vget_low_s32(c0.val[1])); - *res_high = vcombine_s32(vget_high_s32(c0.val[0]), vget_high_s32(c0.val[1])); -} - -void av1_warp_affine_neon(const int32_t *mat, const uint8_t *ref, int width, - int height, int stride, uint8_t *pred, int p_col, - int p_row, int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, - ConvolveParams *conv_params, int16_t alpha, - int16_t beta, int16_t gamma, int16_t delta) { - int16x8_t tmp[15]; - const int bd = 8; - const int w0 = 
conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const int32x4_t fwd = vdupq_n_s32((int32_t)w0); - const int32x4_t bwd = vdupq_n_s32((int32_t)w1); - const int16x8_t sub_constant = vdupq_n_s16((1 << (bd - 1)) + (1 << bd)); - - int limit = 0; - uint8x16_t vec_dup, mask_val; - int32x4_t res_lo, res_hi; - int16x8_t result_final; - uint8x16_t src_1, src_2, src_3, src_4; - uint8x16_t indx_vec = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 - }; - uint8x16_t cmp_vec; - - const int reduce_bits_horiz = conv_params->round_0; - const int reduce_bits_vert = conv_params->is_compound - ? conv_params->round_1 - : 2 * FILTER_BITS - reduce_bits_horiz; - const int32x4_t shift_vert = vdupq_n_s32(-(int32_t)reduce_bits_vert); - const int offset_bits_horiz = bd + FILTER_BITS - 1; - - assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL)); - - const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz; - int32x4_t add_const_vert = vdupq_n_s32((int32_t)(1 << offset_bits_vert)); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int16x4_t round_bits_vec = vdup_n_s16(-(int16_t)round_bits); - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int16x4_t res_sub_const = - vdup_n_s16(-((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)))); - int k; - - assert(IMPLIES(conv_params->do_average, conv_params->is_compound)); - - for (int i = 0; i < p_height; i += 8) { - for (int j = 0; j < p_width; j += 8) { - const int32_t src_x = (p_col + j + 4) << subsampling_x; - const int32_t src_y = (p_row + i + 4) << subsampling_y; - const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0]; - const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1]; - const int32_t x4 = dst_x >> subsampling_x; - const int32_t y4 = dst_y >> subsampling_y; - - int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS; - int32_t sx4 = x4 & ((1 << 
WARPEDMODEL_PREC_BITS) - 1); - int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS; - int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - - sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) + - (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS); - sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) + - (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS); - - sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - // horizontal - if (ix4 <= -7) { - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int16_t dup_val = - (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) + - ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)); - - tmp[k + 7] = vdupq_n_s16(dup_val); - } - } else if (ix4 >= width + 6) { - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int16_t dup_val = (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) + - ref[iy * stride + (width - 1)] * - (1 << (FILTER_BITS - reduce_bits_horiz)); - tmp[k + 7] = vdupq_n_s16(dup_val); - } - } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) { - const int out_of_boundary_left = -(ix4 - 6); - const int out_of_boundary_right = (ix4 + 8) - width; - - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int sx = sx4 + beta * (k + 4); - - const uint8_t *src = ref + iy * stride + ix4 - 7; - src_1 = vld1q_u8(src); - - if (out_of_boundary_left >= 0) { - limit = out_of_boundary_left + 1; - cmp_vec = vdupq_n_u8(out_of_boundary_left); - vec_dup = vdupq_n_u8(*(src + limit)); - mask_val = vcleq_u8(indx_vec, cmp_vec); - src_1 = vbslq_u8(mask_val, vec_dup, src_1); - } - if (out_of_boundary_right >= 0) { - limit = 15 - (out_of_boundary_right + 1); - cmp_vec = vdupq_n_u8(15 - 
out_of_boundary_right); - vec_dup = vdupq_n_u8(*(src + limit)); - mask_val = vcgeq_u8(indx_vec, cmp_vec); - src_1 = vbslq_u8(mask_val, vec_dup, src_1); - } - src_2 = vextq_u8(src_1, src_1, 1); - src_3 = vextq_u8(src_2, src_2, 1); - src_4 = vextq_u8(src_3, src_3, 1); - - horizontal_filter_neon(src_1, src_2, src_3, src_4, tmp, sx, alpha, k, - offset_bits_horiz, reduce_bits_horiz); - } - } else { - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int sx = sx4 + beta * (k + 4); - - const uint8_t *src = ref + iy * stride + ix4 - 7; - src_1 = vld1q_u8(src); - src_2 = vextq_u8(src_1, src_1, 1); - src_3 = vextq_u8(src_2, src_2, 1); - src_4 = vextq_u8(src_3, src_3, 1); - - horizontal_filter_neon(src_1, src_2, src_3, src_4, tmp, sx, alpha, k, - offset_bits_horiz, reduce_bits_horiz); - } - } - - // vertical - for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) { - int sy = sy4 + delta * (k + 4); - - const int16x8_t *v_src = tmp + (k + 4); - - vertical_filter_neon(v_src, &res_lo, &res_hi, sy, gamma); - - res_lo = vaddq_s32(res_lo, add_const_vert); - res_hi = vaddq_s32(res_hi, add_const_vert); - - if (conv_params->is_compound) { - uint16_t *const p = - (uint16_t *)&conv_params - ->dst[(i + k + 4) * conv_params->dst_stride + j]; - - res_lo = vrshlq_s32(res_lo, shift_vert); - if (conv_params->do_average) { - uint8_t *const dst8 = &pred[(i + k + 4) * p_stride + j]; - uint16x4_t tmp16_lo = vld1_u16(p); - int32x4_t tmp32_lo = vreinterpretq_s32_u32(vmovl_u16(tmp16_lo)); - int16x4_t tmp16_low; - if (conv_params->use_jnt_comp_avg) { - res_lo = vmulq_s32(res_lo, bwd); - tmp32_lo = vmulq_s32(tmp32_lo, fwd); - tmp32_lo = vaddq_s32(tmp32_lo, res_lo); - tmp16_low = vshrn_n_s32(tmp32_lo, DIST_PRECISION_BITS); - } else { - tmp32_lo = vaddq_s32(tmp32_lo, res_lo); - tmp16_low = vshrn_n_s32(tmp32_lo, 1); - } - int16x4_t res_low = vadd_s16(tmp16_low, res_sub_const); - res_low = vqrshl_s16(res_low, 
round_bits_vec); - int16x8_t final_res_low = vcombine_s16(res_low, res_low); - uint8x8_t res_8_low = vqmovun_s16(final_res_low); - - vst1_lane_u32((uint32_t *)dst8, vreinterpret_u32_u8(res_8_low), 0); - } else { - uint16x4_t res_u16_low = vqmovun_s32(res_lo); - vst1_u16(p, res_u16_low); - } - if (p_width > 4) { - uint16_t *const p4 = - (uint16_t *)&conv_params - ->dst[(i + k + 4) * conv_params->dst_stride + j + 4]; - - res_hi = vrshlq_s32(res_hi, shift_vert); - if (conv_params->do_average) { - uint8_t *const dst8_4 = &pred[(i + k + 4) * p_stride + j + 4]; - - uint16x4_t tmp16_hi = vld1_u16(p4); - int32x4_t tmp32_hi = vreinterpretq_s32_u32(vmovl_u16(tmp16_hi)); - int16x4_t tmp16_high; - if (conv_params->use_jnt_comp_avg) { - res_hi = vmulq_s32(res_hi, bwd); - tmp32_hi = vmulq_s32(tmp32_hi, fwd); - tmp32_hi = vaddq_s32(tmp32_hi, res_hi); - tmp16_high = vshrn_n_s32(tmp32_hi, DIST_PRECISION_BITS); - } else { - tmp32_hi = vaddq_s32(tmp32_hi, res_hi); - tmp16_high = vshrn_n_s32(tmp32_hi, 1); - } - int16x4_t res_high = vadd_s16(tmp16_high, res_sub_const); - res_high = vqrshl_s16(res_high, round_bits_vec); - int16x8_t final_res_high = vcombine_s16(res_high, res_high); - uint8x8_t res_8_high = vqmovun_s16(final_res_high); - - vst1_lane_u32((uint32_t *)dst8_4, vreinterpret_u32_u8(res_8_high), - 0); - } else { - uint16x4_t res_u16_high = vqmovun_s32(res_hi); - vst1_u16(p4, res_u16_high); - } - } - } else { - res_lo = vrshlq_s32(res_lo, shift_vert); - res_hi = vrshlq_s32(res_hi, shift_vert); - - result_final = vcombine_s16(vmovn_s32(res_lo), vmovn_s32(res_hi)); - result_final = vsubq_s16(result_final, sub_constant); - - uint8_t *const p = (uint8_t *)&pred[(i + k + 4) * p_stride + j]; - uint8x8_t val = vqmovun_s16(result_final); - - if (p_width == 4) { - vst1_lane_u32((uint32_t *)p, vreinterpret_u32_u8(val), 0); - } else { - vst1_u8(p, val); - } - } - } - } - } -} diff --git a/third_party/aom/av1/common/arm/wiener_convolve_neon.c 
b/third_party/aom/av1/common/arm/wiener_convolve_neon.c deleted file mode 100644 index a9bb5bcf0..000000000 --- a/third_party/aom/av1/common/arm/wiener_convolve_neon.c +++ /dev/null @@ -1,530 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <arm_neon.h> -#include <assert.h> - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "aom_dsp/txfm_common.h" -#include "aom_ports/mem.h" -#include "av1/common/common.h" -#include "av1/common/arm/convolve_neon.h" -#include "av1/common/arm/mem_neon.h" -#include "av1/common/arm/transpose_neon.h" - -/* Wiener filter 2D - Apply horizontal filter and store in a temporary buffer. When applying - vertical filter, overwrite the original pixel values. 
- */ -void av1_wiener_convolve_add_src_neon(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, - const ConvolveParams *conv_params) { - uint16_t *d_tmp; - uint8_t *d; - const uint8_t *src_ptr, *s_tmp; - uint16_t *dst_ptr; - (void)x_step_q4; - (void)y_step_q4; - - int width, height; - const int bd = 8; - const int intermediate_height = h + SUBPEL_TAPS - 1; - const int center_tap = ((SUBPEL_TAPS - 1) / 2); - int16_t filter_x_tmp[7], filter_y_tmp[7]; - - DECLARE_ALIGNED(16, uint16_t, - temp[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]); - - assert(x_step_q4 == 16 && y_step_q4 == 16); - assert(!(w % 8)); - - assert(w <= MAX_SB_SIZE); - assert(h <= MAX_SB_SIZE); - - assert(filter_x[7] == 0); - assert(filter_y[7] == 0); - - /* assumption of horizontal filtering output will not exceed 15 bit. - ((bd) + 1 + FILTER_BITS - conv_params->round_0) <= 15 - 16 - conv_params->round_0 <= 15 -- (conv_params->round_0) >= 1 - */ - assert((conv_params->round_0) >= 1); - - memcpy(&filter_x_tmp[0], filter_x, sizeof(*filter_x) * FILTER_BITS); - memcpy(&filter_y_tmp[0], filter_y, sizeof(*filter_y) * FILTER_BITS); - - filter_x_tmp[3] += (1 << FILTER_BITS); - filter_y_tmp[3] += (1 << FILTER_BITS); - - s_tmp = src - center_tap * src_stride - center_tap; - dst_ptr = temp; - src_ptr = s_tmp; - height = intermediate_height; - - /* if height is a multiple of 8 */ - if (!(h & 7)) { - int16x8_t res0, res1, res2, res3; - uint16x8_t res4; - uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7; -#if defined(__aarch64__) - uint16x8_t res5, res6, res7, res8, res9, res10, res11; - uint8x8_t t8, t9, t10, t11, t12, t13, t14; - - do { - const uint8_t *s; - - __builtin_prefetch(src_ptr + 0 * src_stride); - __builtin_prefetch(src_ptr + 1 * src_stride); - __builtin_prefetch(src_ptr + 2 * src_stride); - __builtin_prefetch(src_ptr + 3 * src_stride); - __builtin_prefetch(src_ptr + 4 * 
src_stride); - __builtin_prefetch(src_ptr + 5 * src_stride); - __builtin_prefetch(src_ptr + 6 * src_stride); - __builtin_prefetch(src_ptr + 7 * src_stride); - - load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); - - s = src_ptr + 7; - d_tmp = dst_ptr; - width = w; - - __builtin_prefetch(dst_ptr + 0 * dst_stride); - __builtin_prefetch(dst_ptr + 1 * dst_stride); - __builtin_prefetch(dst_ptr + 2 * dst_stride); - __builtin_prefetch(dst_ptr + 3 * dst_stride); - __builtin_prefetch(dst_ptr + 4 * dst_stride); - __builtin_prefetch(dst_ptr + 5 * dst_stride); - __builtin_prefetch(dst_ptr + 6 * dst_stride); - __builtin_prefetch(dst_ptr + 7 * dst_stride); - - do { - load_u8_8x8(s, src_stride, &t7, &t8, &t9, &t10, &t11, &t12, &t13, &t14); - transpose_u8_8x8(&t7, &t8, &t9, &t10, &t11, &t12, &t13, &t14); - - res0 = vreinterpretq_s16_u16(vaddl_u8(t0, t6)); - res1 = vreinterpretq_s16_u16(vaddl_u8(t1, t5)); - res2 = vreinterpretq_s16_u16(vaddl_u8(t2, t4)); - res3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - res4 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp, - bd, conv_params->round_0); - - res0 = vreinterpretq_s16_u16(vaddl_u8(t1, t7)); - res1 = vreinterpretq_s16_u16(vaddl_u8(t2, t6)); - res2 = vreinterpretq_s16_u16(vaddl_u8(t3, t5)); - res3 = vreinterpretq_s16_u16(vmovl_u8(t4)); - res5 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp, - bd, conv_params->round_0); - - res0 = vreinterpretq_s16_u16(vaddl_u8(t2, t8)); - res1 = vreinterpretq_s16_u16(vaddl_u8(t3, t7)); - res2 = vreinterpretq_s16_u16(vaddl_u8(t4, t6)); - res3 = vreinterpretq_s16_u16(vmovl_u8(t5)); - res6 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp, - bd, conv_params->round_0); - - res0 = vreinterpretq_s16_u16(vaddl_u8(t3, t9)); - res1 = vreinterpretq_s16_u16(vaddl_u8(t4, t8)); - res2 = vreinterpretq_s16_u16(vaddl_u8(t5, t7)); - res3 = vreinterpretq_s16_u16(vmovl_u8(t6)); - res7 = 
wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp, - bd, conv_params->round_0); - - res0 = vreinterpretq_s16_u16(vaddl_u8(t4, t10)); - res1 = vreinterpretq_s16_u16(vaddl_u8(t5, t9)); - res2 = vreinterpretq_s16_u16(vaddl_u8(t6, t8)); - res3 = vreinterpretq_s16_u16(vmovl_u8(t7)); - res8 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp, - bd, conv_params->round_0); - - res0 = vreinterpretq_s16_u16(vaddl_u8(t5, t11)); - res1 = vreinterpretq_s16_u16(vaddl_u8(t6, t10)); - res2 = vreinterpretq_s16_u16(vaddl_u8(t7, t9)); - res3 = vreinterpretq_s16_u16(vmovl_u8(t8)); - res9 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp, - bd, conv_params->round_0); - - res0 = vreinterpretq_s16_u16(vaddl_u8(t6, t12)); - res1 = vreinterpretq_s16_u16(vaddl_u8(t7, t11)); - res2 = vreinterpretq_s16_u16(vaddl_u8(t8, t10)); - res3 = vreinterpretq_s16_u16(vmovl_u8(t9)); - res10 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp, - bd, conv_params->round_0); - - res0 = vreinterpretq_s16_u16(vaddl_u8(t7, t13)); - res1 = vreinterpretq_s16_u16(vaddl_u8(t8, t12)); - res2 = vreinterpretq_s16_u16(vaddl_u8(t9, t11)); - res3 = vreinterpretq_s16_u16(vmovl_u8(t10)); - res11 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp, - bd, conv_params->round_0); - - transpose_u16_8x8(&res4, &res5, &res6, &res7, &res8, &res9, &res10, - &res11); - store_u16_8x8(d_tmp, MAX_SB_SIZE, res4, res5, res6, res7, res8, res9, - res10, res11); - - t0 = t8; - t1 = t9; - t2 = t10; - t3 = t11; - t4 = t12; - t5 = t13; - t6 = t14; - s += 8; - d_tmp += 8; - width -= 8; - } while (width > 0); - src_ptr += 8 * src_stride; - dst_ptr += 8 * MAX_SB_SIZE; - height -= 8; - } while (height > 0); -#else - uint8x8_t temp_0; - - do { - const uint8_t *s; - - __builtin_prefetch(src_ptr); - - t0 = vld1_u8(src_ptr); // a0 a1 a2 a3 a4 a5 a6 a7 - s = src_ptr + 8; - d_tmp = dst_ptr; - width = w; - - __builtin_prefetch(dst_ptr); - - do { - t7 = vld1_u8(s); // a8 a9 a10 a11 
a12 a13 a14 a15 - temp_0 = t0; - t0 = t7; - - t1 = vext_u8(temp_0, t7, 1); // a1 a2 a3 a4 a5 a6 a7 a8 - t2 = vext_u8(temp_0, t7, 2); // a2 a3 a4 a5 a6 a7 a8 a9 - t3 = vext_u8(temp_0, t7, 3); // a3 a4 a5 a6 a7 a8 a9 a10 - t4 = vext_u8(temp_0, t7, 4); // a4 a5 a6 a7 a8 a9 a10 a11 - t5 = vext_u8(temp_0, t7, 5); // a5 a6 a7 a8 a9 a10 a11 a12 - t6 = vext_u8(temp_0, t7, 6); // a6 a7 a8 a9 a10 a11 a12 a13 - t7 = vext_u8(temp_0, t7, 7); // a7 a8 a9 a10 a11 a12 a13 a14 - - res0 = vreinterpretq_s16_u16(vaddl_u8(temp_0, t6)); - res1 = vreinterpretq_s16_u16(vaddl_u8(t1, t5)); - res2 = vreinterpretq_s16_u16(vaddl_u8(t2, t4)); - res3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - res4 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp, - bd, conv_params->round_0); - - vst1q_u16(d_tmp, res4); - - s += 8; - d_tmp += 8; - width -= 8; - } while (width > 0); - src_ptr += src_stride; - dst_ptr += MAX_SB_SIZE; - height--; - } while (height > 0); -#endif - } else { - /*if height is a multiple of 4*/ - const uint8_t *s; - int16x8_t tt0, tt1, tt2, tt3; - uint16x8_t d0; - uint8x8_t t0, t1, t2, t3; - -#if defined(__aarch64__) - uint16x4_t res0, res1, res2, res3, res4, res5, res6, res7; - uint16x8_t d1, d2, d3; - int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; - int16x4_t s11, s12, s13, s14; - do { - __builtin_prefetch(src_ptr + 0 * src_stride); - __builtin_prefetch(src_ptr + 1 * src_stride); - __builtin_prefetch(src_ptr + 2 * src_stride); - __builtin_prefetch(src_ptr + 3 * src_stride); - - load_u8_8x4(src_ptr, src_stride, &t0, &t1, &t2, &t3); /*8x4*/ - transpose_u8_8x4(&t0, &t1, &t2, - &t3); /*first 8 pixels of 4 rows transposed-- 4x8*/ - - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - - s0 = vget_low_s16(tt0); /*pa0 pb0 pc0 pd0 -- pixel_a0*/ - s1 = vget_low_s16(tt1); /*pa1 pb1 pc1 pd1 */ - s2 = vget_low_s16(tt2); /*pa2 pb2 pc2 pd2 */ - s3 = 
vget_low_s16(tt3); /*pa3 pb3 pc3 pd3 */ - s4 = vget_high_s16(tt0); /*pa4 pb4 pc4 pd4 */ - s5 = vget_high_s16(tt1); /*pa5 pb5 pc5 pd5 */ - s6 = vget_high_s16(tt2); /*pa6 pb6 pc6 pd6 */ - - __builtin_prefetch(dst_ptr + 0 * dst_stride); - __builtin_prefetch(dst_ptr + 1 * dst_stride); - __builtin_prefetch(dst_ptr + 2 * dst_stride); - __builtin_prefetch(dst_ptr + 3 * dst_stride); - - s = src_ptr + 7; - d_tmp = dst_ptr; - width = w; - - do { - load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); /*8x4*/ - transpose_u8_8x4(&t0, &t1, &t2, &t3); - - tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); - tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); - tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); - tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - - s7 = vget_low_s16(tt0); /*pa7 pb7 pc7 pd7 */ /*4x8*/ - s8 = vget_low_s16(tt1); /*pa8 pb8 pc8 pd8 */ - s9 = vget_low_s16(tt2); /*pa9 pb9 pc9 pd9 */ - s10 = vget_low_s16(tt3); /*pa10 pb10 pc10 pd10 */ - s11 = vget_high_s16(tt0); /*pa11 pb11 pc11 pd11 */ - s12 = vget_high_s16(tt1); /*pa12 pb12 pc12 pd12 */ - s13 = vget_high_s16(tt2); /*pa13 pb13 pc13 pd13 */ - s14 = vget_high_s16(tt3); /*pa14 pb14 pc14 pd14 */ - - res0 = wiener_convolve8_horiz_4x8( - s0, s1, s2, s3, s4, s5, s6, filter_x_tmp, bd, conv_params->round_0); - res1 = wiener_convolve8_horiz_4x8( - s1, s2, s3, s4, s5, s6, s7, filter_x_tmp, bd, conv_params->round_0); - res2 = wiener_convolve8_horiz_4x8( - s2, s3, s4, s5, s6, s7, s8, filter_x_tmp, bd, conv_params->round_0); - res3 = wiener_convolve8_horiz_4x8( - s3, s4, s5, s6, s7, s8, s9, filter_x_tmp, bd, conv_params->round_0); - res4 = - wiener_convolve8_horiz_4x8(s4, s5, s6, s7, s8, s9, s10, - filter_x_tmp, bd, conv_params->round_0); - res5 = - wiener_convolve8_horiz_4x8(s5, s6, s7, s8, s9, s10, s11, - filter_x_tmp, bd, conv_params->round_0); - res6 = - wiener_convolve8_horiz_4x8(s6, s7, s8, s9, s10, s11, s12, - filter_x_tmp, bd, conv_params->round_0); - res7 = - wiener_convolve8_horiz_4x8(s7, s8, s9, s10, s11, s12, s13, - filter_x_tmp, bd, 
conv_params->round_0); - - transpose_u16_4x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6, - &res7, &d0, &d1, &d2, &d3); - - store_u16_8x4(d_tmp, MAX_SB_SIZE, d0, d1, d2, d3); - - s0 = s8; - s1 = s9; - s2 = s10; - s3 = s11; - s4 = s12; - s5 = s13; - s6 = s14; - s += 8; - d_tmp += 8; - width -= 8; - } while (width > 0); - - src_ptr += 4 * src_stride; - dst_ptr += 4 * MAX_SB_SIZE; - height -= 4; - } while (height > 0); -#else - uint8x8_t temp_0, t4, t5, t6, t7; - - do { - __builtin_prefetch(src_ptr); - - t0 = vld1_u8(src_ptr); // a0 a1 a2 a3 a4 a5 a6 a7 - - __builtin_prefetch(dst_ptr); - - s = src_ptr + 8; - d_tmp = dst_ptr; - width = w; - - do { - t7 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15 - temp_0 = t0; - t0 = t7; - - t1 = vext_u8(temp_0, t7, 1); // a1 a2 a3 a4 a5 a6 a7 a8 - t2 = vext_u8(temp_0, t7, 2); // a2 a3 a4 a5 a6 a7 a8 a9 - t3 = vext_u8(temp_0, t7, 3); // a3 a4 a5 a6 a7 a8 a9 a10 - t4 = vext_u8(temp_0, t7, 4); // a4 a5 a6 a7 a8 a9 a10 a11 - t5 = vext_u8(temp_0, t7, 5); // a5 a6 a7 a8 a9 a10 a11 a12 - t6 = vext_u8(temp_0, t7, 6); // a6 a7 a8 a9 a10 a11 a12 a13 - t7 = vext_u8(temp_0, t7, 7); // a7 a8 a9 a10 a11 a12 a13 a14 - - tt0 = vreinterpretq_s16_u16(vaddl_u8(temp_0, t6)); - tt1 = vreinterpretq_s16_u16(vaddl_u8(t1, t5)); - tt2 = vreinterpretq_s16_u16(vaddl_u8(t2, t4)); - tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); - d0 = wiener_convolve8_horiz_8x8(tt0, tt1, tt2, tt3, filter_x_tmp, bd, - conv_params->round_0); - - vst1q_u16(d_tmp, d0); - - s += 8; - d_tmp += 8; - width -= 8; - } while (width > 0); - - src_ptr += src_stride; - dst_ptr += MAX_SB_SIZE; - height -= 1; - } while (height > 0); -#endif - } - - { - int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; - uint8x8_t t0; -#if defined(__aarch64__) - int16x8_t s8, s9, s10; - uint8x8_t t1, t2, t3; -#endif - int16_t *src_tmp_ptr, *s; - uint8_t *dst_tmp_ptr; - height = h; - width = w; - src_tmp_ptr = (int16_t *)temp; - dst_tmp_ptr = dst; - src_stride = MAX_SB_SIZE; - - do { - s = src_tmp_ptr; - s0 = 
vld1q_s16(s); - s += src_stride; - s1 = vld1q_s16(s); - s += src_stride; - s2 = vld1q_s16(s); - s += src_stride; - s3 = vld1q_s16(s); - s += src_stride; - s4 = vld1q_s16(s); - s += src_stride; - s5 = vld1q_s16(s); - s += src_stride; - s6 = vld1q_s16(s); - s += src_stride; - d = dst_tmp_ptr; - height = h; - -#if defined(__aarch64__) - do { - __builtin_prefetch(dst_tmp_ptr + 0 * dst_stride); - __builtin_prefetch(dst_tmp_ptr + 1 * dst_stride); - __builtin_prefetch(dst_tmp_ptr + 2 * dst_stride); - __builtin_prefetch(dst_tmp_ptr + 3 * dst_stride); - - s7 = vld1q_s16(s); - s += src_stride; - s8 = vld1q_s16(s); - s += src_stride; - s9 = vld1q_s16(s); - s += src_stride; - s10 = vld1q_s16(s); - s += src_stride; - - t0 = wiener_convolve8_vert_4x8(s0, s1, s2, s3, s4, s5, s6, filter_y_tmp, - bd, conv_params->round_1); - t1 = wiener_convolve8_vert_4x8(s1, s2, s3, s4, s5, s6, s7, filter_y_tmp, - bd, conv_params->round_1); - t2 = wiener_convolve8_vert_4x8(s2, s3, s4, s5, s6, s7, s8, filter_y_tmp, - bd, conv_params->round_1); - t3 = wiener_convolve8_vert_4x8(s3, s4, s5, s6, s7, s8, s9, filter_y_tmp, - bd, conv_params->round_1); - - vst1_u8(d, t0); - d += dst_stride; - vst1_u8(d, t1); - d += dst_stride; - vst1_u8(d, t2); - d += dst_stride; - vst1_u8(d, t3); - d += dst_stride; - - s0 = s4; - s1 = s5; - s2 = s6; - s3 = s7; - s4 = s8; - s5 = s9; - s6 = s10; - height -= 4; - } while (height > 3); - - if (height != 0) { - __builtin_prefetch(dst_tmp_ptr + 0 * dst_stride); - __builtin_prefetch(dst_tmp_ptr + 1 * dst_stride); - - do { - s7 = vld1q_s16(s); - s += src_stride; - - t0 = - wiener_convolve8_vert_4x8(s0, s1, s2, s3, s4, s5, s6, - filter_y_tmp, bd, conv_params->round_1); - vst1_u8(d, t0); - d += dst_stride; - - s0 = s1; - s1 = s2; - s2 = s3; - s3 = s4; - s4 = s5; - s5 = s6; - s6 = s7; - height -= 1; - } while (height > 0); - } - - src_tmp_ptr += 8; - dst_tmp_ptr += 8; - - w -= 8; - } while (w > 0); -#else - do { - __builtin_prefetch(dst_tmp_ptr + 0 * dst_stride); - - s7 = 
vld1q_s16(s); - s += src_stride; - - t0 = wiener_convolve8_vert_4x8(s0, s1, s2, s3, s4, s5, s6, filter_y_tmp, - bd, conv_params->round_1); - - vst1_u8(d, t0); - d += dst_stride; - - s0 = s1; - s1 = s2; - s2 = s3; - s3 = s4; - s4 = s5; - s5 = s6; - s6 = s7; - height -= 1; - } while (height > 0); - - src_tmp_ptr += 8; - dst_tmp_ptr += 8; - - w -= 8; - } while (w > 0); -#endif - } -} diff --git a/third_party/aom/av1/common/av1_inv_txfm1d.c b/third_party/aom/av1/common/av1_inv_txfm1d.c deleted file mode 100644 index 7ef2d6d7f..000000000 --- a/third_party/aom/av1/common/av1_inv_txfm1d.c +++ /dev/null @@ -1,1846 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <stdlib.h> -#include "av1/common/av1_inv_txfm1d.h" -#include "av1/common/av1_txfm.h" - -// TODO(angiebird): Make 1-d txfm functions static -// - -void av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - assert(output != input); - const int32_t size = 4; - const int32_t *cospi = cospi_arr(cos_bit); - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[4]; - - // stage 0; - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0]; - bf1[1] = input[2]; - bf1[2] = input[1]; - bf1[3] = input[3]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); - bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); - bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]); -} - -void av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - assert(output != input); - const int32_t size = 8; - const int32_t *cospi = cospi_arr(cos_bit); - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[8]; - - // stage 0; - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0]; - bf1[1] = input[4]; - bf1[2] = input[2]; - bf1[3] = input[6]; - bf1[4] = input[1]; - bf1[5] = input[5]; - bf1[6] = input[3]; - bf1[7] = input[7]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = 
bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit); - bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit); - bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); - bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]); - bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]); - bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]); - bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); - bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); - bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]); - bf1[4] = bf0[4]; - bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[7] = bf0[7]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]); - bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]); - bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]); - bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]); - 
bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]); - bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]); -} - -void av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - assert(output != input); - const int32_t size = 16; - const int32_t *cospi = cospi_arr(cos_bit); - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[16]; - - // stage 0; - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0]; - bf1[1] = input[8]; - bf1[2] = input[4]; - bf1[3] = input[12]; - bf1[4] = input[2]; - bf1[5] = input[10]; - bf1[6] = input[6]; - bf1[7] = input[14]; - bf1[8] = input[1]; - bf1[9] = input[9]; - bf1[10] = input[5]; - bf1[11] = input[13]; - bf1[12] = input[3]; - bf1[13] = input[11]; - bf1[14] = input[7]; - bf1[15] = input[15]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit); - bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit); - bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit); - bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit); - bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit); - bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit); - bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit); - bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit); - bf1[6] = 
half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit); - bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit); - bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]); - bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]); - bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]); - bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]); - bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]); - bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]); - bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]); - bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); - bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]); - bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]); - bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]); - bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]); - bf1[8] = bf0[8]; - bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); - bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit); - bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit); - bf1[15] = bf0[15]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); - bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); - bf1[3] = clamp_value(bf0[0] - bf0[3], 
stage_range[stage]); - bf1[4] = bf0[4]; - bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[7] = bf0[7]; - bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]); - bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]); - bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]); - bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]); - bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]); - bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]); - bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]); - bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 6 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]); - bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]); - bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]); - bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]); - bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]); - bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]); - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); - bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); - bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 7 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]); - 
bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]); - bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]); - bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]); - bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]); - bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]); - bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]); - bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]); - bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]); - bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]); - bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]); - bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]); - bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]); - bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]); -} - -void av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - assert(output != input); - const int32_t size = 32; - const int32_t *cospi = cospi_arr(cos_bit); - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[32]; - - // stage 0; - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0]; - bf1[1] = input[16]; - bf1[2] = input[8]; - bf1[3] = input[24]; - bf1[4] = input[4]; - bf1[5] = input[20]; - bf1[6] = input[12]; - bf1[7] = input[28]; - bf1[8] = input[2]; - bf1[9] = input[18]; - bf1[10] = input[10]; - bf1[11] = input[26]; - bf1[12] = input[6]; - bf1[13] = input[22]; - bf1[14] = input[14]; - bf1[15] = input[30]; - bf1[16] = input[1]; - bf1[17] = input[17]; - bf1[18] = input[9]; - bf1[19] = input[25]; - bf1[20] = input[5]; - bf1[21] = input[21]; - bf1[22] = input[13]; - bf1[23] = input[29]; - bf1[24] = input[3]; - bf1[25] = input[19]; - bf1[26] = input[11]; - bf1[27] = input[27]; - bf1[28] = input[7]; - bf1[29] = input[23]; - bf1[30] = input[15]; - bf1[31] = input[31]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; 
- bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = bf0[10]; - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = bf0[13]; - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit); - bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit); - bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit); - bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit); - bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit); - bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit); - bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit); - bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit); - bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit); - bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit); - bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit); - bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit); - bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit); - bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit); - bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit); - bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit); - bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit); - bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit); - bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit); - 
bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit); - bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit); - bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit); - bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit); - bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]); - bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]); - bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]); - bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]); - bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]); - bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]); - bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]); - bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]); - bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]); - bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]); - bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]); - bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]); - bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]); - bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]); - bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]); - bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit); - bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit); - bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit); - bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]); - bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]); - bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]); - bf1[11] = clamp_value(bf0[10] + 
bf0[11], stage_range[stage]); - bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]); - bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]); - bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]); - bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]); - bf1[16] = bf0[16]; - bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit); - bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit); - bf1[19] = bf0[19]; - bf1[20] = bf0[20]; - bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit); - bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit); - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit); - bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit); - bf1[27] = bf0[27]; - bf1[28] = bf0[28]; - bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit); - bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit); - bf1[31] = bf0[31]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); - bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]); - bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]); - bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]); - bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]); - bf1[8] = bf0[8]; - bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); - bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit); - bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], 
bf0[14], cos_bit); - bf1[15] = bf0[15]; - bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]); - bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]); - bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]); - bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]); - bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]); - bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]); - bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]); - bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]); - bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]); - bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]); - bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]); - bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]); - bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]); - bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]); - bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]); - bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 6 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); - bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); - bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]); - bf1[4] = bf0[4]; - bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[7] = bf0[7]; - bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]); - bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]); - bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]); - bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]); - bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]); - bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]); - 
bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]); - bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]); - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit); - bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit); - bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit); - bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit); - bf1[22] = bf0[22]; - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = bf0[25]; - bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit); - bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit); - bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit); - bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit); - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 7 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]); - bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]); - bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]); - bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]); - bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]); - bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]); - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); - bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); - bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]); - bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]); - bf1[18] = 
clamp_value(bf0[18] + bf0[21], stage_range[stage]); - bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]); - bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]); - bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]); - bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]); - bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]); - bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]); - bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]); - bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]); - bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]); - bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]); - bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]); - bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]); - bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 8 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]); - bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]); - bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]); - bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]); - bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]); - bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]); - bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]); - bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]); - bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]); - bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]); - bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]); - bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]); - bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]); - bf1[15] = clamp_value(bf0[0] - bf0[15], 
stage_range[stage]); - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = bf0[18]; - bf1[19] = bf0[19]; - bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); - bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); - bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); - bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); - bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); - bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); - bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); - bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); - bf1[28] = bf0[28]; - bf1[29] = bf0[29]; - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 9 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]); - bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]); - bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]); - bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]); - bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]); - bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]); - bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]); - bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]); - bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]); - bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]); - bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]); - bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]); - bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]); - bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]); - bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]); - bf1[17] = clamp_value(bf0[14] - bf0[17], 
stage_range[stage]); - bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]); - bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]); - bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]); - bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]); - bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]); - bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]); - bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]); - bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]); - bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]); - bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]); - bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]); - bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]); - bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]); - bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]); -} - -void av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - int bit = cos_bit; - const int32_t *sinpi = sinpi_arr(bit); - int32_t s0, s1, s2, s3, s4, s5, s6, s7; - - int32_t x0 = input[0]; - int32_t x1 = input[1]; - int32_t x2 = input[2]; - int32_t x3 = input[3]; - - if (!(x0 | x1 | x2 | x3)) { - output[0] = output[1] = output[2] = output[3] = 0; - return; - } - - assert(sinpi[1] + sinpi[2] == sinpi[4]); - - // stage 1 - s0 = range_check_value(sinpi[1] * x0, stage_range[1] + bit); - s1 = range_check_value(sinpi[2] * x0, stage_range[1] + bit); - s2 = range_check_value(sinpi[3] * x1, stage_range[1] + bit); - s3 = range_check_value(sinpi[4] * x2, stage_range[1] + bit); - s4 = range_check_value(sinpi[1] * x2, stage_range[1] + bit); - s5 = range_check_value(sinpi[2] * x3, stage_range[1] + bit); - s6 = range_check_value(sinpi[4] * x3, stage_range[1] + bit); - - // stage 2 - // NOTICE: (x0 - x2) here may use one extra bit compared to the - // opt_range_row/col specified in av1_gen_inv_stage_range() - s7 = range_check_value((x0 - 
x2) + x3, stage_range[2]);

  // stage 3
  s0 = range_check_value(s0 + s3, stage_range[3] + bit);
  s1 = range_check_value(s1 - s4, stage_range[3] + bit);
  s3 = range_check_value(s2, stage_range[3] + bit);
  s2 = range_check_value(sinpi[3] * s7, stage_range[3] + bit);

  // stage 4
  s0 = range_check_value(s0 + s5, stage_range[4] + bit);
  s1 = range_check_value(s1 - s6, stage_range[4] + bit);

  // stage 5
  x0 = range_check_value(s0 + s3, stage_range[5] + bit);
  x1 = range_check_value(s1 + s3, stage_range[5] + bit);
  x2 = range_check_value(s2, stage_range[5] + bit);
  x3 = range_check_value(s0 + s1, stage_range[5] + bit);

  // stage 6
  x3 = range_check_value(x3 - s3, stage_range[6] + bit);

  // Rounding shift back down from the widened intermediate precision.
  output[0] = round_shift(x0, bit);
  output[1] = round_shift(x1, bit);
  output[2] = round_shift(x2, bit);
  output[3] = round_shift(x3, bit);
}

// 8-point inverse ADST, computed as a 7-stage fixed-point butterfly
// network.  `input` and `output` must not alias (asserted).  Stages
// alternate between `output` and the local `step` buffer; half_btf()
// combines two values with cospi[] weights at `cos_bit` precision,
// clamp_value() bounds each sum/difference to stage_range[stage] bits,
// and av1_range_check_buf() is the (debug) range validator per stage.
void av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                    const int8_t *stage_range) {
  assert(output != input);
  const int32_t size = 8;
  const int32_t *cospi = cospi_arr(cos_bit);

  int32_t stage = 0;
  int32_t *bf0, *bf1;
  int32_t step[8];

  // stage 0;

  // stage 1;  permute the input coefficients into butterfly order
  stage++;
  bf1 = output;
  bf1[0] = input[7];
  bf1[1] = input[0];
  bf1[2] = input[5];
  bf1[3] = input[2];
  bf1[4] = input[3];
  bf1[5] = input[4];
  bf1[6] = input[1];
  bf1[7] = input[6];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 2: rotate each input pair by the odd cospi angles
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit);
  bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit);
  bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit);
  bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit);
  bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit);
  bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit);
  bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 3: butterfly add/sub between the two halves
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
  bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
  bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
  bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
  bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
  bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
  bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 4: rotate the upper half by the 16/48 cospi pair
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
  bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 5: butterfly add/sub within each half
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
  bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
  bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
  bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
  bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
  bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
  bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 6: 45-degree (cospi[32]) rotations on the odd pairs
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
  bf1[4] = bf0[4];
  bf1[5] = bf0[5];
  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 7: final output permutation with alternating sign flips
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[0];
  bf1[1] = -bf0[4];
  bf1[2] = bf0[6];
  bf1[3] = -bf0[2];
  bf1[4] = bf0[3];
  bf1[5] = -bf0[7];
  bf1[6] = bf0[5];
  bf1[7] = -bf0[1];
}

// 16-point inverse ADST: same ping-pong butterfly scheme as the 8-point
// version above, extended to 9 stages.
void av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                     const int8_t *stage_range) {
  assert(output != input);
  const int32_t size = 16;
  const int32_t *cospi = cospi_arr(cos_bit);

  int32_t stage = 0;
  int32_t *bf0, *bf1;
  int32_t step[16];

  // stage 0;

  // stage 1;  permute the input coefficients into butterfly order
  stage++;
  bf1 = output;
  bf1[0] = input[15];
  bf1[1] = input[0];
  bf1[2] = input[13];
  bf1[3] = input[2];
  bf1[4] = input[11];
  bf1[5] = input[4];
  bf1[6] = input[9];
  bf1[7] = input[6];
  bf1[8] = input[7];
  bf1[9] = input[8];
  bf1[10] = input[5];
  bf1[11] = input[10];
  bf1[12] = input[3];
  bf1[13] = input[12];
  bf1[14] = input[1];
  bf1[15] = input[14];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 2: rotate each input pair by the odd cospi angles
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit);
  bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit);
  bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit);
  bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit);
  bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit);
  bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit);
  bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit);
  bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit);
  bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit);
  bf1[10] =
half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit);
  bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit);
  bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit);
  bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit);
  bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit);
  bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 3: butterfly add/sub between the two halves
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[8], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[9], stage_range[stage]);
  bf1[2] = clamp_value(bf0[2] + bf0[10], stage_range[stage]);
  bf1[3] = clamp_value(bf0[3] + bf0[11], stage_range[stage]);
  bf1[4] = clamp_value(bf0[4] + bf0[12], stage_range[stage]);
  bf1[5] = clamp_value(bf0[5] + bf0[13], stage_range[stage]);
  bf1[6] = clamp_value(bf0[6] + bf0[14], stage_range[stage]);
  bf1[7] = clamp_value(bf0[7] + bf0[15], stage_range[stage]);
  bf1[8] = clamp_value(bf0[0] - bf0[8], stage_range[stage]);
  bf1[9] = clamp_value(bf0[1] - bf0[9], stage_range[stage]);
  bf1[10] = clamp_value(bf0[2] - bf0[10], stage_range[stage]);
  bf1[11] = clamp_value(bf0[3] - bf0[11], stage_range[stage]);
  bf1[12] = clamp_value(bf0[4] - bf0[12], stage_range[stage]);
  bf1[13] = clamp_value(bf0[5] - bf0[13], stage_range[stage]);
  bf1[14] = clamp_value(bf0[6] - bf0[14], stage_range[stage]);
  bf1[15] = clamp_value(bf0[7] - bf0[15], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 4: rotate the upper half by the 8/56 and 40/24 cospi pairs
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = bf0[4];
  bf1[5] = bf0[5];
  bf1[6] = bf0[6];
  bf1[7] = bf0[7];
  bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
  bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit);
  bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
  bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit);
  bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
  bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit);
  bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
  bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 5: butterfly add/sub within each half
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
  bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
  bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
  bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
  bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
  bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
  bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
  bf1[8] = clamp_value(bf0[8] + bf0[12], stage_range[stage]);
  bf1[9] = clamp_value(bf0[9] + bf0[13], stage_range[stage]);
  bf1[10] = clamp_value(bf0[10] + bf0[14], stage_range[stage]);
  bf1[11] = clamp_value(bf0[11] + bf0[15], stage_range[stage]);
  bf1[12] = clamp_value(bf0[8] - bf0[12], stage_range[stage]);
  bf1[13] = clamp_value(bf0[9] - bf0[13], stage_range[stage]);
  bf1[14] = clamp_value(bf0[10] - bf0[14], stage_range[stage]);
  bf1[15] = clamp_value(bf0[11] - bf0[15], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 6: 16/48 cospi rotations on the odd quads
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
  bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
  bf1[8] = bf0[8];
  bf1[9] = bf0[9];
  bf1[10] = bf0[10];
  bf1[11] = bf0[11];
  bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
  bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit);
  bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
  bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 7: butterfly add/sub within each quad
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
  bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
  bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
  bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
  bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
  bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
  bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
  bf1[8] = clamp_value(bf0[8] + bf0[10], stage_range[stage]);
  bf1[9] = clamp_value(bf0[9] + bf0[11], stage_range[stage]);
  bf1[10] = clamp_value(bf0[8] - bf0[10], stage_range[stage]);
  bf1[11] = clamp_value(bf0[9] - bf0[11], stage_range[stage]);
  bf1[12] = clamp_value(bf0[12] + bf0[14], stage_range[stage]);
  bf1[13] = clamp_value(bf0[13] + bf0[15], stage_range[stage]);
  bf1[14] = clamp_value(bf0[12] - bf0[14], stage_range[stage]);
  bf1[15] = clamp_value(bf0[13] - bf0[15], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 8: 45-degree (cospi[32]) rotations on the odd pairs
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
  bf1[4] = bf0[4];
  bf1[5] = bf0[5];
  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
  bf1[8] = bf0[8];
  bf1[9] = bf0[9];
  bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
  bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit);
  bf1[12] = bf0[12];
  bf1[13] = bf0[13];
  bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
  bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 9: final output permutation with alternating sign flips
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[0];
  bf1[1] = -bf0[8];
  bf1[2] = bf0[12];
  bf1[3] = -bf0[4];
  bf1[4] = bf0[6];
  bf1[5] = -bf0[14];
  bf1[6] = bf0[10];
  bf1[7] = -bf0[2];
  bf1[8] = bf0[3];
  bf1[9] = -bf0[11];
  bf1[10] = bf0[15];
  bf1[11] = -bf0[7];
  bf1[12] = bf0[5];
  bf1[13] = -bf0[13];
  bf1[14] = bf0[9];
  bf1[15] = -bf0[1];
}

// 4-point identity transform: scales each coefficient by the fixed-point
// constant NewSqrt2 / 2^NewSqrt2Bits (round_shift rounds the product).
// The 64-bit product avoids intermediate overflow; the assert documents
// the precision budget the 32-bit result relies on.
void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                      const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  for (int i = 0; i < 4; ++i) {
    output[i] = round_shift((int64_t)NewSqrt2 * input[i], NewSqrt2Bits);
  }
  assert(stage_range[0] + NewSqrt2Bits <= 32);
}

// 8-point identity transform: scales each coefficient by exactly 2.
void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                      const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  for (int i = 0; i < 8; ++i) output[i] = (int32_t)((int64_t)input[i] * 2);
}

// 16-point identity transform: scales by 2 * NewSqrt2 / 2^NewSqrt2Bits
// in fixed point.
void av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  for (int i = 0; i < 16; ++i)
    output[i] = round_shift((int64_t)NewSqrt2 * 2 * input[i], NewSqrt2Bits);
  assert(stage_range[0] + NewSqrt2Bits <= 32);
}

// 32-point identity transform: scales each coefficient by exactly 4.
void av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  for (int i = 0; i < 32; ++i) output[i] = (int32_t)((int64_t)input[i] * 4);
}

// 64-point inverse DCT, same ping-pong butterfly scheme as the smaller
// transforms above (body continues below).
void av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                    const int8_t *stage_range) {
  assert(output != input);
  const int32_t size = 64;
  const int32_t
*cospi = cospi_arr(cos_bit); - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[64]; - - // stage 0; - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0]; - bf1[1] = input[32]; - bf1[2] = input[16]; - bf1[3] = input[48]; - bf1[4] = input[8]; - bf1[5] = input[40]; - bf1[6] = input[24]; - bf1[7] = input[56]; - bf1[8] = input[4]; - bf1[9] = input[36]; - bf1[10] = input[20]; - bf1[11] = input[52]; - bf1[12] = input[12]; - bf1[13] = input[44]; - bf1[14] = input[28]; - bf1[15] = input[60]; - bf1[16] = input[2]; - bf1[17] = input[34]; - bf1[18] = input[18]; - bf1[19] = input[50]; - bf1[20] = input[10]; - bf1[21] = input[42]; - bf1[22] = input[26]; - bf1[23] = input[58]; - bf1[24] = input[6]; - bf1[25] = input[38]; - bf1[26] = input[22]; - bf1[27] = input[54]; - bf1[28] = input[14]; - bf1[29] = input[46]; - bf1[30] = input[30]; - bf1[31] = input[62]; - bf1[32] = input[1]; - bf1[33] = input[33]; - bf1[34] = input[17]; - bf1[35] = input[49]; - bf1[36] = input[9]; - bf1[37] = input[41]; - bf1[38] = input[25]; - bf1[39] = input[57]; - bf1[40] = input[5]; - bf1[41] = input[37]; - bf1[42] = input[21]; - bf1[43] = input[53]; - bf1[44] = input[13]; - bf1[45] = input[45]; - bf1[46] = input[29]; - bf1[47] = input[61]; - bf1[48] = input[3]; - bf1[49] = input[35]; - bf1[50] = input[19]; - bf1[51] = input[51]; - bf1[52] = input[11]; - bf1[53] = input[43]; - bf1[54] = input[27]; - bf1[55] = input[59]; - bf1[56] = input[7]; - bf1[57] = input[39]; - bf1[58] = input[23]; - bf1[59] = input[55]; - bf1[60] = input[15]; - bf1[61] = input[47]; - bf1[62] = input[31]; - bf1[63] = input[63]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = bf0[10]; - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = 
bf0[13]; - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = bf0[18]; - bf1[19] = bf0[19]; - bf1[20] = bf0[20]; - bf1[21] = bf0[21]; - bf1[22] = bf0[22]; - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = bf0[25]; - bf1[26] = bf0[26]; - bf1[27] = bf0[27]; - bf1[28] = bf0[28]; - bf1[29] = bf0[29]; - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit); - bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit); - bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit); - bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit); - bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit); - bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit); - bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit); - bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit); - bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit); - bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit); - bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit); - bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit); - bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit); - bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit); - bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit); - bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit); - bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit); - bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit); - bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit); - bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit); - bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit); - bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit); - bf1[54] = half_btf(cospi[37], bf0[41], 
cospi[27], bf0[54], cos_bit); - bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit); - bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit); - bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit); - bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit); - bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit); - bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit); - bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit); - bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit); - bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = bf0[10]; - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = bf0[13]; - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit); - bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit); - bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit); - bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit); - bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit); - bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit); - bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit); - bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit); - bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit); - bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit); - bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit); - bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit); - bf1[28] = half_btf(cospi[50], 
bf0[19], cospi[14], bf0[28], cos_bit); - bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit); - bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit); - bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit); - bf1[32] = clamp_value(bf0[32] + bf0[33], stage_range[stage]); - bf1[33] = clamp_value(bf0[32] - bf0[33], stage_range[stage]); - bf1[34] = clamp_value(-bf0[34] + bf0[35], stage_range[stage]); - bf1[35] = clamp_value(bf0[34] + bf0[35], stage_range[stage]); - bf1[36] = clamp_value(bf0[36] + bf0[37], stage_range[stage]); - bf1[37] = clamp_value(bf0[36] - bf0[37], stage_range[stage]); - bf1[38] = clamp_value(-bf0[38] + bf0[39], stage_range[stage]); - bf1[39] = clamp_value(bf0[38] + bf0[39], stage_range[stage]); - bf1[40] = clamp_value(bf0[40] + bf0[41], stage_range[stage]); - bf1[41] = clamp_value(bf0[40] - bf0[41], stage_range[stage]); - bf1[42] = clamp_value(-bf0[42] + bf0[43], stage_range[stage]); - bf1[43] = clamp_value(bf0[42] + bf0[43], stage_range[stage]); - bf1[44] = clamp_value(bf0[44] + bf0[45], stage_range[stage]); - bf1[45] = clamp_value(bf0[44] - bf0[45], stage_range[stage]); - bf1[46] = clamp_value(-bf0[46] + bf0[47], stage_range[stage]); - bf1[47] = clamp_value(bf0[46] + bf0[47], stage_range[stage]); - bf1[48] = clamp_value(bf0[48] + bf0[49], stage_range[stage]); - bf1[49] = clamp_value(bf0[48] - bf0[49], stage_range[stage]); - bf1[50] = clamp_value(-bf0[50] + bf0[51], stage_range[stage]); - bf1[51] = clamp_value(bf0[50] + bf0[51], stage_range[stage]); - bf1[52] = clamp_value(bf0[52] + bf0[53], stage_range[stage]); - bf1[53] = clamp_value(bf0[52] - bf0[53], stage_range[stage]); - bf1[54] = clamp_value(-bf0[54] + bf0[55], stage_range[stage]); - bf1[55] = clamp_value(bf0[54] + bf0[55], stage_range[stage]); - bf1[56] = clamp_value(bf0[56] + bf0[57], stage_range[stage]); - bf1[57] = clamp_value(bf0[56] - bf0[57], stage_range[stage]); - bf1[58] = clamp_value(-bf0[58] + bf0[59], stage_range[stage]); - bf1[59] = 
clamp_value(bf0[58] + bf0[59], stage_range[stage]); - bf1[60] = clamp_value(bf0[60] + bf0[61], stage_range[stage]); - bf1[61] = clamp_value(bf0[60] - bf0[61], stage_range[stage]); - bf1[62] = clamp_value(-bf0[62] + bf0[63], stage_range[stage]); - bf1[63] = clamp_value(bf0[62] + bf0[63], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit); - bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit); - bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit); - bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit); - bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit); - bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit); - bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit); - bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]); - bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]); - bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]); - bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]); - bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]); - bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]); - bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]); - bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]); - bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]); - bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]); - bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]); - bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]); - bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]); - bf1[29] = 
clamp_value(bf0[28] - bf0[29], stage_range[stage]); - bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]); - bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]); - bf1[32] = bf0[32]; - bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit); - bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit); - bf1[35] = bf0[35]; - bf1[36] = bf0[36]; - bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit); - bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit); - bf1[39] = bf0[39]; - bf1[40] = bf0[40]; - bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit); - bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit); - bf1[43] = bf0[43]; - bf1[44] = bf0[44]; - bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit); - bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit); - bf1[47] = bf0[47]; - bf1[48] = bf0[48]; - bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit); - bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit); - bf1[51] = bf0[51]; - bf1[52] = bf0[52]; - bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit); - bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit); - bf1[55] = bf0[55]; - bf1[56] = bf0[56]; - bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit); - bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit); - bf1[59] = bf0[59]; - bf1[60] = bf0[60]; - bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit); - bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit); - bf1[63] = bf0[63]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit); - bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], 
cos_bit); - bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit); - bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit); - bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]); - bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]); - bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]); - bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]); - bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]); - bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]); - bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]); - bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]); - bf1[16] = bf0[16]; - bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit); - bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit); - bf1[19] = bf0[19]; - bf1[20] = bf0[20]; - bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit); - bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit); - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit); - bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit); - bf1[27] = bf0[27]; - bf1[28] = bf0[28]; - bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit); - bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit); - bf1[31] = bf0[31]; - bf1[32] = clamp_value(bf0[32] + bf0[35], stage_range[stage]); - bf1[33] = clamp_value(bf0[33] + bf0[34], stage_range[stage]); - bf1[34] = clamp_value(bf0[33] - bf0[34], stage_range[stage]); - bf1[35] = clamp_value(bf0[32] - bf0[35], stage_range[stage]); - bf1[36] = clamp_value(-bf0[36] + bf0[39], stage_range[stage]); - bf1[37] = clamp_value(-bf0[37] + bf0[38], stage_range[stage]); - bf1[38] = clamp_value(bf0[37] + bf0[38], stage_range[stage]); - bf1[39] = clamp_value(bf0[36] + bf0[39], stage_range[stage]); - bf1[40] = clamp_value(bf0[40] + bf0[43], stage_range[stage]); - bf1[41] = 
clamp_value(bf0[41] + bf0[42], stage_range[stage]); - bf1[42] = clamp_value(bf0[41] - bf0[42], stage_range[stage]); - bf1[43] = clamp_value(bf0[40] - bf0[43], stage_range[stage]); - bf1[44] = clamp_value(-bf0[44] + bf0[47], stage_range[stage]); - bf1[45] = clamp_value(-bf0[45] + bf0[46], stage_range[stage]); - bf1[46] = clamp_value(bf0[45] + bf0[46], stage_range[stage]); - bf1[47] = clamp_value(bf0[44] + bf0[47], stage_range[stage]); - bf1[48] = clamp_value(bf0[48] + bf0[51], stage_range[stage]); - bf1[49] = clamp_value(bf0[49] + bf0[50], stage_range[stage]); - bf1[50] = clamp_value(bf0[49] - bf0[50], stage_range[stage]); - bf1[51] = clamp_value(bf0[48] - bf0[51], stage_range[stage]); - bf1[52] = clamp_value(-bf0[52] + bf0[55], stage_range[stage]); - bf1[53] = clamp_value(-bf0[53] + bf0[54], stage_range[stage]); - bf1[54] = clamp_value(bf0[53] + bf0[54], stage_range[stage]); - bf1[55] = clamp_value(bf0[52] + bf0[55], stage_range[stage]); - bf1[56] = clamp_value(bf0[56] + bf0[59], stage_range[stage]); - bf1[57] = clamp_value(bf0[57] + bf0[58], stage_range[stage]); - bf1[58] = clamp_value(bf0[57] - bf0[58], stage_range[stage]); - bf1[59] = clamp_value(bf0[56] - bf0[59], stage_range[stage]); - bf1[60] = clamp_value(-bf0[60] + bf0[63], stage_range[stage]); - bf1[61] = clamp_value(-bf0[61] + bf0[62], stage_range[stage]); - bf1[62] = clamp_value(bf0[61] + bf0[62], stage_range[stage]); - bf1[63] = clamp_value(bf0[60] + bf0[63], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 6 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit); - bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]); - bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]); - bf1[6] = 
clamp_value(-bf0[6] + bf0[7], stage_range[stage]); - bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]); - bf1[8] = bf0[8]; - bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); - bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit); - bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit); - bf1[15] = bf0[15]; - bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]); - bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]); - bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]); - bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]); - bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]); - bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]); - bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]); - bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]); - bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]); - bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]); - bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]); - bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]); - bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]); - bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]); - bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]); - bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]); - bf1[32] = bf0[32]; - bf1[33] = bf0[33]; - bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit); - bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit); - bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit); - bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit); - bf1[38] = bf0[38]; - bf1[39] = bf0[39]; - bf1[40] = bf0[40]; - bf1[41] = bf0[41]; - bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit); - 
bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit); - bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit); - bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit); - bf1[46] = bf0[46]; - bf1[47] = bf0[47]; - bf1[48] = bf0[48]; - bf1[49] = bf0[49]; - bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit); - bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit); - bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit); - bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit); - bf1[54] = bf0[54]; - bf1[55] = bf0[55]; - bf1[56] = bf0[56]; - bf1[57] = bf0[57]; - bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit); - bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit); - bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit); - bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit); - bf1[62] = bf0[62]; - bf1[63] = bf0[63]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 7 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]); - bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]); - bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]); - bf1[4] = bf0[4]; - bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[7] = bf0[7]; - bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]); - bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]); - bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]); - bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]); - bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]); - bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]); - bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]); - bf1[15] = 
clamp_value(bf0[12] + bf0[15], stage_range[stage]); - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit); - bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit); - bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit); - bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit); - bf1[22] = bf0[22]; - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = bf0[25]; - bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit); - bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit); - bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit); - bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit); - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - bf1[32] = clamp_value(bf0[32] + bf0[39], stage_range[stage]); - bf1[33] = clamp_value(bf0[33] + bf0[38], stage_range[stage]); - bf1[34] = clamp_value(bf0[34] + bf0[37], stage_range[stage]); - bf1[35] = clamp_value(bf0[35] + bf0[36], stage_range[stage]); - bf1[36] = clamp_value(bf0[35] - bf0[36], stage_range[stage]); - bf1[37] = clamp_value(bf0[34] - bf0[37], stage_range[stage]); - bf1[38] = clamp_value(bf0[33] - bf0[38], stage_range[stage]); - bf1[39] = clamp_value(bf0[32] - bf0[39], stage_range[stage]); - bf1[40] = clamp_value(-bf0[40] + bf0[47], stage_range[stage]); - bf1[41] = clamp_value(-bf0[41] + bf0[46], stage_range[stage]); - bf1[42] = clamp_value(-bf0[42] + bf0[45], stage_range[stage]); - bf1[43] = clamp_value(-bf0[43] + bf0[44], stage_range[stage]); - bf1[44] = clamp_value(bf0[43] + bf0[44], stage_range[stage]); - bf1[45] = clamp_value(bf0[42] + bf0[45], stage_range[stage]); - bf1[46] = clamp_value(bf0[41] + bf0[46], stage_range[stage]); - bf1[47] = clamp_value(bf0[40] + bf0[47], stage_range[stage]); - bf1[48] = clamp_value(bf0[48] + bf0[55], stage_range[stage]); - bf1[49] = clamp_value(bf0[49] + bf0[54], stage_range[stage]); - bf1[50] = clamp_value(bf0[50] + bf0[53], 
stage_range[stage]); - bf1[51] = clamp_value(bf0[51] + bf0[52], stage_range[stage]); - bf1[52] = clamp_value(bf0[51] - bf0[52], stage_range[stage]); - bf1[53] = clamp_value(bf0[50] - bf0[53], stage_range[stage]); - bf1[54] = clamp_value(bf0[49] - bf0[54], stage_range[stage]); - bf1[55] = clamp_value(bf0[48] - bf0[55], stage_range[stage]); - bf1[56] = clamp_value(-bf0[56] + bf0[63], stage_range[stage]); - bf1[57] = clamp_value(-bf0[57] + bf0[62], stage_range[stage]); - bf1[58] = clamp_value(-bf0[58] + bf0[61], stage_range[stage]); - bf1[59] = clamp_value(-bf0[59] + bf0[60], stage_range[stage]); - bf1[60] = clamp_value(bf0[59] + bf0[60], stage_range[stage]); - bf1[61] = clamp_value(bf0[58] + bf0[61], stage_range[stage]); - bf1[62] = clamp_value(bf0[57] + bf0[62], stage_range[stage]); - bf1[63] = clamp_value(bf0[56] + bf0[63], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 8 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]); - bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]); - bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]); - bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]); - bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]); - bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]); - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); - bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); - bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]); - bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]); - bf1[18] = clamp_value(bf0[18] + 
bf0[21], stage_range[stage]); - bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]); - bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]); - bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]); - bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]); - bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]); - bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]); - bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]); - bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]); - bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]); - bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]); - bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]); - bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]); - bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]); - bf1[32] = bf0[32]; - bf1[33] = bf0[33]; - bf1[34] = bf0[34]; - bf1[35] = bf0[35]; - bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit); - bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit); - bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit); - bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit); - bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit); - bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit); - bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit); - bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit); - bf1[44] = bf0[44]; - bf1[45] = bf0[45]; - bf1[46] = bf0[46]; - bf1[47] = bf0[47]; - bf1[48] = bf0[48]; - bf1[49] = bf0[49]; - bf1[50] = bf0[50]; - bf1[51] = bf0[51]; - bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit); - bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit); - bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit); - bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit); - bf1[56] = 
half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit); - bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit); - bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit); - bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit); - bf1[60] = bf0[60]; - bf1[61] = bf0[61]; - bf1[62] = bf0[62]; - bf1[63] = bf0[63]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 9 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]); - bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]); - bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]); - bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]); - bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]); - bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]); - bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]); - bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]); - bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]); - bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]); - bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]); - bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]); - bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]); - bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]); - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = bf0[18]; - bf1[19] = bf0[19]; - bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); - bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); - bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); - bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); - bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); - bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); - bf1[26] = 
half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); - bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); - bf1[28] = bf0[28]; - bf1[29] = bf0[29]; - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - bf1[32] = clamp_value(bf0[32] + bf0[47], stage_range[stage]); - bf1[33] = clamp_value(bf0[33] + bf0[46], stage_range[stage]); - bf1[34] = clamp_value(bf0[34] + bf0[45], stage_range[stage]); - bf1[35] = clamp_value(bf0[35] + bf0[44], stage_range[stage]); - bf1[36] = clamp_value(bf0[36] + bf0[43], stage_range[stage]); - bf1[37] = clamp_value(bf0[37] + bf0[42], stage_range[stage]); - bf1[38] = clamp_value(bf0[38] + bf0[41], stage_range[stage]); - bf1[39] = clamp_value(bf0[39] + bf0[40], stage_range[stage]); - bf1[40] = clamp_value(bf0[39] - bf0[40], stage_range[stage]); - bf1[41] = clamp_value(bf0[38] - bf0[41], stage_range[stage]); - bf1[42] = clamp_value(bf0[37] - bf0[42], stage_range[stage]); - bf1[43] = clamp_value(bf0[36] - bf0[43], stage_range[stage]); - bf1[44] = clamp_value(bf0[35] - bf0[44], stage_range[stage]); - bf1[45] = clamp_value(bf0[34] - bf0[45], stage_range[stage]); - bf1[46] = clamp_value(bf0[33] - bf0[46], stage_range[stage]); - bf1[47] = clamp_value(bf0[32] - bf0[47], stage_range[stage]); - bf1[48] = clamp_value(-bf0[48] + bf0[63], stage_range[stage]); - bf1[49] = clamp_value(-bf0[49] + bf0[62], stage_range[stage]); - bf1[50] = clamp_value(-bf0[50] + bf0[61], stage_range[stage]); - bf1[51] = clamp_value(-bf0[51] + bf0[60], stage_range[stage]); - bf1[52] = clamp_value(-bf0[52] + bf0[59], stage_range[stage]); - bf1[53] = clamp_value(-bf0[53] + bf0[58], stage_range[stage]); - bf1[54] = clamp_value(-bf0[54] + bf0[57], stage_range[stage]); - bf1[55] = clamp_value(-bf0[55] + bf0[56], stage_range[stage]); - bf1[56] = clamp_value(bf0[55] + bf0[56], stage_range[stage]); - bf1[57] = clamp_value(bf0[54] + bf0[57], stage_range[stage]); - bf1[58] = clamp_value(bf0[53] + bf0[58], stage_range[stage]); - bf1[59] = clamp_value(bf0[52] + bf0[59], 
stage_range[stage]); - bf1[60] = clamp_value(bf0[51] + bf0[60], stage_range[stage]); - bf1[61] = clamp_value(bf0[50] + bf0[61], stage_range[stage]); - bf1[62] = clamp_value(bf0[49] + bf0[62], stage_range[stage]); - bf1[63] = clamp_value(bf0[48] + bf0[63], stage_range[stage]); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 10 - stage++; - bf0 = output; - bf1 = step; - bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]); - bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]); - bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]); - bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]); - bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]); - bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]); - bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]); - bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]); - bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]); - bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]); - bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]); - bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]); - bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]); - bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]); - bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]); - bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]); - bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]); - bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]); - bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]); - bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]); - bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]); - bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]); - bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]); - bf1[25] = 
clamp_value(bf0[6] - bf0[25], stage_range[stage]); - bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]); - bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]); - bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]); - bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]); - bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]); - bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]); - bf1[32] = bf0[32]; - bf1[33] = bf0[33]; - bf1[34] = bf0[34]; - bf1[35] = bf0[35]; - bf1[36] = bf0[36]; - bf1[37] = bf0[37]; - bf1[38] = bf0[38]; - bf1[39] = bf0[39]; - bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit); - bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit); - bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit); - bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit); - bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit); - bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit); - bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit); - bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit); - bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit); - bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit); - bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit); - bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit); - bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit); - bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit); - bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit); - bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit); - bf1[56] = bf0[56]; - bf1[57] = bf0[57]; - bf1[58] = bf0[58]; - bf1[59] = bf0[59]; - bf1[60] = bf0[60]; - bf1[61] = bf0[61]; - bf1[62] = bf0[62]; - bf1[63] = bf0[63]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 11 - 
stage++; - bf0 = step; - bf1 = output; - bf1[0] = clamp_value(bf0[0] + bf0[63], stage_range[stage]); - bf1[1] = clamp_value(bf0[1] + bf0[62], stage_range[stage]); - bf1[2] = clamp_value(bf0[2] + bf0[61], stage_range[stage]); - bf1[3] = clamp_value(bf0[3] + bf0[60], stage_range[stage]); - bf1[4] = clamp_value(bf0[4] + bf0[59], stage_range[stage]); - bf1[5] = clamp_value(bf0[5] + bf0[58], stage_range[stage]); - bf1[6] = clamp_value(bf0[6] + bf0[57], stage_range[stage]); - bf1[7] = clamp_value(bf0[7] + bf0[56], stage_range[stage]); - bf1[8] = clamp_value(bf0[8] + bf0[55], stage_range[stage]); - bf1[9] = clamp_value(bf0[9] + bf0[54], stage_range[stage]); - bf1[10] = clamp_value(bf0[10] + bf0[53], stage_range[stage]); - bf1[11] = clamp_value(bf0[11] + bf0[52], stage_range[stage]); - bf1[12] = clamp_value(bf0[12] + bf0[51], stage_range[stage]); - bf1[13] = clamp_value(bf0[13] + bf0[50], stage_range[stage]); - bf1[14] = clamp_value(bf0[14] + bf0[49], stage_range[stage]); - bf1[15] = clamp_value(bf0[15] + bf0[48], stage_range[stage]); - bf1[16] = clamp_value(bf0[16] + bf0[47], stage_range[stage]); - bf1[17] = clamp_value(bf0[17] + bf0[46], stage_range[stage]); - bf1[18] = clamp_value(bf0[18] + bf0[45], stage_range[stage]); - bf1[19] = clamp_value(bf0[19] + bf0[44], stage_range[stage]); - bf1[20] = clamp_value(bf0[20] + bf0[43], stage_range[stage]); - bf1[21] = clamp_value(bf0[21] + bf0[42], stage_range[stage]); - bf1[22] = clamp_value(bf0[22] + bf0[41], stage_range[stage]); - bf1[23] = clamp_value(bf0[23] + bf0[40], stage_range[stage]); - bf1[24] = clamp_value(bf0[24] + bf0[39], stage_range[stage]); - bf1[25] = clamp_value(bf0[25] + bf0[38], stage_range[stage]); - bf1[26] = clamp_value(bf0[26] + bf0[37], stage_range[stage]); - bf1[27] = clamp_value(bf0[27] + bf0[36], stage_range[stage]); - bf1[28] = clamp_value(bf0[28] + bf0[35], stage_range[stage]); - bf1[29] = clamp_value(bf0[29] + bf0[34], stage_range[stage]); - bf1[30] = clamp_value(bf0[30] + bf0[33], 
stage_range[stage]); - bf1[31] = clamp_value(bf0[31] + bf0[32], stage_range[stage]); - bf1[32] = clamp_value(bf0[31] - bf0[32], stage_range[stage]); - bf1[33] = clamp_value(bf0[30] - bf0[33], stage_range[stage]); - bf1[34] = clamp_value(bf0[29] - bf0[34], stage_range[stage]); - bf1[35] = clamp_value(bf0[28] - bf0[35], stage_range[stage]); - bf1[36] = clamp_value(bf0[27] - bf0[36], stage_range[stage]); - bf1[37] = clamp_value(bf0[26] - bf0[37], stage_range[stage]); - bf1[38] = clamp_value(bf0[25] - bf0[38], stage_range[stage]); - bf1[39] = clamp_value(bf0[24] - bf0[39], stage_range[stage]); - bf1[40] = clamp_value(bf0[23] - bf0[40], stage_range[stage]); - bf1[41] = clamp_value(bf0[22] - bf0[41], stage_range[stage]); - bf1[42] = clamp_value(bf0[21] - bf0[42], stage_range[stage]); - bf1[43] = clamp_value(bf0[20] - bf0[43], stage_range[stage]); - bf1[44] = clamp_value(bf0[19] - bf0[44], stage_range[stage]); - bf1[45] = clamp_value(bf0[18] - bf0[45], stage_range[stage]); - bf1[46] = clamp_value(bf0[17] - bf0[46], stage_range[stage]); - bf1[47] = clamp_value(bf0[16] - bf0[47], stage_range[stage]); - bf1[48] = clamp_value(bf0[15] - bf0[48], stage_range[stage]); - bf1[49] = clamp_value(bf0[14] - bf0[49], stage_range[stage]); - bf1[50] = clamp_value(bf0[13] - bf0[50], stage_range[stage]); - bf1[51] = clamp_value(bf0[12] - bf0[51], stage_range[stage]); - bf1[52] = clamp_value(bf0[11] - bf0[52], stage_range[stage]); - bf1[53] = clamp_value(bf0[10] - bf0[53], stage_range[stage]); - bf1[54] = clamp_value(bf0[9] - bf0[54], stage_range[stage]); - bf1[55] = clamp_value(bf0[8] - bf0[55], stage_range[stage]); - bf1[56] = clamp_value(bf0[7] - bf0[56], stage_range[stage]); - bf1[57] = clamp_value(bf0[6] - bf0[57], stage_range[stage]); - bf1[58] = clamp_value(bf0[5] - bf0[58], stage_range[stage]); - bf1[59] = clamp_value(bf0[4] - bf0[59], stage_range[stage]); - bf1[60] = clamp_value(bf0[3] - bf0[60], stage_range[stage]); - bf1[61] = clamp_value(bf0[2] - bf0[61], stage_range[stage]); - 
bf1[62] = clamp_value(bf0[1] - bf0[62], stage_range[stage]); - bf1[63] = clamp_value(bf0[0] - bf0[63], stage_range[stage]); -} diff --git a/third_party/aom/av1/common/av1_inv_txfm1d.h b/third_party/aom/av1/common/av1_inv_txfm1d.h deleted file mode 100644 index c31c019aa..000000000 --- a/third_party/aom/av1/common/av1_inv_txfm1d.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_AV1_INV_TXFM1D_H_ -#define AOM_AV1_COMMON_AV1_INV_TXFM1D_H_ - -#include "av1/common/av1_txfm.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE int32_t clamp_value(int32_t value, int8_t bit) { - if (bit <= 0) return value; // Do nothing for invalid clamp bit. 
- const int64_t max_value = (1LL << (bit - 1)) - 1; - const int64_t min_value = -(1LL << (bit - 1)); - return (int32_t)clamp64(value, min_value, max_value); -} - -static INLINE void clamp_buf(int32_t *buf, int32_t size, int8_t bit) { - for (int i = 0; i < size; ++i) buf[i] = clamp_value(buf[i], bit); -} - -void av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); - -#ifdef __cplusplus -} -#endif - -#endif // AOM_AV1_COMMON_AV1_INV_TXFM1D_H_ diff --git a/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h b/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h deleted file mode 100644 index 7d80a0099..000000000 --- a/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_AV1_INV_TXFM1D_CFG_H_ -#define AOM_AV1_COMMON_AV1_INV_TXFM1D_CFG_H_ -#include "av1/common/av1_inv_txfm1d.h" - -// sum of fwd_shift_## -static const int8_t inv_start_range[TX_SIZES_ALL] = { - 5, // 4x4 transform - 6, // 8x8 transform - 7, // 16x16 transform - 7, // 32x32 transform - 7, // 64x64 transform - 5, // 4x8 transform - 5, // 8x4 transform - 6, // 8x16 transform - 6, // 16x8 transform - 6, // 16x32 transform - 6, // 32x16 transform - 6, // 32x64 transform - 6, // 64x32 transform - 6, // 4x16 transform - 6, // 16x4 transform - 7, // 8x32 transform - 7, // 32x8 transform - 7, // 16x64 transform - 7, // 64x16 transform -}; - -extern const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL]; - -// Values in both inv_cos_bit_col and inv_cos_bit_row are always 12 -// for each valid row and col combination -#define INV_COS_BIT 12 -extern const int8_t inv_cos_bit_col[5 /*row*/][5 /*col*/]; -extern const int8_t inv_cos_bit_row[5 /*row*/][5 /*col*/]; - -#endif // AOM_AV1_COMMON_AV1_INV_TXFM1D_CFG_H_ diff --git a/third_party/aom/av1/common/av1_inv_txfm2d.c b/third_party/aom/av1/common/av1_inv_txfm2d.c deleted file mode 100644 index 4e6944314..000000000 --- a/third_party/aom/av1/common/av1_inv_txfm2d.c +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "av1/common/enums.h" -#include "av1/common/av1_txfm.h" -#include "av1/common/av1_inv_txfm1d.h" -#include "av1/common/av1_inv_txfm1d_cfg.h" - -void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, - 0.5 shifts per pixel. */ - int i; - tran_low_t output[16]; - tran_low_t a1, b1, c1, d1, e1; - const tran_low_t *ip = input; - tran_low_t *op = output; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - for (i = 0; i < 4; i++) { - a1 = ip[0] >> UNIT_QUANT_SHIFT; - c1 = ip[1] >> UNIT_QUANT_SHIFT; - d1 = ip[2] >> UNIT_QUANT_SHIFT; - b1 = ip[3] >> UNIT_QUANT_SHIFT; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - - op[0] = a1; - op[1] = b1; - op[2] = c1; - op[3] = d1; - ip += 4; - op += 4; - } - - ip = output; - for (i = 0; i < 4; i++) { - a1 = ip[4 * 0]; - c1 = ip[4 * 1]; - d1 = ip[4 * 2]; - b1 = ip[4 * 3]; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - - range_check_value(a1, bd + 1); - range_check_value(b1, bd + 1); - range_check_value(c1, bd + 1); - range_check_value(d1, bd + 1); - - dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); - dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); - dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); - dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); - - ip++; - dest++; - } -} - -void av1_highbd_iwht4x4_1_add_c(const tran_low_t *in, 
uint8_t *dest8, - int dest_stride, int bd) { - int i; - tran_low_t a1, e1; - tran_low_t tmp[4]; - const tran_low_t *ip = in; - tran_low_t *op = tmp; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - (void)bd; - - a1 = ip[0] >> UNIT_QUANT_SHIFT; - e1 = a1 >> 1; - a1 -= e1; - op[0] = a1; - op[1] = op[2] = op[3] = e1; - - ip = tmp; - for (i = 0; i < 4; i++) { - e1 = ip[0] >> 1; - a1 = ip[0] - e1; - dest[dest_stride * 0] = - highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd); - dest[dest_stride * 1] = - highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd); - dest[dest_stride * 2] = - highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd); - dest[dest_stride * 3] = - highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd); - ip++; - dest++; - } -} - -static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) { - switch (txfm_type) { - case TXFM_TYPE_DCT4: return av1_idct4_new; - case TXFM_TYPE_DCT8: return av1_idct8_new; - case TXFM_TYPE_DCT16: return av1_idct16_new; - case TXFM_TYPE_DCT32: return av1_idct32_new; - case TXFM_TYPE_DCT64: return av1_idct64_new; - case TXFM_TYPE_ADST4: return av1_iadst4_new; - case TXFM_TYPE_ADST8: return av1_iadst8_new; - case TXFM_TYPE_ADST16: return av1_iadst16_new; - case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c; - case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c; - case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c; - case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c; - default: assert(0); return NULL; - } -} - -static const int8_t inv_shift_4x4[2] = { 0, -4 }; -static const int8_t inv_shift_8x8[2] = { -1, -4 }; -static const int8_t inv_shift_16x16[2] = { -2, -4 }; -static const int8_t inv_shift_32x32[2] = { -2, -4 }; -static const int8_t inv_shift_64x64[2] = { -2, -4 }; -static const int8_t inv_shift_4x8[2] = { 0, -4 }; -static const int8_t inv_shift_8x4[2] = { 0, -4 }; -static const int8_t inv_shift_8x16[2] = { -1, -4 }; -static const int8_t inv_shift_16x8[2] = { -1, -4 }; -static const int8_t inv_shift_16x32[2] = { -1, 
-4 }; -static const int8_t inv_shift_32x16[2] = { -1, -4 }; -static const int8_t inv_shift_32x64[2] = { -1, -4 }; -static const int8_t inv_shift_64x32[2] = { -1, -4 }; -static const int8_t inv_shift_4x16[2] = { -1, -4 }; -static const int8_t inv_shift_16x4[2] = { -1, -4 }; -static const int8_t inv_shift_8x32[2] = { -2, -4 }; -static const int8_t inv_shift_32x8[2] = { -2, -4 }; -static const int8_t inv_shift_16x64[2] = { -2, -4 }; -static const int8_t inv_shift_64x16[2] = { -2, -4 }; - -const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL] = { - inv_shift_4x4, inv_shift_8x8, inv_shift_16x16, inv_shift_32x32, - inv_shift_64x64, inv_shift_4x8, inv_shift_8x4, inv_shift_8x16, - inv_shift_16x8, inv_shift_16x32, inv_shift_32x16, inv_shift_32x64, - inv_shift_64x32, inv_shift_4x16, inv_shift_16x4, inv_shift_8x32, - inv_shift_32x8, inv_shift_16x64, inv_shift_64x16, -}; - -/* clang-format off */ -const int8_t inv_cos_bit_col[MAX_TXWH_IDX] // txw_idx - [MAX_TXWH_IDX] = { // txh_idx - { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0, 0 }, - { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0 }, - { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }, - { 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }, - { 0, 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT } - }; - -const int8_t inv_cos_bit_row[MAX_TXWH_IDX] // txw_idx - [MAX_TXWH_IDX] = { // txh_idx - { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0, 0 }, - { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0 }, - { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }, - { 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }, - { 0, 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT } - }; -/* clang-format on */ - -const int8_t iadst4_range[7] = { 0, 1, 0, 0, 0, 0, 0 }; - -void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size, - TXFM_2D_FLIP_CFG *cfg) { - assert(cfg != NULL); - cfg->tx_size = tx_size; - set_flip_cfg(tx_type, cfg); - av1_zero(cfg->stage_range_col); - av1_zero(cfg->stage_range_row); - 
set_flip_cfg(tx_type, cfg); - const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type]; - const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type]; - cfg->shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - cfg->cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - cfg->cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col]; - if (cfg->txfm_type_col == TXFM_TYPE_ADST4) { - memcpy(cfg->stage_range_col, iadst4_range, sizeof(iadst4_range)); - } - cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row]; - if (cfg->txfm_type_row == TXFM_TYPE_ADST4) { - memcpy(cfg->stage_range_row, iadst4_range, sizeof(iadst4_range)); - } - cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col]; - cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row]; -} - -void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row, - const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size, - int bd) { - const int fwd_shift = inv_start_range[tx_size]; - const int8_t *shift = cfg->shift; - int8_t opt_range_row, opt_range_col; - if (bd == 8) { - opt_range_row = 16; - opt_range_col = 16; - } else if (bd == 10) { - opt_range_row = 18; - opt_range_col = 16; - } else { - assert(bd == 12); - opt_range_row = 20; - opt_range_col = 18; - } - // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning - for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) { - int real_range_row = cfg->stage_range_row[i] + fwd_shift + bd + 1; - (void)real_range_row; - if (cfg->txfm_type_row == TXFM_TYPE_ADST4 && i == 1) { - // the adst4 may use 1 extra bit on top of opt_range_row at stage 1 - // so opt_range_col >= real_range_col will not hold - stage_range_row[i] = opt_range_row; - } else { - assert(opt_range_row >= real_range_row); - stage_range_row[i] = opt_range_row; - } - } - // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning - for (int i = 0; 
i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) { - int real_range_col = - cfg->stage_range_col[i] + fwd_shift + shift[0] + bd + 1; - (void)real_range_col; - if (cfg->txfm_type_col == TXFM_TYPE_ADST4 && i == 1) { - // the adst4 may use 1 extra bit on top of opt_range_row at stage 1 - // so opt_range_col >= real_range_col will not hold - stage_range_col[i] = opt_range_col; - } else { - assert(opt_range_col >= real_range_col); - stage_range_col[i] = opt_range_col; - } - } -} - -static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output, - int stride, TXFM_2D_FLIP_CFG *cfg, - int32_t *txfm_buf, TX_SIZE tx_size, - int bd) { - // Note when assigning txfm_size_col, we use the txfm_size from the - // row configuration and vice versa. This is intentionally done to - // accurately perform rectangular transforms. When the transform is - // rectangular, the number of columns will be the same as the - // txfm_size stored in the row cfg struct. It will make no difference - // for square transforms. - const int txfm_size_col = tx_size_wide[cfg->tx_size]; - const int txfm_size_row = tx_size_high[cfg->tx_size]; - // Take the shift from the larger dimension in the rectangular case. 
- const int8_t *shift = cfg->shift; - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - int8_t stage_range_row[MAX_TXFM_STAGE_NUM]; - int8_t stage_range_col[MAX_TXFM_STAGE_NUM]; - assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM); - assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM); - av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd); - - const int8_t cos_bit_col = cfg->cos_bit_col; - const int8_t cos_bit_row = cfg->cos_bit_row; - const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col); - const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row); - - // txfm_buf's length is txfm_size_row * txfm_size_col + 2 * - // AOMMAX(txfm_size_row, txfm_size_col) - // it is used for intermediate data buffering - const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col); - int32_t *temp_in = txfm_buf; - int32_t *temp_out = temp_in + buf_offset; - int32_t *buf = temp_out + buf_offset; - int32_t *buf_ptr = buf; - int c, r; - - // Rows - for (r = 0; r < txfm_size_row; ++r) { - if (abs(rect_type) == 1) { - for (c = 0; c < txfm_size_col; ++c) { - temp_in[c] = round_shift((int64_t)input[c] * NewInvSqrt2, NewSqrt2Bits); - } - clamp_buf(temp_in, txfm_size_col, bd + 8); - txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row); - } else { - for (c = 0; c < txfm_size_col; ++c) { - temp_in[c] = input[c]; - } - clamp_buf(temp_in, txfm_size_col, bd + 8); - txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row); - } - av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]); - input += txfm_size_col; - buf_ptr += txfm_size_col; - } - - // Columns - for (c = 0; c < txfm_size_col; ++c) { - if (cfg->lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + c]; - } else { - // flip left right - for (r = 0; r < txfm_size_row; ++r) - temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)]; - } - clamp_buf(temp_in, txfm_size_row, AOMMAX(bd + 6, 16)); - 
txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - if (cfg->ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = - highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd); - } - } else { - // flip upside down - for (r = 0; r < txfm_size_row; ++r) { - output[r * stride + c] = highbd_clip_pixel_add( - output[r * stride + c], temp_out[txfm_size_row - r - 1], bd); - } - } - } -} - -static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output, - int stride, int32_t *txfm_buf, - TX_TYPE tx_type, TX_SIZE tx_size, - int bd) { - TXFM_2D_FLIP_CFG cfg; - av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg); - // Forward shift sum uses larger square size, to be consistent with what - // av1_gen_inv_stage_range() does for inverse shifts. - inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, tx_size, bd); -} - -void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X8, bd); -} - -void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd); -} - -void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[8 * 16 + 16 + 16]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X16, bd); -} - -void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 16 + 16]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd); -} - -void av1_inv_txfm2d_add_16x32_c(const int32_t *input, 
uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[16 * 32 + 32 + 32]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X32, bd); -} - -void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 32 + 32]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd); -} - -void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 4 + 4]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X4, bd); -} - -void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[8 * 8 + 8 + 8]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X8, bd); -} - -void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[16 * 16 + 16 + 16]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X16, bd); -} - -void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd); -} - -void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - // TODO(urvang): Can the same array be reused, instead of using a new array? - // Remap 32x32 input into a modified 64x64 by: - // - Copying over these values in top-left 32x32 locations. - // - Setting the rest of the locations to 0. 
- int32_t mod_input[64 * 64]; - for (int row = 0; row < 32; ++row) { - memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); - memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); - } - memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input)); - DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]); - inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X64, - bd); -} - -void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - // Remap 32x32 input into a modified 64x32 by: - // - Copying over these values in top-left 32x32 locations. - // - Setting the rest of the locations to 0. - int32_t mod_input[64 * 32]; - for (int row = 0; row < 32; ++row) { - memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); - memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); - } - DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]); - inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32, - bd); -} - -void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - // Remap 32x32 input into a modified 32x64 input by: - // - Copying over these values in top-left 32x32 locations. - // - Setting the rest of the locations to 0. - int32_t mod_input[32 * 64]; - memcpy(mod_input, input, 32 * 32 * sizeof(*mod_input)); - memset(mod_input + 32 * 32, 0, 32 * 32 * sizeof(*mod_input)); - DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]); - inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_32X64, - bd); -} - -void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - // Remap 16x32 input into a modified 16x64 input by: - // - Copying over these values in top-left 16x32 locations. - // - Setting the rest of the locations to 0. 
- int32_t mod_input[16 * 64]; - memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input)); - memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input)); - DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]); - inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_16X64, - bd); -} - -void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - // Remap 32x16 input into a modified 64x16 by: - // - Copying over these values in top-left 32x16 locations. - // - Setting the rest of the locations to 0. - int32_t mod_input[64 * 16]; - for (int row = 0; row < 16; ++row) { - memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); - memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); - } - DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]); - inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16, - bd); -} - -void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X16, bd); -} - -void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd); -} - -void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X32, bd); -} - -void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]); - inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd); -} diff --git 
a/third_party/aom/av1/common/av1_loopfilter.c b/third_party/aom/av1/common/av1_loopfilter.c deleted file mode 100644 index 537d8dfe9..000000000 --- a/third_party/aom/av1/common/av1_loopfilter.c +++ /dev/null @@ -1,2377 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <math.h> - -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" -#include "av1/common/av1_loopfilter.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/reconinter.h" -#include "av1/common/seg_common.h" - -static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = { - { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H }, - { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U }, - { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V } -}; - -static const int delta_lf_id_lut[MAX_MB_PLANE][2] = { - { 0, 1 }, { 2, 2 }, { 3, 3 } -}; - -typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR; - -static const int mode_lf_lut[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES - 1, 1, 0, 1, // INTER_MODES (GLOBALMV == 0) - 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0) -}; - -#if LOOP_FILTER_BITMASK -// 256 bit masks (64x64 / 4x4) for left transform size for Y plane. -// We use 4 uint64_t to represent the 256 bit. -// Each 1 represents a position where we should apply a loop filter -// across the left border of an 4x4 block boundary. 
-// -// In the case of TX_8x8-> ( in low order byte first we end up with -// a mask that looks like this (-- and | are used for better view) -// -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// ----------------- -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// 10101010|10101010 -// -// A loopfilter should be applied to every other 4x4 horizontally. - -// 256 bit masks (64x64 / 4x4) for above transform size for Y plane. -// We use 4 uint64_t to represent the 256 bit. -// Each 1 represents a position where we should apply a loop filter -// across the top border of an 4x4 block boundary. -// -// In the case of TX_8x8-> ( in low order byte first we end up with -// a mask that looks like this -// -// 11111111|11111111 -// 00000000|00000000 -// 11111111|11111111 -// 00000000|00000000 -// 11111111|11111111 -// 00000000|00000000 -// 11111111|11111111 -// 00000000|00000000 -// ----------------- -// 11111111|11111111 -// 00000000|00000000 -// 11111111|11111111 -// 00000000|00000000 -// 11111111|11111111 -// 00000000|00000000 -// 11111111|11111111 -// 00000000|00000000 -// -// A loopfilter should be applied to every other 4x4 horizontally. 
- -const int mask_id_table_tx_4x4[BLOCK_SIZES_ALL] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1, 13, 14, 15, 16, 17, 18 -}; - -const int mask_id_table_tx_8x8[BLOCK_SIZES_ALL] = { - -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, 10, 11, 12, 13 -}; - -const int mask_id_table_tx_16x16[BLOCK_SIZES_ALL] = { - -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, -1, -1, -1, -1, -1, -1, -1, 7, 8 -}; - -const int mask_id_table_tx_32x32[BLOCK_SIZES_ALL] = { -1, -1, -1, -1, -1, -1, - -1, -1, -1, 0, 1, 2, - 3, -1, -1, -1, -1, -1, - -1, -1, -1, -1 }; - -const FilterMask left_mask_univariant_reordered[67] = { - // TX_4X4 - { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X4, TX_4X4 - { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X8, TX_4X4 - { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X4, TX_4X4 - { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X8, TX_4X4 - { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X16, TX_4X4 - { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X8, TX_4X4 - { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X16, TX_4X4 - { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X32, TX_4X4 - { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X16, TX_4X4 - { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X32, TX_4X4 - { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 
0x00ff00ff00ff00ffULL, - 0x00ff00ff00ff00ffULL } }, // block size 32X64, TX_4X4 - { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_4X4 - { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, - 0xffffffffffffffffULL } }, // block size 64X64, TX_4X4 - { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X16, TX_4X4 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X4, TX_4X4 - { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X32, TX_4X4 - { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X8, TX_4X4 - { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL, - 0x000f000f000f000fULL } }, // block size 16X64, TX_4X4 - { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_4X4 - // TX_8X8 - { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X8, TX_8X8 - { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X16, TX_8X8 - { { 0x0000000000050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X8, TX_8X8 - { { 0x0005000500050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X16, TX_8X8 - { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X32, TX_8X8 - { { 0x0055005500550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X16, TX_8X8 - { { 0x0055005500550055ULL, 0x0055005500550055ULL, 
0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X32, TX_8X8 - { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0055005500550055ULL, - 0x0055005500550055ULL } }, // block size 32X64, TX_8X8 - { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_8X8 - { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x5555555555555555ULL, - 0x5555555555555555ULL } }, // block size 64X64, TX_8X8 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X32, TX_8X8 - { { 0x0000000000550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X8, TX_8X8 - { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0005000500050005ULL, - 0x0005000500050005ULL } }, // block size 16X64, TX_8X8 - { { 0x5555555555555555ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_8X8 - // TX_16X16 - { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X16, TX_16X16 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X32, TX_16X16 - { { 0x0011001100110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X16, TX_16X16 - { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X32, TX_16X16 - { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0011001100110011ULL, - 0x0011001100110011ULL } }, // block size 32X64, TX_16X16 - { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_16X16 - { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x1111111111111111ULL, - 0x1111111111111111ULL } }, // block size 64X64, TX_16X16 - { { 0x0001000100010001ULL, 
0x0001000100010001ULL, 0x0001000100010001ULL, - 0x0001000100010001ULL } }, // block size 16X64, TX_16X16 - { { 0x1111111111111111ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_16X16 - // TX_32X32 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X32, TX_32X32 - { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL, - 0x0101010101010101ULL } }, // block size 32X64, TX_32X32 - { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_32X32 - { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL, - 0x0101010101010101ULL } }, // block size 64X64, TX_32X32 - // TX_64X64 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL, - 0x0001000100010001ULL } }, // block size 64X64, TX_64X64 - // 2:1, 1:2 transform sizes. - { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X8, TX_4X8 - { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X16, TX_4X8 - { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X4, TX_8X4 - { { 0x0000000000000005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X4, TX_8X4 - { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X16, TX_8X16 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X32, TX_8X16 - { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X8, TX_16X8 - { { 0x0000000000110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block 
size 32X8, TX_16X8 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X32, TX_16X32 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL, - 0x0001000100010001ULL } }, // block size 16X64, TX_16X32 - { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X16, TX_32X16 - { { 0x0101010101010101ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_32X16 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL, - 0x0001000100010001ULL } }, // block size 32X64, TX_32X64 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_64X32 - // 4:1, 1:4 transform sizes. - { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X16, TX_4X16 - { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X4, TX_16X4 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X32, TX_8X32 - { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X8, TX_32X8 - { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL, - 0x0001000100010001ULL } }, // block size 16X64, TX_16X64 - { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_64X16 -}; - -const FilterMask above_mask_univariant_reordered[67] = { - // TX_4X4 - { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X4, TX_4X4 - { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X8, TX_4X4 - { { 
0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X4, TX_4X4 - { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X8, TX_4X4 - { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X16, TX_4X4 - { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X8, TX_4X4 - { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X16, TX_4X4 - { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X32, TX_4X4 - { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X16, TX_4X4 - { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X32, TX_4X4 - { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, - 0x00ff00ff00ff00ffULL } }, // block size 32X64, TX_4X4 - { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_4X4 - { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, - 0xffffffffffffffffULL } }, // block size 64X64, TX_4x4 - { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X16, TX_4X4 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X4, TX_4X4 - { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X32, TX_4X4 - { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X8, TX_4X4 - { { 0x000f000f000f000fULL, 
0x000f000f000f000fULL, 0x000f000f000f000fULL, - 0x000f000f000f000fULL } }, // block size 16X64, TX_4X4 - { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_4X4 - // TX_8X8 - { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X8, TX_8X8 - { { 0x0000000300000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X16, TX_8X8 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X8, TX_8X8 - { { 0x0000000f0000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X16, TX_8X8 - { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X32, TX_8X8 - { { 0x000000ff000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X16, TX_8X8 - { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X32, TX_8X8 - { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x000000ff000000ffULL, - 0x000000ff000000ffULL } }, // block size 32X64, TX_8X8 - { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_8X8 - { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, - 0x0000ffff0000ffffULL } }, // block size 64X64, TX_8X8 - { { 0x0000000300000003ULL, 0x0000000300000003ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X32, TX_8X8 - { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X8, TX_8X8 - { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000f0000000fULL, - 0x0000000f0000000fULL } }, // block size 16X64, TX_8X8 - { { 0x0000ffff0000ffffULL, 
0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_8X8 - // TX_16X16 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X16, TX_16X16 - { { 0x000000000000000fULL, 0x000000000000000fULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X32, TX_16X16 - { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X16, TX_16X16 - { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X32, TX_16X16 - { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x00000000000000ffULL, - 0x00000000000000ffULL } }, // block size 32X64, TX_16X16 - { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_16X16 - { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x000000000000ffffULL, - 0x000000000000ffffULL } }, // block size 64X64, TX_16X16 - { { 0x000000000000000fULL, 0x000000000000000fULL, 0x000000000000000fULL, - 0x000000000000000fULL } }, // block size 16X64, TX_16X16 - { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_16X16 - // TX_32X32 - { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X32, TX_32X32 - { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x00000000000000ffULL, - 0x0000000000000000ULL } }, // block size 32X64, TX_32X32 - { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_32X32 - { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x000000000000ffffULL, - 0x0000000000000000ULL } }, // block size 64X64, TX_32X32 - // TX_64X64 - { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block 
size 64X64, TX_64X64 - // 2:1, 1:2 transform sizes. - { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X8, TX_4X8 - { { 0x0000000100000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X16, TX_4X8 - { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X4, TX_8X4 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X4, TX_8X4 - { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X16, TX_8X16 - { { 0x0000000000000003ULL, 0x0000000000000003ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X32, TX_8X16 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X8, TX_16X8 - { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X8, TX_16X8 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X32, TX_16X32 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x000000000000000fULL, - 0x0000000000000000ULL } }, // block size 16X64, TX_16X32 - { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X16, TX_32X16 - { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_32X16 - { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X64, TX_32X64 - { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X32, TX_64X32 - // 4:1, 1:4 transform sizes. 
- { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 4X16, TX_4X16 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X4, TX_16X4 - { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 8X32, TX_8X32 - { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 32X8, TX_32X8 - { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 16X64, TX_16X64 - { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL, - 0x0000000000000000ULL } }, // block size 64X16, TX_64X16 -}; - -LoopFilterMask *get_loop_filter_mask(const AV1_COMMON *const cm, int mi_row, - int mi_col) { - assert(cm->lf.lfm != NULL); - const int row = mi_row >> MIN_MIB_SIZE_LOG2; // 64x64 - const int col = mi_col >> MIN_MIB_SIZE_LOG2; - return &cm->lf.lfm[row * cm->lf.lfm_stride + col]; -} - -typedef void (*LpfFunc)(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh); - -typedef void (*LpfDualFunc)(uint8_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1); - -typedef void (*HbdLpfFunc)(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, int bd); - -typedef void (*HbdLpfDualFunc)(uint16_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1, int bd); -#endif // LOOP_FILTER_BITMASK - -static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { - int lvl; - - // For each possible value for the loop filter fill out limits - for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { - // Set loop filter 
parameters that control sharpness. - int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); - - if (sharpness_lvl > 0) { - if (block_inside_limit > (9 - sharpness_lvl)) - block_inside_limit = (9 - sharpness_lvl); - } - - if (block_inside_limit < 1) block_inside_limit = 1; - - memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); - memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), - SIMD_WIDTH); - } -} - -uint8_t get_filter_level(const AV1_COMMON *cm, const loop_filter_info_n *lfi_n, - const int dir_idx, int plane, - const MB_MODE_INFO *mbmi) { - const int segment_id = mbmi->segment_id; - if (cm->delta_lf_present_flag) { - int delta_lf; - if (cm->delta_lf_multi) { - const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx]; - delta_lf = mbmi->delta_lf[delta_lf_idx]; - } else { - delta_lf = mbmi->delta_lf_from_base; - } - int base_level; - if (plane == 0) - base_level = cm->lf.filter_level[dir_idx]; - else if (plane == 1) - base_level = cm->lf.filter_level_u; - else - base_level = cm->lf.filter_level_v; - int lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER); - assert(plane >= 0 && plane <= 2); - const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx]; - if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) { - const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id); - lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER); - } - - if (cm->lf.mode_ref_delta_enabled) { - const int scale = 1 << (lvl_seg >> 5); - lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale; - if (mbmi->ref_frame[0] > INTRA_FRAME) - lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale; - lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER); - } - return lvl_seg; - } else { - return lfi_n->lvl[plane][segment_id][dir_idx][mbmi->ref_frame[0]] - [mode_lf_lut[mbmi->mode]]; - } -} - -void av1_loop_filter_init(AV1_COMMON *cm) { - assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut)); - loop_filter_info_n *lfi = 
&cm->lf_info; - struct loopfilter *lf = &cm->lf; - int lvl; - - lf->combine_vert_horz_lf = 1; - - // init limits for given sharpness - update_sharpness(lfi, lf->sharpness_level); - - // init hev threshold const vectors - for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) - memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); -} - -// Update the loop filter for the current frame. -// This should be called before loop_filter_rows(), -// av1_loop_filter_frame() calls this function directly. -void av1_loop_filter_frame_init(AV1_COMMON *cm, int plane_start, - int plane_end) { - int filt_lvl[MAX_MB_PLANE], filt_lvl_r[MAX_MB_PLANE]; - int plane; - int seg_id; - // n_shift is the multiplier for lf_deltas - // the multiplier is 1 for when filter_lvl is between 0 and 31; - // 2 when filter_lvl is between 32 and 63 - loop_filter_info_n *const lfi = &cm->lf_info; - struct loopfilter *const lf = &cm->lf; - const struct segmentation *const seg = &cm->seg; - - // update sharpness limits - update_sharpness(lfi, lf->sharpness_level); - - filt_lvl[0] = cm->lf.filter_level[0]; - filt_lvl[1] = cm->lf.filter_level_u; - filt_lvl[2] = cm->lf.filter_level_v; - - filt_lvl_r[0] = cm->lf.filter_level[1]; - filt_lvl_r[1] = cm->lf.filter_level_u; - filt_lvl_r[2] = cm->lf.filter_level_v; - - for (plane = plane_start; plane < plane_end; plane++) { - if (plane == 0 && !filt_lvl[0] && !filt_lvl_r[0]) - break; - else if (plane == 1 && !filt_lvl[1]) - continue; - else if (plane == 2 && !filt_lvl[2]) - continue; - - for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { - for (int dir = 0; dir < 2; ++dir) { - int lvl_seg = (dir == 0) ? 
filt_lvl[plane] : filt_lvl_r[plane]; - assert(plane >= 0 && plane <= 2); - const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir]; - if (segfeature_active(seg, seg_id, seg_lf_feature_id)) { - const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id); - lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER); - } - - if (!lf->mode_ref_delta_enabled) { - // we could get rid of this if we assume that deltas are set to - // zero when not in use; encoder always uses deltas - memset(lfi->lvl[plane][seg_id][dir], lvl_seg, - sizeof(lfi->lvl[plane][seg_id][dir])); - } else { - int ref, mode; - const int scale = 1 << (lvl_seg >> 5); - const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; - lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] = - clamp(intra_lvl, 0, MAX_LOOP_FILTER); - - for (ref = LAST_FRAME; ref < REF_FRAMES; ++ref) { - for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { - const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale + - lf->mode_deltas[mode] * scale; - lfi->lvl[plane][seg_id][dir][ref][mode] = - clamp(inter_lvl, 0, MAX_LOOP_FILTER); - } - } - } - } - } - } -} - -#if LOOP_FILTER_BITMASK -// A 64x64 tx block requires 256 bits to represent each 4x4 tx block. -// Every 4 rows is represented by one uint64_t mask. Hence, -// there are 4 uint64_t bitmask[4] to represent the 64x64 block. -// -// Given a location by (mi_col, mi_row), This function returns the index -// 0, 1, 2, 3 to select which bitmask[] to use, and the shift value. -// -// For example, mi_row is the offset of pixels in mi size (4), -// (mi_row / 4) returns which uint64_t. -// After locating which uint64_t, mi_row % 4 is the -// row offset, and each row has 16 = 1 << stride_log2 4x4 units. 
-// Therefore, shift = (row << stride_log2) + mi_col; -int get_index_shift(int mi_col, int mi_row, int *index) { - // *index = mi_row >> 2; - // rows = mi_row % 4; - // stride_log2 = 4; - // shift = (rows << stride_log2) + mi_col; - *index = mi_row >> 2; - return ((mi_row & 3) << 4) | mi_col; -} - -static void check_mask(const FilterMask *lfm) { -#ifndef NDEBUG - for (int i = 0; i < 4; ++i) { - assert(!(lfm[TX_4X4].bits[i] & lfm[TX_8X8].bits[i])); - assert(!(lfm[TX_4X4].bits[i] & lfm[TX_16X16].bits[i])); - assert(!(lfm[TX_4X4].bits[i] & lfm[TX_32X32].bits[i])); - assert(!(lfm[TX_4X4].bits[i] & lfm[TX_64X64].bits[i])); - assert(!(lfm[TX_8X8].bits[i] & lfm[TX_16X16].bits[i])); - assert(!(lfm[TX_8X8].bits[i] & lfm[TX_32X32].bits[i])); - assert(!(lfm[TX_8X8].bits[i] & lfm[TX_64X64].bits[i])); - assert(!(lfm[TX_16X16].bits[i] & lfm[TX_32X32].bits[i])); - assert(!(lfm[TX_16X16].bits[i] & lfm[TX_64X64].bits[i])); - assert(!(lfm[TX_32X32].bits[i] & lfm[TX_64X64].bits[i])); - } -#else - (void)lfm; -#endif -} - -static void check_loop_filter_masks(const LoopFilterMask *lfm, int plane) { - if (plane == 0) { - // Assert if we try to apply 2 different loop filters at the same - // position. 
- check_mask(lfm->left_y); - check_mask(lfm->above_y); - } else if (plane == 1) { - check_mask(lfm->left_u); - check_mask(lfm->above_u); - } else { - check_mask(lfm->left_v); - check_mask(lfm->above_v); - } -} - -static void update_masks(EDGE_DIR dir, int plane, uint64_t *mask, - TX_SIZE sqr_tx_size, LoopFilterMask *lfm) { - if (dir == VERT_EDGE) { - switch (plane) { - case 0: - for (int i = 0; i < 4; ++i) lfm->left_y[sqr_tx_size].bits[i] |= mask[i]; - break; - case 1: - for (int i = 0; i < 4; ++i) lfm->left_u[sqr_tx_size].bits[i] |= mask[i]; - break; - case 2: - for (int i = 0; i < 4; ++i) lfm->left_v[sqr_tx_size].bits[i] |= mask[i]; - break; - default: assert(plane <= 2); - } - } else { - switch (plane) { - case 0: - for (int i = 0; i < 4; ++i) - lfm->above_y[sqr_tx_size].bits[i] |= mask[i]; - break; - case 1: - for (int i = 0; i < 4; ++i) - lfm->above_u[sqr_tx_size].bits[i] |= mask[i]; - break; - case 2: - for (int i = 0; i < 4; ++i) - lfm->above_v[sqr_tx_size].bits[i] |= mask[i]; - break; - default: assert(plane <= 2); - } - } -} - -static int is_frame_boundary(AV1_COMMON *const cm, int plane, int mi_row, - int mi_col, int ssx, int ssy, EDGE_DIR dir) { - if (plane && (ssx || ssy)) { - if (ssx && ssy) { // format 420 - if ((mi_row << MI_SIZE_LOG2) > cm->height || - (mi_col << MI_SIZE_LOG2) > cm->width) - return 1; - } else if (ssx) { // format 422 - if ((mi_row << MI_SIZE_LOG2) >= cm->height || - (mi_col << MI_SIZE_LOG2) > cm->width) - return 1; - } - } else { - if ((mi_row << MI_SIZE_LOG2) >= cm->height || - (mi_col << MI_SIZE_LOG2) >= cm->width) - return 1; - } - - int row_or_col; - if (plane == 0) { - row_or_col = dir == VERT_EDGE ? mi_col : mi_row; - } else { - // chroma sub8x8 block uses bottom/right mi of co-located 8x8 luma block. - // So if mi_col == 1, it is actually the frame boundary. - if (dir == VERT_EDGE) { - row_or_col = ssx ? (mi_col & 0x0FFFFFFE) : mi_col; - } else { - row_or_col = ssy ? 
(mi_row & 0x0FFFFFFE) : mi_row; - } - } - return row_or_col == 0; -} - -static void setup_masks(AV1_COMMON *const cm, int mi_row, int mi_col, int plane, - int ssx, int ssy, TX_SIZE tx_size) { - LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col); - const int x = (mi_col << (MI_SIZE_LOG2 - ssx)); - const int y = (mi_row << (MI_SIZE_LOG2 - ssy)); - // decide whether current vertical/horizontal edge needs loop filtering - for (EDGE_DIR dir = VERT_EDGE; dir <= HORZ_EDGE; ++dir) { - // chroma sub8x8 block uses bottom/right mi of co-located 8x8 luma block. - mi_row |= ssy; - mi_col |= ssx; - - MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col; - const MB_MODE_INFO *const mbmi = mi[0]; - const int curr_skip = mbmi->skip && is_inter_block(mbmi); - const BLOCK_SIZE bsize = mbmi->sb_type; - const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy); - const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy]; - const uint8_t level = get_filter_level(cm, &cm->lf_info, dir, plane, mbmi); - const int prediction_masks = dir == VERT_EDGE - ? block_size_wide[plane_bsize] - 1 - : block_size_high[plane_bsize] - 1; - const int is_coding_block_border = - dir == VERT_EDGE ? !(x & prediction_masks) : !(y & prediction_masks); - - // TODO(chengchen): step can be optimized. - const int row_step = mi_size_high[TX_4X4] << ssy; - const int col_step = mi_size_wide[TX_4X4] << ssx; - const int mi_height = - dir == VERT_EDGE ? tx_size_high_unit[tx_size] << ssy : row_step; - const int mi_width = - dir == VERT_EDGE ? col_step : tx_size_wide_unit[tx_size] << ssx; - - // assign filter levels - for (int r = mi_row; r < mi_row + mi_height; r += row_step) { - for (int c = mi_col; c < mi_col + mi_width; c += col_step) { - // do not filter frame boundary - // Note: when chroma planes' size are half of luma plane, - // chroma plane mi corresponds to even position. - // If frame size is not even, we still need to filter this chroma - // position. 
Therefore the boundary condition check needs to be - // separated to two cases. - if (plane && (ssx || ssy)) { - if (ssx && ssy) { // format 420 - if ((r << MI_SIZE_LOG2) > cm->height || - (c << MI_SIZE_LOG2) > cm->width) - continue; - } else if (ssx) { // format 422 - if ((r << MI_SIZE_LOG2) >= cm->height || - (c << MI_SIZE_LOG2) > cm->width) - continue; - } - } else { - if ((r << MI_SIZE_LOG2) >= cm->height || - (c << MI_SIZE_LOG2) >= cm->width) - continue; - } - - const int row = r % MI_SIZE_64X64; - const int col = c % MI_SIZE_64X64; - if (plane == 0) { - if (dir == VERT_EDGE) - lfm->lfl_y_ver[row][col] = level; - else - lfm->lfl_y_hor[row][col] = level; - } else if (plane == 1) { - lfm->lfl_u[row][col] = level; - } else { - lfm->lfl_v[row][col] = level; - } - } - } - - for (int r = mi_row; r < mi_row + mi_height; r += row_step) { - for (int c = mi_col; c < mi_col + mi_width; c += col_step) { - // do not filter frame boundary - if (is_frame_boundary(cm, plane, r, c, ssx, ssy, dir)) continue; - - uint64_t mask[4] = { 0 }; - const int prev_row = dir == VERT_EDGE ? r : r - (1 << ssy); - const int prev_col = dir == VERT_EDGE ? c - (1 << ssx) : c; - MB_MODE_INFO **mi_prev = - cm->mi_grid_visible + prev_row * cm->mi_stride + prev_col; - const MB_MODE_INFO *const mbmi_prev = mi_prev[0]; - const int prev_skip = mbmi_prev->skip && is_inter_block(mbmi_prev); - const uint8_t level_prev = - get_filter_level(cm, &cm->lf_info, dir, plane, mbmi_prev); - const int is_edge = - (level || level_prev) && - (!curr_skip || !prev_skip || is_coding_block_border); - - if (is_edge) { - const TX_SIZE prev_tx_size = - plane ? av1_get_max_uv_txsize(mbmi_prev->sb_type, ssx, ssy) - : mbmi_prev->tx_size; - TX_SIZE min_tx_size = (dir == VERT_EDGE) - ? 
AOMMIN(txsize_horz_map[tx_size], - txsize_horz_map[prev_tx_size]) - : AOMMIN(txsize_vert_map[tx_size], - txsize_vert_map[prev_tx_size]); - min_tx_size = AOMMIN(min_tx_size, TX_16X16); - assert(min_tx_size < TX_SIZES); - const int row = r % MI_SIZE_64X64; - const int col = c % MI_SIZE_64X64; - int index = 0; - const int shift = get_index_shift(col, row, &index); - assert(index < 4 && index >= 0); - mask[index] |= ((uint64_t)1 << shift); - // set mask on corresponding bit - update_masks(dir, plane, mask, min_tx_size, lfm); - } - } - } - } -} - -static void setup_tx_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col, - int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - int plane, int ssx, int ssy) { - blk_row <<= ssy; - blk_col <<= ssx; - if (((mi_row + blk_row) << MI_SIZE_LOG2) >= cm->height || - ((mi_col + blk_col) << MI_SIZE_LOG2) >= cm->width) - return; - - // U/V plane, tx_size is always the largest size - if (plane) { - assert(tx_size_wide[tx_size] <= 32 && tx_size_high[tx_size] <= 32); - setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy, - tx_size); - return; - } - - MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col; - const MB_MODE_INFO *const mbmi = mi[0]; - // For Y plane: - // If intra block, tx size is univariant. - // If inter block, tx size follows inter_tx_size. - TX_SIZE plane_tx_size = tx_size; - const int is_inter = is_inter_block(mbmi); - - if (plane == 0) { - if (is_inter) { - if (mbmi->skip) { - // TODO(chengchen): change av1_get_transform_size() to be consistant. 
- // plane_tx_size = get_max_rect_tx_size(plane_bsize); - plane_tx_size = mbmi->tx_size; - } else { - plane_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index( - plane_bsize, blk_row, blk_col)]; - } - } else { - MB_MODE_INFO **mi_this = cm->mi_grid_visible + - (mi_row + blk_row) * cm->mi_stride + mi_col + - blk_col; - const MB_MODE_INFO *const mbmi_this = mi_this[0]; - plane_tx_size = mbmi_this->tx_size; - } - } - - assert(txsize_to_bsize[plane_tx_size] <= plane_bsize); - - if (plane || plane_tx_size == tx_size) { - setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy, - tx_size); - } else { - const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; - const int bsw = tx_size_wide_unit[sub_txs]; - const int bsh = tx_size_high_unit[sub_txs]; - for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { - for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { - const int offsetr = blk_row + row; - const int offsetc = blk_col + col; - setup_tx_block_mask(cm, mi_row, mi_col, offsetr, offsetc, plane_bsize, - sub_txs, plane, ssx, ssy); - } - } - } -} - -static void setup_fix_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col, - int plane, int ssx, int ssy) { - MB_MODE_INFO **mi = - cm->mi_grid_visible + (mi_row | ssy) * cm->mi_stride + (mi_col | ssx); - const MB_MODE_INFO *const mbmi = mi[0]; - - const BLOCK_SIZE bsize = mbmi->sb_type; - const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy); - const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy]; - - const int block_width = mi_size_wide[plane_bsize]; - const int block_height = mi_size_high[plane_bsize]; - - TX_SIZE max_txsize = max_txsize_rect_lookup[plane_bsize]; - // The decoder is designed so that it can process 64x64 luma pixels at a - // time. If this is a chroma plane with subsampling and bsize corresponds to - // a subsampled BLOCK_128X128 then the lookup above will give TX_64X64. 
That - // mustn't be used for the subsampled plane (because it would be bigger than - // a 64x64 luma block) so we round down to TX_32X32. - if (plane && txsize_sqr_up_map[max_txsize] == TX_64X64) { - if (max_txsize == TX_16X64) - max_txsize = TX_16X32; - else if (max_txsize == TX_64X16) - max_txsize = TX_32X16; - else - max_txsize = TX_32X32; - } - - const BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize]; - const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0]; - const int bh = block_size_high[txb_size] >> tx_size_wide_log2[0]; - const BLOCK_SIZE max_unit_bsize = ss_size_lookup[BLOCK_64X64][ssx][ssy]; - int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; - int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; - - mu_blocks_wide = AOMMIN(block_width, mu_blocks_wide); - mu_blocks_high = AOMMIN(block_height, mu_blocks_high); - - // Y: Largest tx_size is 64x64, while superblock size can be 128x128. - // Here we ensure that setup_tx_block_mask process at most a 64x64 block. - // U/V: largest tx size is 32x32. 
- for (int idy = 0; idy < block_height; idy += mu_blocks_high) { - for (int idx = 0; idx < block_width; idx += mu_blocks_wide) { - const int unit_height = AOMMIN(mu_blocks_high + idy, block_height); - const int unit_width = AOMMIN(mu_blocks_wide + idx, block_width); - for (int blk_row = idy; blk_row < unit_height; blk_row += bh) { - for (int blk_col = idx; blk_col < unit_width; blk_col += bw) { - setup_tx_block_mask(cm, mi_row, mi_col, blk_row, blk_col, plane_bsize, - max_txsize, plane, ssx, ssy); - } - } - } - } -} - -static void setup_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col, - BLOCK_SIZE bsize, int plane, int ssx, int ssy) { - if ((mi_row << MI_SIZE_LOG2) >= cm->height || - (mi_col << MI_SIZE_LOG2) >= cm->width) - return; - - const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize); - const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); - const int hbs = mi_size_wide[bsize] / 2; - const int quarter_step = mi_size_wide[bsize] / 4; - const int allow_sub8x8 = (ssx || ssy) ? 
bsize > BLOCK_8X8 : 1; - const int has_next_row = - (((mi_row + hbs) << MI_SIZE_LOG2) < cm->height) & allow_sub8x8; - const int has_next_col = - (((mi_col + hbs) << MI_SIZE_LOG2) < cm->width) & allow_sub8x8; - int i; - - switch (partition) { - case PARTITION_NONE: - setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy); - break; - case PARTITION_HORZ: - setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy); - if (has_next_row) - setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy); - break; - case PARTITION_VERT: - setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy); - if (has_next_col) - setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy); - break; - case PARTITION_SPLIT: - setup_block_mask(cm, mi_row, mi_col, subsize, plane, ssx, ssy); - if (has_next_col) - setup_block_mask(cm, mi_row, mi_col + hbs, subsize, plane, ssx, ssy); - if (has_next_row) - setup_block_mask(cm, mi_row + hbs, mi_col, subsize, plane, ssx, ssy); - if (has_next_col & has_next_row) - setup_block_mask(cm, mi_row + hbs, mi_col + hbs, subsize, plane, ssx, - ssy); - break; - case PARTITION_HORZ_A: - setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy); - if (has_next_col) - setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy); - if (has_next_row) - setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy); - break; - case PARTITION_HORZ_B: - setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy); - if (has_next_row) - setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy); - if (has_next_col & has_next_row) - setup_fix_block_mask(cm, mi_row + hbs, mi_col + hbs, plane, ssx, ssy); - break; - case PARTITION_VERT_A: - setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy); - if (has_next_row) - setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy); - if (has_next_col) - setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy); - break; - case PARTITION_VERT_B: - setup_fix_block_mask(cm, mi_row, mi_col, plane, 
ssx, ssy); - if (has_next_col) - setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy); - if (has_next_row) - setup_fix_block_mask(cm, mi_row + hbs, mi_col + hbs, plane, ssx, ssy); - break; - case PARTITION_HORZ_4: - for (i = 0; i < 4; ++i) { - int this_mi_row = mi_row + i * quarter_step; - if (i > 0 && (this_mi_row << MI_SIZE_LOG2) >= cm->height) break; - // chroma plane filter the odd location - if (plane && bsize == BLOCK_16X16 && (i & 0x01)) continue; - - setup_fix_block_mask(cm, this_mi_row, mi_col, plane, ssx, ssy); - } - break; - case PARTITION_VERT_4: - for (i = 0; i < 4; ++i) { - int this_mi_col = mi_col + i * quarter_step; - if (i > 0 && this_mi_col >= cm->mi_cols) break; - // chroma plane filter the odd location - if (plane && bsize == BLOCK_16X16 && (i & 0x01)) continue; - - setup_fix_block_mask(cm, mi_row, this_mi_col, plane, ssx, ssy); - } - break; - default: assert(0); - } -} - -// TODO(chengchen): if lossless, do not need to setup mask. But when -// segments enabled, each segment has different lossless settings. 
-void av1_setup_bitmask(AV1_COMMON *const cm, int mi_row, int mi_col, int plane, - int subsampling_x, int subsampling_y, int row_end, - int col_end) { - const int num_64x64 = cm->seq_params.mib_size >> MIN_MIB_SIZE_LOG2; - for (int y = 0; y < num_64x64; ++y) { - for (int x = 0; x < num_64x64; ++x) { - const int row = mi_row + y * MI_SIZE_64X64; - const int col = mi_col + x * MI_SIZE_64X64; - if (row >= row_end || col >= col_end) continue; - if ((row << MI_SIZE_LOG2) >= cm->height || - (col << MI_SIZE_LOG2) >= cm->width) - continue; - - LoopFilterMask *lfm = get_loop_filter_mask(cm, row, col); - if (lfm == NULL) return; - - // init mask to zero - if (plane == 0) { - av1_zero(lfm->left_y); - av1_zero(lfm->above_y); - av1_zero(lfm->lfl_y_ver); - av1_zero(lfm->lfl_y_hor); - } else if (plane == 1) { - av1_zero(lfm->left_u); - av1_zero(lfm->above_u); - av1_zero(lfm->lfl_u); - } else { - av1_zero(lfm->left_v); - av1_zero(lfm->above_v); - av1_zero(lfm->lfl_v); - } - } - } - - // set up bitmask for each superblock - setup_block_mask(cm, mi_row, mi_col, cm->seq_params.sb_size, plane, - subsampling_x, subsampling_y); - - for (int y = 0; y < num_64x64; ++y) { - for (int x = 0; x < num_64x64; ++x) { - const int row = mi_row + y * MI_SIZE_64X64; - const int col = mi_col + x * MI_SIZE_64X64; - if (row >= row_end || col >= col_end) continue; - if ((row << MI_SIZE_LOG2) >= cm->height || - (col << MI_SIZE_LOG2) >= cm->width) - continue; - - LoopFilterMask *lfm = get_loop_filter_mask(cm, row, col); - if (lfm == NULL) return; - - // check if the mask is valid - check_loop_filter_masks(lfm, plane); - - { - // Let 16x16 hold 32x32 (Y/U/V) and 64x64(Y only). - // Even tx size is greater, we only apply max length filter, which - // is 16. 
- if (plane == 0) { - for (int j = 0; j < 4; ++j) { - lfm->left_y[TX_16X16].bits[j] |= lfm->left_y[TX_32X32].bits[j]; - lfm->left_y[TX_16X16].bits[j] |= lfm->left_y[TX_64X64].bits[j]; - lfm->above_y[TX_16X16].bits[j] |= lfm->above_y[TX_32X32].bits[j]; - lfm->above_y[TX_16X16].bits[j] |= lfm->above_y[TX_64X64].bits[j]; - - // set 32x32 and 64x64 to 0 - lfm->left_y[TX_32X32].bits[j] = 0; - lfm->left_y[TX_64X64].bits[j] = 0; - lfm->above_y[TX_32X32].bits[j] = 0; - lfm->above_y[TX_64X64].bits[j] = 0; - } - } else if (plane == 1) { - for (int j = 0; j < 4; ++j) { - lfm->left_u[TX_16X16].bits[j] |= lfm->left_u[TX_32X32].bits[j]; - lfm->above_u[TX_16X16].bits[j] |= lfm->above_u[TX_32X32].bits[j]; - - // set 32x32 to 0 - lfm->left_u[TX_32X32].bits[j] = 0; - lfm->above_u[TX_32X32].bits[j] = 0; - } - } else { - for (int j = 0; j < 4; ++j) { - lfm->left_v[TX_16X16].bits[j] |= lfm->left_v[TX_32X32].bits[j]; - lfm->above_v[TX_16X16].bits[j] |= lfm->above_v[TX_32X32].bits[j]; - - // set 32x32 to 0 - lfm->left_v[TX_32X32].bits[j] = 0; - lfm->above_v[TX_32X32].bits[j] = 0; - } - } - } - - // check if the mask is valid - check_loop_filter_masks(lfm, plane); - } - } -} - -static void filter_selectively_vert_row2( - int subsampling_factor, uint8_t *s, int pitch, int plane, - uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0, - uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1, - const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2) { - uint64_t mask; - const int step = 1 << subsampling_factor; - - for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 | - mask_8x8_1 | mask_4x4_1; - mask; mask >>= step) { - const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; - const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2; - - if (mask & 1) { - if ((mask_16x16_0 | mask_16x16_1) & 1) { - // chroma plane filters less pixels introduced in deblock_13tap - // experiment - LpfFunc lpf_vertical = plane ? 
aom_lpf_vertical_6 : aom_lpf_vertical_14; - - if ((mask_16x16_0 & mask_16x16_1) & 1) { - if (plane) { - aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else { - aom_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } - } else if (mask_16x16_0 & 1) { - lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); - } else { - lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } - } - - if ((mask_8x8_0 | mask_8x8_1) & 1) { - // chroma plane filters less pixels introduced in deblock_13tap - // experiment - LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_8; - - if ((mask_8x8_0 & mask_8x8_1) & 1) { - if (plane) { - aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else { - aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } - } else if (mask_8x8_0 & 1) { - lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); - } else { - lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } - } - - if ((mask_4x4_0 | mask_4x4_1) & 1) { - if ((mask_4x4_0 & mask_4x4_1) & 1) { - aom_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if (mask_4x4_0 & 1) { - aom_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); - } else { - aom_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } - } - } - - s += 4; - lfl += step; - lfl2 += step; - mask_16x16_0 >>= step; - mask_8x8_0 >>= step; - mask_4x4_0 >>= step; - mask_16x16_1 >>= step; - mask_8x8_1 >>= step; - mask_4x4_1 >>= step; - } -} - -static void highbd_filter_selectively_vert_row2( - int subsampling_factor, uint16_t *s, int pitch, int plane, - uint64_t mask_16x16_0, uint64_t 
mask_8x8_0, uint64_t mask_4x4_0, - uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1, - const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2, int bd) { - uint64_t mask; - const int step = 1 << subsampling_factor; - - for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 | - mask_8x8_1 | mask_4x4_1; - mask; mask >>= step) { - const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; - const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2; - - if (mask & 1) { - if ((mask_16x16_0 | mask_16x16_1) & 1) { - // chroma plane filters less pixels introduced in deblock_13tap - // experiment - HbdLpfFunc highbd_lpf_vertical = - plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_14; - - if ((mask_16x16_0 & mask_16x16_1) & 1) { - if (plane) { - aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); - } else { - aom_highbd_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); - } - } else if (mask_16x16_0 & 1) { - highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - bd); - } else { - highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } - } - - if ((mask_8x8_0 | mask_8x8_1) & 1) { - HbdLpfFunc highbd_lpf_vertical = - plane ? 
aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_8; - - if ((mask_8x8_0 & mask_8x8_1) & 1) { - if (plane) { - aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); - } else { - aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); - } - } else if (mask_8x8_0 & 1) { - highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - bd); - } else { - highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } - } - - if ((mask_4x4_0 | mask_4x4_1) & 1) { - if ((mask_4x4_0 & mask_4x4_1) & 1) { - aom_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_4x4_0 & 1) { - aom_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); - } else { - aom_highbd_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); - } - } - } - - s += 4; - lfl += step; - lfl2 += step; - mask_16x16_0 >>= step; - mask_8x8_0 >>= step; - mask_4x4_0 >>= step; - mask_16x16_1 >>= step; - mask_8x8_1 >>= step; - mask_4x4_1 >>= step; - } -} - -static void filter_selectively_horiz(uint8_t *s, int pitch, int plane, - int subsampling, uint64_t mask_16x16, - uint64_t mask_8x8, uint64_t mask_4x4, - const loop_filter_info_n *lfi_n, - const uint8_t *lfl) { - uint64_t mask; - int count; - const int step = 1 << subsampling; - const unsigned int two_block_mask = subsampling ? 5 : 3; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; - // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + step); - - count = 1; - if (mask & 1) { - if (mask_16x16 & 1) { - // chroma plane filters less pixels introduced in deblock_13tap - // experiment - LpfFunc lpf_horizontal = - plane ? 
aom_lpf_horizontal_6 : aom_lpf_horizontal_14; - - if ((mask_16x16 & two_block_mask) == two_block_mask) { - if (plane) { - aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr); - } else { - aom_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr); - } - count = 2; - } else { - lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - } - } else if (mask_8x8 & 1) { - // chroma plane filters less pixels introduced in deblock_13tap - // experiment - LpfFunc lpf_horizontal = - plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_8; - - if ((mask_8x8 & two_block_mask) == two_block_mask) { - if (plane) { - aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr); - } else { - aom_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr); - } - count = 2; - } else { - lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - } - } else if (mask_4x4 & 1) { - if ((mask_4x4 & two_block_mask) == two_block_mask) { - aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr); - count = 2; - } else { - aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - } - } - } - - s += 4 * count; - lfl += step * count; - mask_16x16 >>= step * count; - mask_8x8 >>= step * count; - mask_4x4 >>= step * count; - } -} - -static void highbd_filter_selectively_horiz( - uint16_t *s, int pitch, int plane, int subsampling, uint64_t mask_16x16, - uint64_t mask_8x8, uint64_t mask_4x4, const loop_filter_info_n *lfi_n, - uint8_t *lfl, int bd) { - uint64_t mask; - int count; - const int step = 1 << subsampling; - const unsigned int two_block_mask = subsampling ? 
5 : 3; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; - // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + step); - - count = 1; - if (mask & 1) { - if (mask_16x16 & 1) { - HbdLpfFunc highbd_lpf_horizontal = - plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_14; - - if ((mask_16x16 & two_block_mask) == two_block_mask) { - if (plane) { - aom_highbd_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, - lfin->lim, lfin->hev_thr, bd); - } else { - aom_highbd_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, - lfin->lim, lfin->hev_thr, bd); - } - count = 2; - } else { - highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, - bd); - } - } else if (mask_8x8 & 1) { - HbdLpfFunc highbd_lpf_horizontal = - plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_8; - - if ((mask_8x8 & two_block_mask) == two_block_mask) { - if (plane) { - aom_highbd_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, - lfin->lim, lfin->hev_thr, bd); - } else { - aom_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, - lfin->lim, lfin->hev_thr, bd); - } - count = 2; - } else { - highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, - bd); - } - } else if (mask_4x4 & 1) { - if ((mask_4x4 & two_block_mask) == two_block_mask) { - aom_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr, bd); - count = 2; - } else { - aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } - } - } - - s += 4 * count; - lfl += step * count; - mask_16x16 >>= step * count; - mask_8x8 >>= step * count; - mask_4x4 >>= step * count; - } -} - -void av1_build_bitmask_vert_info( - AV1_COMMON *const cm, const struct 
macroblockd_plane *const plane_ptr, - int plane) { - const int subsampling_x = plane_ptr->subsampling_x; - const int subsampling_y = plane_ptr->subsampling_y; - const int row_step = (MI_SIZE >> MI_SIZE_LOG2); - const int is_uv = plane > 0; - TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16; - uint8_t level, prev_level = 1; - int skip, prev_skip = 0; - int is_coding_block_border; - - for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height; r += row_step) { - const int mi_row = r << subsampling_y; - const int row = mi_row % MI_SIZE_64X64; - int index = 0; - const int shift = get_index_shift(0, row, &index); - - for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width; - c += (tx_size_wide_unit[TX_64X64] >> subsampling_x)) { - const int mi_col = c << subsampling_x; - LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col); - - for (int col_in_unit = 0; - col_in_unit < (tx_size_wide_unit[TX_64X64] >> subsampling_x);) { - const int x = (c + col_in_unit) << MI_SIZE_LOG2; - if (x >= plane_ptr->dst.width) break; - const int col = col_in_unit << subsampling_x; - const uint64_t mask = ((uint64_t)1 << (shift | col)); - skip = lfm->skip.bits[index] & mask; - is_coding_block_border = lfm->is_vert_border.bits[index] & mask; - switch (plane) { - case 0: level = lfm->lfl_y_ver[row][col]; break; - case 1: level = lfm->lfl_u[row][col]; break; - case 2: level = lfm->lfl_v[row][col]; break; - default: assert(plane >= 0 && plane <= 2); return; - } - for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) { - if (is_uv && ts == TX_64X64) continue; - if (lfm->tx_size_ver[is_uv][ts].bits[index] & mask) { - tx_size = ts; - break; - } - } - if ((c + col_in_unit > 0) && (level || prev_level) && - (!prev_skip || !skip || is_coding_block_border)) { - const TX_SIZE min_tx_size = - AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size)); - const int tmp_row = (mi_row | subsampling_y) % MI_SIZE_64X64; - const int tmp_col = (col | subsampling_x) % MI_SIZE_64X64; - const int shift_1 = 
get_index_shift(tmp_col, tmp_row, &index); - const uint64_t mask_1 = ((uint64_t)1 << shift_1); - switch (plane) { - case 0: lfm->left_y[min_tx_size].bits[index] |= mask_1; break; - case 1: lfm->left_u[min_tx_size].bits[index] |= mask_1; break; - case 2: lfm->left_v[min_tx_size].bits[index] |= mask_1; break; - default: assert(plane >= 0 && plane <= 2); return; - } - } - - // update prev info - prev_level = level; - prev_skip = skip; - prev_tx_size = tx_size; - // advance - col_in_unit += tx_size_wide_unit[tx_size]; - } - } - } -} - -void av1_build_bitmask_horz_info( - AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr, - int plane) { - const int subsampling_x = plane_ptr->subsampling_x; - const int subsampling_y = plane_ptr->subsampling_y; - const int col_step = (MI_SIZE >> MI_SIZE_LOG2); - const int is_uv = plane > 0; - TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16; - uint8_t level, prev_level = 1; - int skip, prev_skip = 0; - int is_coding_block_border; - - for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width; c += col_step) { - const int mi_col = c << subsampling_x; - const int col = mi_col % MI_SIZE_64X64; - - for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height; - r += (tx_size_high_unit[TX_64X64] >> subsampling_y)) { - const int mi_row = r << subsampling_y; - LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col); - - for (int r_in_unit = 0; - r_in_unit < (tx_size_high_unit[TX_64X64] >> subsampling_y);) { - const int y = (r + r_in_unit) << MI_SIZE_LOG2; - if (y >= plane_ptr->dst.height) break; - const int row = r_in_unit << subsampling_y; - int index = 0; - const int shift = get_index_shift(col, row, &index); - const uint64_t mask = ((uint64_t)1 << shift); - skip = lfm->skip.bits[index] & mask; - is_coding_block_border = lfm->is_horz_border.bits[index] & mask; - switch (plane) { - case 0: level = lfm->lfl_y_hor[row][col]; break; - case 1: level = lfm->lfl_u[row][col]; break; - case 2: level = lfm->lfl_v[row][col]; 
break; - default: assert(plane >= 0 && plane <= 2); return; - } - for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) { - if (is_uv && ts == TX_64X64) continue; - if (lfm->tx_size_hor[is_uv][ts].bits[index] & mask) { - tx_size = ts; - break; - } - } - if ((r + r_in_unit > 0) && (level || prev_level) && - (!prev_skip || !skip || is_coding_block_border)) { - const TX_SIZE min_tx_size = - AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size)); - const int tmp_row = (row | subsampling_y) % MI_SIZE_64X64; - const int tmp_col = (mi_col | subsampling_x) % MI_SIZE_64X64; - const int shift_1 = get_index_shift(tmp_col, tmp_row, &index); - const uint64_t mask_1 = ((uint64_t)1 << shift_1); - - switch (plane) { - case 0: lfm->above_y[min_tx_size].bits[index] |= mask_1; break; - case 1: lfm->above_u[min_tx_size].bits[index] |= mask_1; break; - case 2: lfm->above_v[min_tx_size].bits[index] |= mask_1; break; - default: assert(plane >= 0 && plane <= 2); return; - } - } - - // update prev info - prev_level = level; - prev_skip = skip; - prev_tx_size = tx_size; - // advance - r_in_unit += tx_size_high_unit[tx_size]; - } - } - } -} - -void av1_filter_block_plane_bitmask_vert( - AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl, - int mi_row, int mi_col) { - struct buf_2d *const dst = &plane_ptr->dst; - uint8_t *const buf0 = dst->buf; - const int ssx = plane_ptr->subsampling_x; - const int ssy = plane_ptr->subsampling_y; - const int mask_cutoff = 0xffff; - const int row_step = 1 << ssy; - const int two_row_step = 2 << ssy; - const int row_stride = dst->stride << MI_SIZE_LOG2; - const int two_row_stride = row_stride << 1; - uint64_t mask_16x16 = 0; - uint64_t mask_8x8 = 0; - uint64_t mask_4x4 = 0; - uint8_t *lfl; - uint8_t *lfl2; - LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col); - assert(lfm); - - // 1. vertical filtering. 
filter two rows at a time - for (int r = 0; - ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64; - r += two_row_step) { - const int row = r | ssy; - const int row_next = row + row_step; - const int col = ssx; - int index = 0; - const int shift = get_index_shift(col, row, &index); - int index_next = 0; - const int shift_next = get_index_shift(col, row_next, &index_next); - switch (pl) { - case 0: - mask_16x16 = lfm->left_y[TX_16X16].bits[index]; - mask_8x8 = lfm->left_y[TX_8X8].bits[index]; - mask_4x4 = lfm->left_y[TX_4X4].bits[index]; - lfl = &lfm->lfl_y_ver[row][col]; - lfl2 = &lfm->lfl_y_ver[row_next][col]; - break; - case 1: - mask_16x16 = lfm->left_u[TX_16X16].bits[index]; - mask_8x8 = lfm->left_u[TX_8X8].bits[index]; - mask_4x4 = lfm->left_u[TX_4X4].bits[index]; - lfl = &lfm->lfl_u[row][col]; - lfl2 = &lfm->lfl_u[row_next][col]; - break; - case 2: - mask_16x16 = lfm->left_v[TX_16X16].bits[index]; - mask_8x8 = lfm->left_v[TX_8X8].bits[index]; - mask_4x4 = lfm->left_v[TX_4X4].bits[index]; - lfl = &lfm->lfl_v[row][col]; - lfl2 = &lfm->lfl_v[row_next][col]; - break; - default: assert(pl >= 0 && pl <= 2); return; - } - uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff; - uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff; - uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff; - uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff; - uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff; - uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff; - - if (cm->seq_params.use_highbitdepth) - highbd_filter_selectively_vert_row2( - ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0, - mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1, - &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth); - else - filter_selectively_vert_row2( - ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0, - mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2); - dst->buf += 
two_row_stride; - } - // reset buf pointer for horizontal filtering - dst->buf = buf0; -} - -void av1_filter_block_plane_bitmask_horz( - AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl, - int mi_row, int mi_col) { - struct buf_2d *const dst = &plane_ptr->dst; - uint8_t *const buf0 = dst->buf; - const int ssx = plane_ptr->subsampling_x; - const int ssy = plane_ptr->subsampling_y; - const int mask_cutoff = 0xffff; - const int row_step = 1 << ssy; - const int row_stride = dst->stride << MI_SIZE_LOG2; - uint64_t mask_16x16 = 0; - uint64_t mask_8x8 = 0; - uint64_t mask_4x4 = 0; - uint8_t *lfl; - LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col); - assert(lfm); - for (int r = 0; - ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64; - r += row_step) { - if (mi_row + r == 0) { - dst->buf += row_stride; - continue; - } - const int row = r | ssy; - const int col = ssx; - int index = 0; - const int shift = get_index_shift(col, row, &index); - switch (pl) { - case 0: - mask_16x16 = lfm->above_y[TX_16X16].bits[index]; - mask_8x8 = lfm->above_y[TX_8X8].bits[index]; - mask_4x4 = lfm->above_y[TX_4X4].bits[index]; - lfl = &lfm->lfl_y_hor[row][col]; - break; - case 1: - mask_16x16 = lfm->above_u[TX_16X16].bits[index]; - mask_8x8 = lfm->above_u[TX_8X8].bits[index]; - mask_4x4 = lfm->above_u[TX_4X4].bits[index]; - lfl = &lfm->lfl_u[row][col]; - break; - case 2: - mask_16x16 = lfm->above_v[TX_16X16].bits[index]; - mask_8x8 = lfm->above_v[TX_8X8].bits[index]; - mask_4x4 = lfm->above_v[TX_4X4].bits[index]; - lfl = &lfm->lfl_v[row][col]; - break; - default: assert(pl >= 0 && pl <= 2); return; - } - mask_16x16 = (mask_16x16 >> shift) & mask_cutoff; - mask_8x8 = (mask_8x8 >> shift) & mask_cutoff; - mask_4x4 = (mask_4x4 >> shift) & mask_cutoff; - - if (cm->seq_params.use_highbitdepth) - highbd_filter_selectively_horiz( - CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16, - mask_8x8, mask_4x4, &cm->lf_info, lfl, 
(int)cm->seq_params.bit_depth); - else - filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16, - mask_8x8, mask_4x4, &cm->lf_info, lfl); - dst->buf += row_stride; - } - // reset buf pointer for next block - dst->buf = buf0; -} - -void av1_filter_block_plane_ver(AV1_COMMON *const cm, - struct macroblockd_plane *const plane_ptr, - int pl, int mi_row, int mi_col) { - struct buf_2d *const dst = &plane_ptr->dst; - int r, c; - const int ssx = plane_ptr->subsampling_x; - const int ssy = plane_ptr->subsampling_y; - const int mask_cutoff = 0xffff; - const int single_step = 1 << ssy; - const int r_step = 2 << ssy; - uint64_t mask_16x16 = 0; - uint64_t mask_8x8 = 0; - uint64_t mask_4x4 = 0; - uint8_t *lfl; - uint8_t *lfl2; - - // filter two rows at a time - for (r = 0; r < cm->seq_params.mib_size && - ((mi_row + r) << MI_SIZE_LOG2 < cm->height); - r += r_step) { - for (c = 0; c < cm->seq_params.mib_size && - ((mi_col + c) << MI_SIZE_LOG2 < cm->width); - c += MI_SIZE_64X64) { - dst->buf += ((c << MI_SIZE_LOG2) >> ssx); - LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c); - assert(lfm); - const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64; - const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64; - int index = 0; - const int shift = get_index_shift(col, row, &index); - // current and next row should belong to the same mask_idx and index - // next row's shift - const int row_next = row + single_step; - int index_next = 0; - const int shift_next = get_index_shift(col, row_next, &index_next); - switch (pl) { - case 0: - mask_16x16 = lfm->left_y[TX_16X16].bits[index]; - mask_8x8 = lfm->left_y[TX_8X8].bits[index]; - mask_4x4 = lfm->left_y[TX_4X4].bits[index]; - lfl = &lfm->lfl_y_ver[row][col]; - lfl2 = &lfm->lfl_y_ver[row_next][col]; - break; - case 1: - mask_16x16 = lfm->left_u[TX_16X16].bits[index]; - mask_8x8 = lfm->left_u[TX_8X8].bits[index]; - mask_4x4 = lfm->left_u[TX_4X4].bits[index]; - lfl = &lfm->lfl_u[row][col]; - lfl2 = 
&lfm->lfl_u[row_next][col]; - break; - case 2: - mask_16x16 = lfm->left_v[TX_16X16].bits[index]; - mask_8x8 = lfm->left_v[TX_8X8].bits[index]; - mask_4x4 = lfm->left_v[TX_4X4].bits[index]; - lfl = &lfm->lfl_v[row][col]; - lfl2 = &lfm->lfl_v[row_next][col]; - break; - default: assert(pl >= 0 && pl <= 2); return; - } - uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff; - uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff; - uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff; - uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff; - uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff; - uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff; - - if (cm->seq_params.use_highbitdepth) - highbd_filter_selectively_vert_row2( - ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0, - mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1, - &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth); - else - filter_selectively_vert_row2(ssx, dst->buf, dst->stride, pl, - mask_16x16_0, mask_8x8_0, mask_4x4_0, - mask_16x16_1, mask_8x8_1, mask_4x4_1, - &cm->lf_info, lfl, lfl2); - dst->buf -= ((c << MI_SIZE_LOG2) >> ssx); - } - dst->buf += 2 * MI_SIZE * dst->stride; - } -} - -void av1_filter_block_plane_hor(AV1_COMMON *const cm, - struct macroblockd_plane *const plane_ptr, - int pl, int mi_row, int mi_col) { - struct buf_2d *const dst = &plane_ptr->dst; - int r, c; - const int ssx = plane_ptr->subsampling_x; - const int ssy = plane_ptr->subsampling_y; - const int mask_cutoff = 0xffff; - const int r_step = 1 << ssy; - uint64_t mask_16x16 = 0; - uint64_t mask_8x8 = 0; - uint64_t mask_4x4 = 0; - uint8_t *lfl; - - for (r = 0; r < cm->seq_params.mib_size && - ((mi_row + r) << MI_SIZE_LOG2 < cm->height); - r += r_step) { - for (c = 0; c < cm->seq_params.mib_size && - ((mi_col + c) << MI_SIZE_LOG2 < cm->width); - c += MI_SIZE_64X64) { - if (mi_row + r == 0) continue; - - dst->buf += ((c << MI_SIZE_LOG2) >> ssx); - 
LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c); - assert(lfm); - const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64; - const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64; - int index = 0; - const int shift = get_index_shift(col, row, &index); - switch (pl) { - case 0: - mask_16x16 = lfm->above_y[TX_16X16].bits[index]; - mask_8x8 = lfm->above_y[TX_8X8].bits[index]; - mask_4x4 = lfm->above_y[TX_4X4].bits[index]; - lfl = &lfm->lfl_y_hor[row][col]; - break; - case 1: - mask_16x16 = lfm->above_u[TX_16X16].bits[index]; - mask_8x8 = lfm->above_u[TX_8X8].bits[index]; - mask_4x4 = lfm->above_u[TX_4X4].bits[index]; - lfl = &lfm->lfl_u[row][col]; - break; - case 2: - mask_16x16 = lfm->above_v[TX_16X16].bits[index]; - mask_8x8 = lfm->above_v[TX_8X8].bits[index]; - mask_4x4 = lfm->above_v[TX_4X4].bits[index]; - lfl = &lfm->lfl_v[row][col]; - break; - default: assert(pl >= 0 && pl <= 2); return; - } - mask_16x16 = (mask_16x16 >> shift) & mask_cutoff; - mask_8x8 = (mask_8x8 >> shift) & mask_cutoff; - mask_4x4 = (mask_4x4 >> shift) & mask_cutoff; - - if (cm->seq_params.use_highbitdepth) - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, pl, ssx, mask_16x16, - mask_8x8, mask_4x4, &cm->lf_info, lfl, - (int)cm->seq_params.bit_depth); - else - filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16, - mask_8x8, mask_4x4, &cm->lf_info, lfl); - dst->buf -= ((c << MI_SIZE_LOG2) >> ssx); - } - dst->buf += MI_SIZE * dst->stride; - } -} -#endif // LOOP_FILTER_BITMASK - -static TX_SIZE get_transform_size(const MACROBLOCKD *const xd, - const MB_MODE_INFO *const mbmi, - const EDGE_DIR edge_dir, const int mi_row, - const int mi_col, const int plane, - const struct macroblockd_plane *plane_ptr) { - assert(mbmi != NULL); - if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4; - - TX_SIZE tx_size = - (plane == AOM_PLANE_Y) - ? 
mbmi->tx_size - : av1_get_max_uv_txsize(mbmi->sb_type, plane_ptr->subsampling_x, - plane_ptr->subsampling_y); - assert(tx_size < TX_SIZES_ALL); - if ((plane == AOM_PLANE_Y) && is_inter_block(mbmi) && !mbmi->skip) { - const BLOCK_SIZE sb_type = mbmi->sb_type; - const int blk_row = mi_row & (mi_size_high[sb_type] - 1); - const int blk_col = mi_col & (mi_size_wide[sb_type] - 1); - const TX_SIZE mb_tx_size = - mbmi->inter_tx_size[av1_get_txb_size_index(sb_type, blk_row, blk_col)]; - assert(mb_tx_size < TX_SIZES_ALL); - tx_size = mb_tx_size; - } - - // since in case of chrominance or non-square transorm need to convert - // transform size into transform size in particular direction. - // for vertical edge, filter direction is horizontal, for horizontal - // edge, filter direction is vertical. - tx_size = (VERT_EDGE == edge_dir) ? txsize_horz_map[tx_size] - : txsize_vert_map[tx_size]; - return tx_size; -} - -typedef struct AV1_DEBLOCKING_PARAMETERS { - // length of the filter applied to the outer edge - uint32_t filter_length; - // deblocking limits - const uint8_t *lim; - const uint8_t *mblim; - const uint8_t *hev_thr; -} AV1_DEBLOCKING_PARAMETERS; - -// Return TX_SIZE from get_transform_size(), so it is plane and direction -// awared -static TX_SIZE set_lpf_parameters( - AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step, - const AV1_COMMON *const cm, const MACROBLOCKD *const xd, - const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y, - const int plane, const struct macroblockd_plane *const plane_ptr) { - // reset to initial values - params->filter_length = 0; - - // no deblocking is required - const uint32_t width = plane_ptr->dst.width; - const uint32_t height = plane_ptr->dst.height; - if ((width <= x) || (height <= y)) { - // just return the smallest transform unit size - return TX_4X4; - } - - const uint32_t scale_horz = plane_ptr->subsampling_x; - const uint32_t scale_vert = plane_ptr->subsampling_y; - // for sub8x8 block, chroma prediction 
mode is obtained from the bottom/right - // mi structure of the co-located 8x8 luma block. so for chroma plane, mi_row - // and mi_col should map to the bottom/right mi structure, i.e, both mi_row - // and mi_col should be odd number for chroma plane. - const int mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2); - const int mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2); - MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col; - const MB_MODE_INFO *mbmi = mi[0]; - // If current mbmi is not correctly setup, return an invalid value to stop - // filtering. One example is that if this tile is not coded, then its mbmi - // it not set up. - if (mbmi == NULL) return TX_INVALID; - - const TX_SIZE ts = - get_transform_size(xd, mi[0], edge_dir, mi_row, mi_col, plane, plane_ptr); - - { - const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y); - const uint32_t transform_masks = - edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1; - const int32_t tu_edge = (coord & transform_masks) ? (0) : (1); - - if (!tu_edge) return ts; - - // prepare outer edge parameters. deblock the edge if it's an edge of a TU - { - const uint32_t curr_level = - get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi); - const int curr_skipped = mbmi->skip && is_inter_block(mbmi); - uint32_t level = curr_level; - if (coord) { - { - const MB_MODE_INFO *const mi_prev = *(mi - mode_step); - if (mi_prev == NULL) return TX_INVALID; - const int pv_row = - (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert)); - const int pv_col = - (VERT_EDGE == edge_dir) ? 
(mi_col - (1 << scale_horz)) : (mi_col); - const TX_SIZE pv_ts = get_transform_size( - xd, mi_prev, edge_dir, pv_row, pv_col, plane, plane_ptr); - - const uint32_t pv_lvl = - get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev); - - const int pv_skip = mi_prev->skip && is_inter_block(mi_prev); - const BLOCK_SIZE bsize = - get_plane_block_size(mbmi->sb_type, plane_ptr->subsampling_x, - plane_ptr->subsampling_y); - const int prediction_masks = edge_dir == VERT_EDGE - ? block_size_wide[bsize] - 1 - : block_size_high[bsize] - 1; - const int32_t pu_edge = !(coord & prediction_masks); - // if the current and the previous blocks are skipped, - // deblock the edge if the edge belongs to a PU's edge only. - if ((curr_level || pv_lvl) && - (!pv_skip || !curr_skipped || pu_edge)) { - const TX_SIZE min_ts = AOMMIN(ts, pv_ts); - if (TX_4X4 >= min_ts) { - params->filter_length = 4; - } else if (TX_8X8 == min_ts) { - if (plane != 0) - params->filter_length = 6; - else - params->filter_length = 8; - } else { - params->filter_length = 14; - // No wide filtering for chroma plane - if (plane != 0) { - params->filter_length = 6; - } - } - - // update the level if the current block is skipped, - // but the previous one is not - level = (curr_level) ? 
(curr_level) : (pv_lvl); - } - } - } - // prepare common parameters - if (params->filter_length) { - const loop_filter_thresh *const limits = cm->lf_info.lfthr + level; - params->lim = limits->lim; - params->mblim = limits->mblim; - params->hev_thr = limits->hev_thr; - } - } - } - - return ts; -} - -void av1_filter_block_plane_vert(const AV1_COMMON *const cm, - const MACROBLOCKD *const xd, const int plane, - const MACROBLOCKD_PLANE *const plane_ptr, - const uint32_t mi_row, const uint32_t mi_col) { - const int row_step = MI_SIZE >> MI_SIZE_LOG2; - const uint32_t scale_horz = plane_ptr->subsampling_x; - const uint32_t scale_vert = plane_ptr->subsampling_y; - uint8_t *const dst_ptr = plane_ptr->dst.buf; - const int dst_stride = plane_ptr->dst.stride; - const int y_range = (MAX_MIB_SIZE >> scale_vert); - const int x_range = (MAX_MIB_SIZE >> scale_horz); - const int use_highbitdepth = cm->seq_params.use_highbitdepth; - const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth; - for (int y = 0; y < y_range; y += row_step) { - uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride; - for (int x = 0; x < x_range;) { - // inner loop always filter vertical edges in a MI block. If MI size - // is 8x8, it will filter the vertical edge aligned with a 8x8 block. 
- // If 4x4 trasnform is used, it will then filter the internal edge - // aligned with a 4x4 block - const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE; - const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE; - uint32_t advance_units; - TX_SIZE tx_size; - AV1_DEBLOCKING_PARAMETERS params; - memset(¶ms, 0, sizeof(params)); - - tx_size = - set_lpf_parameters(¶ms, ((ptrdiff_t)1 << scale_horz), cm, xd, - VERT_EDGE, curr_x, curr_y, plane, plane_ptr); - if (tx_size == TX_INVALID) { - params.filter_length = 0; - tx_size = TX_4X4; - } - - switch (params.filter_length) { - // apply 4-tap filtering - case 4: - if (use_highbitdepth) - aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride, - params.mblim, params.lim, params.hev_thr, - bit_depth); - else - aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim, - params.hev_thr); - break; - case 6: // apply 6-tap filter for chroma plane only - assert(plane != 0); - if (use_highbitdepth) - aom_highbd_lpf_vertical_6(CONVERT_TO_SHORTPTR(p), dst_stride, - params.mblim, params.lim, params.hev_thr, - bit_depth); - else - aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim, - params.hev_thr); - break; - // apply 8-tap filtering - case 8: - if (use_highbitdepth) - aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride, - params.mblim, params.lim, params.hev_thr, - bit_depth); - else - aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim, - params.hev_thr); - break; - // apply 14-tap filtering - case 14: - if (use_highbitdepth) - aom_highbd_lpf_vertical_14(CONVERT_TO_SHORTPTR(p), dst_stride, - params.mblim, params.lim, params.hev_thr, - bit_depth); - else - aom_lpf_vertical_14(p, dst_stride, params.mblim, params.lim, - params.hev_thr); - break; - // no filtering - default: break; - } - // advance the destination pointer - advance_units = tx_size_wide_unit[tx_size]; - x += advance_units; - p += advance_units * MI_SIZE; - } - } -} - -void 
av1_filter_block_plane_horz(const AV1_COMMON *const cm, - const MACROBLOCKD *const xd, const int plane, - const MACROBLOCKD_PLANE *const plane_ptr, - const uint32_t mi_row, const uint32_t mi_col) { - const int col_step = MI_SIZE >> MI_SIZE_LOG2; - const uint32_t scale_horz = plane_ptr->subsampling_x; - const uint32_t scale_vert = plane_ptr->subsampling_y; - uint8_t *const dst_ptr = plane_ptr->dst.buf; - const int dst_stride = plane_ptr->dst.stride; - const int y_range = (MAX_MIB_SIZE >> scale_vert); - const int x_range = (MAX_MIB_SIZE >> scale_horz); - const int use_highbitdepth = cm->seq_params.use_highbitdepth; - const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth; - for (int x = 0; x < x_range; x += col_step) { - uint8_t *p = dst_ptr + x * MI_SIZE; - for (int y = 0; y < y_range;) { - // inner loop always filter vertical edges in a MI block. If MI size - // is 8x8, it will first filter the vertical edge aligned with a 8x8 - // block. If 4x4 trasnform is used, it will then filter the internal - // edge aligned with a 4x4 block - const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE; - const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE; - uint32_t advance_units; - TX_SIZE tx_size; - AV1_DEBLOCKING_PARAMETERS params; - memset(¶ms, 0, sizeof(params)); - - tx_size = - set_lpf_parameters(¶ms, (cm->mi_stride << scale_vert), cm, xd, - HORZ_EDGE, curr_x, curr_y, plane, plane_ptr); - if (tx_size == TX_INVALID) { - params.filter_length = 0; - tx_size = TX_4X4; - } - - switch (params.filter_length) { - // apply 4-tap filtering - case 4: - if (use_highbitdepth) - aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride, - params.mblim, params.lim, - params.hev_thr, bit_depth); - else - aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim, - params.hev_thr); - break; - // apply 6-tap filtering - case 6: - assert(plane != 0); - if (use_highbitdepth) - aom_highbd_lpf_horizontal_6(CONVERT_TO_SHORTPTR(p), dst_stride, - 
params.mblim, params.lim, - params.hev_thr, bit_depth); - else - aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim, - params.hev_thr); - break; - // apply 8-tap filtering - case 8: - if (use_highbitdepth) - aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride, - params.mblim, params.lim, - params.hev_thr, bit_depth); - else - aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim, - params.hev_thr); - break; - // apply 14-tap filtering - case 14: - if (use_highbitdepth) - aom_highbd_lpf_horizontal_14(CONVERT_TO_SHORTPTR(p), dst_stride, - params.mblim, params.lim, - params.hev_thr, bit_depth); - else - aom_lpf_horizontal_14(p, dst_stride, params.mblim, params.lim, - params.hev_thr); - break; - // no filtering - default: break; - } - - // advance the destination pointer - advance_units = tx_size_high_unit[tx_size]; - y += advance_units; - p += advance_units * dst_stride * MI_SIZE; - } - } -} - -static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, - MACROBLOCKD *xd, int start, int stop, -#if LOOP_FILTER_BITMASK - int is_decoding, -#endif - int plane_start, int plane_end) { - struct macroblockd_plane *pd = xd->plane; - const int col_start = 0; - const int col_end = cm->mi_cols; - int mi_row, mi_col; - int plane; - -#if LOOP_FILTER_BITMASK - if (is_decoding) { - for (plane = plane_start; plane < plane_end; plane++) { - if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1])) - break; - else if (plane == 1 && !(cm->lf.filter_level_u)) - continue; - else if (plane == 2 && !(cm->lf.filter_level_v)) - continue; - - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, 0, 0, - plane, plane + 1); - av1_build_bitmask_vert_info(cm, &pd[plane], plane); - av1_build_bitmask_horz_info(cm, &pd[plane], plane); - - // apply loop filtering which only goes through buffer once - for (mi_row = start; mi_row < stop; mi_row += MI_SIZE_64X64) { - for (mi_col = col_start; mi_col < col_end; mi_col += MI_SIZE_64X64) 
{ - av1_setup_dst_planes(pd, MI_SIZE_64X64, frame_buffer, mi_row, mi_col, - plane, plane + 1); - av1_filter_block_plane_bitmask_vert(cm, &pd[plane], plane, mi_row, - mi_col); - if (mi_col - MI_SIZE_64X64 >= 0) { - av1_setup_dst_planes(pd, MI_SIZE_64X64, frame_buffer, mi_row, - mi_col - MI_SIZE_64X64, plane, plane + 1); - av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row, - mi_col - MI_SIZE_64X64); - } - } - av1_setup_dst_planes(pd, MI_SIZE_64X64, frame_buffer, mi_row, - mi_col - MI_SIZE_64X64, plane, plane + 1); - av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row, - mi_col - MI_SIZE_64X64); - } - } - return; - } -#endif - - for (plane = plane_start; plane < plane_end; plane++) { - if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1])) - break; - else if (plane == 1 && !(cm->lf.filter_level_u)) - continue; - else if (plane == 2 && !(cm->lf.filter_level_v)) - continue; - -#if LOOP_FILTER_BITMASK - // filter all vertical edges every superblock (could be 128x128 or 64x64) - for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) { - for (mi_col = col_start; mi_col < col_end; - mi_col += cm->seq_params.mib_size) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); - - av1_setup_bitmask(cm, mi_row, mi_col, plane, pd[plane].subsampling_x, - pd[plane].subsampling_y, stop, col_end); - av1_filter_block_plane_ver(cm, &pd[plane], plane, mi_row, mi_col); - } - } - - // filter all horizontal edges every superblock - for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) { - for (mi_col = col_start; mi_col < col_end; - mi_col += cm->seq_params.mib_size) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); - - av1_filter_block_plane_hor(cm, &pd[plane], plane, mi_row, mi_col); - } - } -#else - if (cm->lf.combine_vert_horz_lf) { - // filter all vertical and horizontal edges in every 128x128 super block 
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { - for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) { - // filter vertical edges - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); - av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, - mi_col); - // filter horizontal edges - if (mi_col - MAX_MIB_SIZE >= 0) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, - mi_row, mi_col - MAX_MIB_SIZE, plane, - plane + 1); - av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, - mi_col - MAX_MIB_SIZE); - } - } - // filter horizontal edges - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col - MAX_MIB_SIZE, plane, plane + 1); - av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, - mi_col - MAX_MIB_SIZE); - } - } else { - // filter all vertical edges in every 128x128 super block - for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { - for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); - av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, - mi_col); - } - } - - // filter all horizontal edges in every 128x128 super block - for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { - for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); - av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, - mi_col); - } - } - } -#endif // LOOP_FILTER_BITMASK - } -} - -void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, - MACROBLOCKD *xd, -#if LOOP_FILTER_BITMASK - int is_decoding, -#endif - int plane_start, int plane_end, int partial_frame) { - int start_mi_row, end_mi_row, mi_rows_to_filter; - - start_mi_row = 0; - mi_rows_to_filter = cm->mi_rows; - if 
(partial_frame && cm->mi_rows > 8) { - start_mi_row = cm->mi_rows >> 1; - start_mi_row &= 0xfffffff8; - mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8); - } - end_mi_row = start_mi_row + mi_rows_to_filter; - av1_loop_filter_frame_init(cm, plane_start, plane_end); - loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row, -#if LOOP_FILTER_BITMASK - is_decoding, -#endif - plane_start, plane_end); -} diff --git a/third_party/aom/av1/common/av1_loopfilter.h b/third_party/aom/av1/common/av1_loopfilter.h deleted file mode 100644 index 80ac61178..000000000 --- a/third_party/aom/av1/common/av1_loopfilter.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_AV1_LOOPFILTER_H_ -#define AOM_AV1_COMMON_AV1_LOOPFILTER_H_ - -#include "config/aom_config.h" - -#include "aom_ports/mem.h" -#include "av1/common/blockd.h" -#include "av1/common/seg_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_LOOP_FILTER 63 -#define MAX_SHARPNESS 7 - -#define SIMD_WIDTH 16 - -enum lf_path { - LF_PATH_420, - LF_PATH_444, - LF_PATH_SLOW, -}; - -#if LOOP_FILTER_BITMASK -typedef struct { - uint64_t bits[4]; -} FilterMask; - -// This structure holds bit masks for all 4x4 blocks in a 64x64 region. -// Each 1 bit represents a position in which we want to apply the loop filter. 
-// For Y plane, 4x4 in 64x64 requires 16x16 = 256 bit, therefore we use 4 -// uint64_t; For U, V plane, for 420 format, plane size is 32x32, thus we use -// a uint64_t to represent bitmask. -// Left_ entries refer to whether we apply a filter on the border to the -// left of the block. Above_ entries refer to whether or not to apply a -// filter on the above border. -// Since each transform is accompanied by a potentially different type of -// loop filter there is a different entry in the array for each transform size. -typedef struct { - FilterMask left_y[TX_SIZES]; - FilterMask above_y[TX_SIZES]; - FilterMask left_u[TX_SIZES]; - FilterMask above_u[TX_SIZES]; - FilterMask left_v[TX_SIZES]; - FilterMask above_v[TX_SIZES]; - - // Y plane vertical edge and horizontal edge filter level - uint8_t lfl_y_hor[MI_SIZE_64X64][MI_SIZE_64X64]; - uint8_t lfl_y_ver[MI_SIZE_64X64][MI_SIZE_64X64]; - - // U plane filter level - uint8_t lfl_u[MI_SIZE_64X64][MI_SIZE_64X64]; - - // V plane filter level - uint8_t lfl_v[MI_SIZE_64X64][MI_SIZE_64X64]; - - // other info - FilterMask skip; - FilterMask is_vert_border; - FilterMask is_horz_border; - // Y or UV planes, 5 tx sizes: 4x4, 8x8, 16x16, 32x32, 64x64 - FilterMask tx_size_ver[2][5]; - FilterMask tx_size_hor[2][5]; -} LoopFilterMask; -#endif // LOOP_FILTER_BITMASK - -struct loopfilter { - int filter_level[2]; - int filter_level_u; - int filter_level_v; - - int sharpness_level; - - uint8_t mode_ref_delta_enabled; - uint8_t mode_ref_delta_update; - - // 0 = Intra, Last, Last2+Last3, - // GF, BRF, ARF2, ARF - int8_t ref_deltas[REF_FRAMES]; - - // 0 = ZERO_MV, MV - int8_t mode_deltas[MAX_MODE_LF_DELTAS]; - - int combine_vert_horz_lf; - -#if LOOP_FILTER_BITMASK - LoopFilterMask *lfm; - size_t lfm_num; - int lfm_stride; -#endif // LOOP_FILTER_BITMASK -}; - -// Need to align this structure so when it is declared and -// passed it can be loaded into vector registers. 
-typedef struct { - DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]); -} loop_filter_thresh; - -typedef struct { - loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; - uint8_t lvl[MAX_MB_PLANE][MAX_SEGMENTS][2][REF_FRAMES][MAX_MODE_LF_DELTAS]; -} loop_filter_info_n; - -/* assorted loopfilter functions which get used elsewhere */ -struct AV1Common; -struct macroblockd; -struct AV1LfSyncData; - -void av1_loop_filter_init(struct AV1Common *cm); - -void av1_loop_filter_frame_init(struct AV1Common *cm, int plane_start, - int plane_end); - -#if LOOP_FILTER_BITMASK -void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm, - struct macroblockd *mbd, int is_decoding, - int plane_start, int plane_end, int partial_frame); -#else -void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm, - struct macroblockd *mbd, int plane_start, - int plane_end, int partial_frame); -#endif - -void av1_filter_block_plane_vert(const struct AV1Common *const cm, - const MACROBLOCKD *const xd, const int plane, - const MACROBLOCKD_PLANE *const plane_ptr, - const uint32_t mi_row, const uint32_t mi_col); - -void av1_filter_block_plane_horz(const struct AV1Common *const cm, - const MACROBLOCKD *const xd, const int plane, - const MACROBLOCKD_PLANE *const plane_ptr, - const uint32_t mi_row, const uint32_t mi_col); - -typedef struct LoopFilterWorkerData { - YV12_BUFFER_CONFIG *frame_buffer; - struct AV1Common *cm; - struct macroblockd_plane planes[MAX_MB_PLANE]; - // TODO(Ranjit): When the filter functions are modified to use xd->lossless - // add lossless as a member here. 
- MACROBLOCKD *xd; -} LFWorkerData; - -uint8_t get_filter_level(const struct AV1Common *cm, - const loop_filter_info_n *lfi_n, const int dir_idx, - int plane, const MB_MODE_INFO *mbmi); -#if LOOP_FILTER_BITMASK -void av1_setup_bitmask(struct AV1Common *const cm, int mi_row, int mi_col, - int plane, int subsampling_x, int subsampling_y, - int row_end, int col_end); - -void av1_filter_block_plane_ver(struct AV1Common *const cm, - struct macroblockd_plane *const plane_ptr, - int pl, int mi_row, int mi_col); - -void av1_filter_block_plane_hor(struct AV1Common *const cm, - struct macroblockd_plane *const plane, int pl, - int mi_row, int mi_col); -LoopFilterMask *get_loop_filter_mask(const struct AV1Common *const cm, - int mi_row, int mi_col); -int get_index_shift(int mi_col, int mi_row, int *index); - -static const FilterMask left_txform_mask[TX_SIZES] = { - { { 0x0000000000000001ULL, // TX_4X4, - 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } }, - - { { 0x0000000000010001ULL, // TX_8X8, - 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } }, - - { { 0x0001000100010001ULL, // TX_16X16, - 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } }, - - { { 0x0001000100010001ULL, // TX_32X32, - 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } }, - - { { 0x0001000100010001ULL, // TX_64X64, - 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL } }, -}; - -static const uint64_t above_txform_mask[2][TX_SIZES] = { - { - 0x0000000000000001ULL, // TX_4X4 - 0x0000000000000003ULL, // TX_8X8 - 0x000000000000000fULL, // TX_16X16 - 0x00000000000000ffULL, // TX_32X32 - 0x000000000000ffffULL, // TX_64X64 - }, - { - 0x0000000000000001ULL, // TX_4X4 - 0x0000000000000005ULL, // TX_8X8 - 0x0000000000000055ULL, // TX_16X16 - 0x0000000000005555ULL, // TX_32X32 - 0x0000000055555555ULL, // TX_64X64 - }, -}; - -extern const int mask_id_table_tx_4x4[BLOCK_SIZES_ALL]; - -extern const int 
mask_id_table_tx_8x8[BLOCK_SIZES_ALL]; - -extern const int mask_id_table_tx_16x16[BLOCK_SIZES_ALL]; - -extern const int mask_id_table_tx_32x32[BLOCK_SIZES_ALL]; - -extern const FilterMask left_mask_univariant_reordered[67]; - -extern const FilterMask above_mask_univariant_reordered[67]; -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_AV1_LOOPFILTER_H_ diff --git a/third_party/aom/av1/common/av1_rtcd.c b/third_party/aom/av1/common/av1_rtcd.c deleted file mode 100644 index a77a4d254..000000000 --- a/third_party/aom/av1/common/av1_rtcd.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#include "config/aom_config.h" - -#define RTCD_C -#include "config/av1_rtcd.h" - -#include "aom_ports/aom_once.h" - -void av1_rtcd() { - // TODO(JBB): Remove this aom_once, by insuring that both the encoder and - // decoder setup functions are protected by aom_once(); - aom_once(setup_rtcd_internal); -} diff --git a/third_party/aom/av1/common/av1_rtcd_defs.pl b/third_party/aom/av1/common/av1_rtcd_defs.pl deleted file mode 100755 index dee1f1c79..000000000 --- a/third_party/aom/av1/common/av1_rtcd_defs.pl +++ /dev/null @@ -1,398 +0,0 @@ -## -## Copyright (c) 2017, Alliance for Open Media. All rights reserved -## -## This source code is subject to the terms of the BSD 2 Clause License and -## the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License -## was not distributed with this source code in the LICENSE file, you can -## obtain it at www.aomedia.org/license/software. If the Alliance for Open -## Media Patent License 1.0 was not distributed with this source code in the -## PATENTS file, you can obtain it at www.aomedia.org/license/patent. -## -sub av1_common_forward_decls() { -print <<EOF -/* - * AV1 - */ - -#include "aom/aom_integer.h" -#include "aom_dsp/txfm_common.h" -#include "av1/common/common.h" -#include "av1/common/enums.h" -#include "av1/common/quant_common.h" -#include "av1/common/filter.h" -#include "av1/common/convolve.h" -#include "av1/common/av1_txfm.h" -#include "av1/common/odintrin.h" -#include "av1/common/restoration.h" - -struct macroblockd; - -/* Encoder forward decls */ -struct macroblock; -struct txfm_param; -struct aom_variance_vtable; -struct search_site_config; -struct yv12_buffer_config; - -/* Function pointers return by CfL functions */ -typedef void (*cfl_subsample_lbd_fn)(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -typedef void (*cfl_subtract_average_fn)(const uint16_t *src, int16_t *dst); - -typedef void (*cfl_predict_lbd_fn)(const int16_t *src, uint8_t *dst, - int dst_stride, int alpha_q3); - -typedef void (*cfl_predict_hbd_fn)(const int16_t *src, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); -EOF -} -forward_decls qw/av1_common_forward_decls/; - -# functions that are 64 bit only. 
-$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = ''; -if ($opts{arch} eq "x86_64") { - $mmx_x86_64 = 'mmx'; - $sse2_x86_64 = 'sse2'; - $ssse3_x86_64 = 'ssse3'; - $avx_x86_64 = 'avx'; - $avx2_x86_64 = 'avx2'; -} - -add_proto qw/void av1_convolve_horiz_rs/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn"; -specialize qw/av1_convolve_horiz_rs sse4_1/; - -add_proto qw/void av1_highbd_convolve_horiz_rs/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd"; -specialize qw/av1_highbd_convolve_horiz_rs sse4_1/; - -add_proto qw/void av1_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params"; - -add_proto qw/void av1_highbd_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps"; - -specialize qw/av1_wiener_convolve_add_src sse2 avx2 neon/; -specialize qw/av1_highbd_wiener_convolve_add_src ssse3/; -specialize qw/av1_highbd_wiener_convolve_add_src avx2/; - - -# directional intra predictor functions -add_proto qw/void av1_dr_prediction_z1/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy"; -add_proto qw/void av1_dr_prediction_z2/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int upsample_left, int dx, int dy"; -add_proto qw/void av1_dr_prediction_z3/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_left, int dx, int dy"; 
- -# FILTER_INTRA predictor functions -add_proto qw/void av1_filter_intra_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode"; -specialize qw/av1_filter_intra_predictor sse4_1/; - -# High bitdepth functions - -# -# Sub Pixel Filters -# -add_proto qw/void av1_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; - -add_proto qw/void av1_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; - -add_proto qw/void av1_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; -specialize qw/av1_highbd_convolve8/, "$sse2_x86_64"; - -add_proto qw/void av1_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; -specialize qw/av1_highbd_convolve8_horiz/, "$sse2_x86_64"; - -add_proto qw/void av1_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; -specialize qw/av1_highbd_convolve8_vert/, "$sse2_x86_64"; - -#inv txfm -add_proto qw/void av1_inv_txfm_add/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param"; -specialize qw/av1_inv_txfm_add ssse3 avx2 neon/; - -add_proto qw/void av1_highbd_inv_txfm_add/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param"; -specialize qw/av1_highbd_inv_txfm_add sse4_1 avx2/; - 
-add_proto qw/void av1_highbd_inv_txfm_add_4x4/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param"; -specialize qw/av1_highbd_inv_txfm_add_4x4 sse4_1/; -add_proto qw/void av1_highbd_inv_txfm_add_8x8/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param"; -specialize qw/av1_highbd_inv_txfm_add_8x8 sse4_1/; -add_proto qw/void av1_highbd_inv_txfm_add_16x8/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param"; -specialize qw/av1_highbd_inv_txfm_add_16x8 sse4_1/; -add_proto qw/void av1_highbd_inv_txfm_add_8x16/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param"; -specialize qw/av1_highbd_inv_txfm_add_8x16 sse4_1/; -add_proto qw/void av1_highbd_inv_txfm_add_16x16/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param"; -specialize qw/av1_highbd_inv_txfm_add_16x16 sse4_1/; -add_proto qw/void av1_highbd_inv_txfm_add_32x32/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param"; -specialize qw/av1_highbd_inv_txfm_add_32x32 sse4_1 avx2/; - -add_proto qw/void av1_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; -add_proto qw/void av1_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd"; - -add_proto qw/void av1_inv_txfm2d_add_4x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_8x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_8x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_16x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_16x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void 
av1_inv_txfm2d_add_32x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -specialize qw/av1_inv_txfm2d_add_4x4 sse4_1/; -add_proto qw/void av1_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -specialize qw/av1_inv_txfm2d_add_8x8 sse4_1/; -add_proto qw/void av1_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; - -add_proto qw/void av1_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_32x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_64x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_16x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_64x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; - -add_proto qw/void av1_inv_txfm2d_add_4x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_16x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_8x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; -add_proto qw/void av1_inv_txfm2d_add_32x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd"; - -# directional intra predictor functions -add_proto qw/void av1_highbd_dr_prediction_z1/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const 
uint16_t *left, int upsample_above, int dx, int dy, int bd"; -add_proto qw/void av1_highbd_dr_prediction_z2/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int upsample_left, int dx, int dy, int bd"; -add_proto qw/void av1_highbd_dr_prediction_z3/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd"; - -# build compound seg mask functions -add_proto qw/void av1_build_compound_diffwtd_mask/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w"; -specialize qw/av1_build_compound_diffwtd_mask sse4_1 avx2/; - -add_proto qw/void av1_build_compound_diffwtd_mask_highbd/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd"; -specialize qw/av1_build_compound_diffwtd_mask_highbd ssse3 avx2/; - -add_proto qw/void av1_build_compound_diffwtd_mask_d16/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd"; -specialize qw/av1_build_compound_diffwtd_mask_d16 sse4_1 avx2 neon/; - -# -# Encoder functions below this point. -# -if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { - - # ENCODEMB INVOKE - - # the transform coefficients are held in 32-bit - # values, so the assembler code for av1_block_error can no longer be used. 
- add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; - specialize qw/av1_block_error avx2/; - - add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/av1_quantize_fp sse2 avx2/; - - add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/av1_quantize_fp_32x32 avx2/; - - add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/av1_quantize_fp_64x64 avx2/; - - # fdct functions - - add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - - #fwd txfm - add_proto qw/void av1_lowbd_fwd_txfm/, "const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TxfmParam *txfm_param"; - specialize qw/av1_lowbd_fwd_txfm sse2 sse4_1 avx2/; - - add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - 
specialize qw/av1_fwd_txfm2d_8x16 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_16x8 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_4x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_16x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_8x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_32x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_4x4 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_8x8 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_16x16 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_32x32 sse4_1/; - - add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_64x64 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_32x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_64x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int 
bd"; - add_proto qw/void av1_fwd_txfm2d_16x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_64x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd"; - - # - # Motion search - # - add_proto qw/int av1_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const MV *center_mv"; - - add_proto qw/int av1_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const MV *center_mv"; - - add_proto qw/void av1_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; - specialize qw/av1_temporal_filter_apply sse2 msa/; - - add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; - - # ENCODEMB INVOKE - - add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; - specialize qw/av1_highbd_block_error sse2/; - - add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; - - add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, 
intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; - specialize qw/av1_highbd_quantize_fp sse4_1 avx2/; - - add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - - # End av1_high encoder functions - - # txb - add_proto qw/void av1_get_nz_map_contexts/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts"; - specialize qw/av1_get_nz_map_contexts sse2/; - add_proto qw/void av1_txb_init_levels/, "const tran_low_t *const coeff, const int width, const int height, uint8_t *const levels"; - specialize qw/av1_txb_init_levels sse4_1 avx2/; - - add_proto qw/uint64_t av1_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N"; - specialize qw/av1_wedge_sse_from_residuals sse2 avx2/; - add_proto qw/int av1_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit"; - specialize qw/av1_wedge_sign_from_residuals sse2 avx2/; - add_proto qw/void av1_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N"; - specialize qw/av1_wedge_compute_delta_squares sse2 avx2/; - - # hash - add_proto qw/uint32_t av1_get_crc32c_value/, "void *crc_calculator, uint8_t *p, int length"; - specialize qw/av1_get_crc32c_value sse4_2/; - - add_proto qw/void av1_compute_stats/, "int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, double *M, double *H"; - specialize qw/av1_compute_stats sse4_1 avx2/; - - add_proto qw/int64_t av1_lowbd_pixel_proj_error/, " const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t 
*flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params"; - specialize qw/av1_lowbd_pixel_proj_error sse4_1 avx2/; -} -# end encoder functions - -# Deringing Functions - -add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift"; -add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift"; - -add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h"; -add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h"; - -# VS compiling for 32 bit targets does not support vector types in -# structs as arguments, which makes the v256 type of the intrinsics -# hard to support, so optimizations for this target are disabled. -if ($opts{config} !~ /libs-x86-win32-vs.*/) { - specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/; - specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/; - specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/; - specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/; -} - -# WARPED_MOTION / GLOBAL_MOTION functions - -add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta"; -specialize qw/av1_warp_affine sse4_1 neon/; - -add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, 
int16_t alpha, int16_t beta, int16_t gamma, int16_t delta"; -specialize qw/av1_highbd_warp_affine sse4_1/; - -if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { - add_proto qw/double compute_cross_correlation/, "unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2"; - specialize qw/compute_cross_correlation sse4_1/; -} - -# LOOP_RESTORATION functions - -add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd"; -specialize qw/apply_selfguided_restoration sse4_1 avx2 neon/; - -add_proto qw/int av1_selfguided_restoration/, "const uint8_t *dgd8, int width, int height, - int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride, - int sgr_params_idx, int bit_depth, int highbd"; -specialize qw/av1_selfguided_restoration sse4_1 avx2 neon/; - -# CONVOLVE_ROUND/COMPOUND_ROUND functions - -add_proto qw/void av1_convolve_2d_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; -add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; -add_proto qw/void av1_convolve_x_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; -add_proto qw/void av1_convolve_y_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, 
const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; -add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; -add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; -add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; -add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params"; -add_proto qw/void av1_highbd_convolve_2d_copy_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd"; -add_proto qw/void av1_highbd_convolve_2d_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd"; -add_proto qw/void av1_highbd_convolve_x_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const 
InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd"; -add_proto qw/void av1_highbd_convolve_y_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd"; -add_proto qw/void av1_highbd_jnt_convolve_2d/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd"; -add_proto qw/void av1_highbd_jnt_convolve_x/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd"; -add_proto qw/void av1_highbd_jnt_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd"; -add_proto qw/void av1_highbd_jnt_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd"; - - add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams 
*conv_params"; - add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd"; - - specialize qw/av1_convolve_2d_sr sse2 avx2 neon/; - specialize qw/av1_convolve_2d_copy_sr sse2 avx2 neon/; - specialize qw/av1_convolve_x_sr sse2 avx2 neon/; - specialize qw/av1_convolve_y_sr sse2 avx2 neon/; - specialize qw/av1_convolve_2d_scale sse4_1/; - specialize qw/av1_jnt_convolve_2d ssse3 avx2 neon/; - specialize qw/av1_jnt_convolve_2d_copy sse2 avx2 neon/; - specialize qw/av1_jnt_convolve_x sse2 avx2 neon/; - specialize qw/av1_jnt_convolve_y sse2 avx2 neon/; - specialize qw/av1_highbd_convolve_2d_copy_sr sse2 avx2/; - specialize qw/av1_highbd_convolve_2d_sr ssse3 avx2/; - specialize qw/av1_highbd_convolve_x_sr ssse3 avx2/; - specialize qw/av1_highbd_convolve_y_sr ssse3 avx2/; - specialize qw/av1_highbd_convolve_2d_scale sse4_1/; - specialize qw/av1_highbd_jnt_convolve_2d sse4_1 avx2/; - specialize qw/av1_highbd_jnt_convolve_x sse4_1 avx2/; - specialize qw/av1_highbd_jnt_convolve_y sse4_1 avx2/; - specialize qw/av1_highbd_jnt_convolve_2d_copy sse4_1 avx2/; - -# INTRA_EDGE functions -add_proto qw/void av1_filter_intra_edge/, "uint8_t *p, int sz, int strength"; -specialize qw/av1_filter_intra_edge sse4_1/; -add_proto qw/void av1_upsample_intra_edge/, "uint8_t *p, int sz"; -specialize qw/av1_upsample_intra_edge sse4_1/; - -add_proto qw/void av1_filter_intra_edge_high/, "uint16_t *p, int sz, int strength"; -specialize qw/av1_filter_intra_edge_high sse4_1/; -add_proto qw/void av1_upsample_intra_edge_high/, "uint16_t *p, int sz, int bd"; -specialize qw/av1_upsample_intra_edge_high sse4_1/; - -# CFL -add_proto qw/cfl_subtract_average_fn get_subtract_average_fn/, "TX_SIZE tx_size"; -specialize 
qw/get_subtract_average_fn sse2 avx2 neon vsx/; - -add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd/, "TX_SIZE tx_size"; -specialize qw/cfl_get_luma_subsampling_420_lbd ssse3 avx2 neon/; - -add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd/, "TX_SIZE tx_size"; -specialize qw/cfl_get_luma_subsampling_422_lbd ssse3 avx2 neon/; - -add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd/, "TX_SIZE tx_size"; -specialize qw/cfl_get_luma_subsampling_444_lbd ssse3 avx2 neon/; - -add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd/, "TX_SIZE tx_size"; -specialize qw/cfl_get_luma_subsampling_420_hbd ssse3 avx2 neon/; - -add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd/, "TX_SIZE tx_size"; -specialize qw/cfl_get_luma_subsampling_422_hbd ssse3 avx2 neon/; - -add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd/, "TX_SIZE tx_size"; -specialize qw/cfl_get_luma_subsampling_444_hbd ssse3 avx2 neon/; - -add_proto qw/cfl_predict_lbd_fn get_predict_lbd_fn/, "TX_SIZE tx_size"; -specialize qw/get_predict_lbd_fn ssse3 avx2 neon/; - -add_proto qw/cfl_predict_hbd_fn get_predict_hbd_fn/, "TX_SIZE tx_size"; -specialize qw/get_predict_hbd_fn ssse3 avx2 neon/; - -1; diff --git a/third_party/aom/av1/common/av1_txfm.c b/third_party/aom/av1/common/av1_txfm.c deleted file mode 100644 index bb70eab70..000000000 --- a/third_party/aom/av1/common/av1_txfm.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "config/aom_dsp_rtcd.h" - -#include "av1/common/av1_txfm.h" - -// av1_cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i))); -const int32_t av1_cospi_arr_data[7][64] = { - { 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, 1004, 999, 993, 987, 980, - 972, 964, 955, 946, 936, 926, 915, 903, 891, 878, 865, 851, 837, - 822, 807, 792, 775, 759, 742, 724, 706, 688, 669, 650, 630, 610, - 590, 569, 548, 526, 505, 483, 460, 438, 415, 392, 369, 345, 321, - 297, 273, 249, 224, 200, 175, 150, 125, 100, 75, 50, 25 }, - { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, 2009, 1998, 1987, - 1974, 1960, 1945, 1928, 1911, 1892, 1872, 1851, 1829, 1806, 1782, - 1757, 1730, 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, 1448, - 1412, 1375, 1338, 1299, 1260, 1220, 1179, 1138, 1096, 1053, 1009, - 965, 921, 876, 830, 784, 737, 690, 642, 595, 546, 498, - 449, 400, 350, 301, 251, 201, 151, 100, 50 }, - { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973, - 3948, 3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564, - 3513, 3461, 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896, - 2824, 2751, 2675, 2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019, - 1931, 1842, 1751, 1660, 1567, 1474, 1380, 1285, 1189, 1092, 995, - 897, 799, 700, 601, 501, 401, 301, 201, 101 }, - { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, 8035, 7993, 7946, - 7895, 7839, 7779, 7713, 7643, 7568, 7489, 7405, 7317, 7225, 7128, - 7027, 6921, 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, 5793, - 5649, 5501, 5351, 5197, 5040, 4880, 4717, 4551, 4383, 4212, 4038, - 3862, 3683, 3503, 3320, 3135, 2948, 2760, 2570, 2378, 2185, 1990, - 1795, 1598, 1401, 1202, 1003, 803, 603, 402, 201 }, - { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, 16069, 15986, 15893, - 15791, 15679, 15557, 15426, 15286, 
15137, 14978, 14811, 14635, 14449, 14256, - 14053, 13842, 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866, 11585, - 11297, 11003, 10702, 10394, 10080, 9760, 9434, 9102, 8765, 8423, 8076, - 7723, 7366, 7005, 6639, 6270, 5897, 5520, 5139, 4756, 4370, 3981, - 3590, 3196, 2801, 2404, 2006, 1606, 1205, 804, 402 }, - { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286, 32138, 31972, 31786, - 31581, 31357, 31114, 30853, 30572, 30274, 29957, 29622, 29269, 28899, 28511, - 28106, 27684, 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732, 23170, - 22595, 22006, 21403, 20788, 20160, 19520, 18868, 18205, 17531, 16846, 16151, - 15447, 14733, 14010, 13279, 12540, 11793, 11039, 10279, 9512, 8740, 7962, - 7180, 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804 }, - { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571, 64277, 63944, 63572, - 63162, 62714, 62228, 61705, 61145, 60547, 59914, 59244, 58538, 57798, 57022, - 56212, 55368, 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464, 46341, - 45190, 44011, 42806, 41576, 40320, 39040, 37736, 36410, 35062, 33692, 32303, - 30893, 29466, 28020, 26558, 25080, 23586, 22078, 20557, 19024, 17479, 15924, - 14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 } -}; - -// av1_sinpi_arr_data[i][j] = (int)round((sqrt(2) * sin(j*Pi/9) * 2 / 3) * (1 -// << (cos_bit_min + i))) modified so that elements j=1,2 sum to element j=4. 
-const int32_t av1_sinpi_arr_data[7][5] = { - { 0, 330, 621, 836, 951 }, { 0, 660, 1241, 1672, 1901 }, - { 0, 1321, 2482, 3344, 3803 }, { 0, 2642, 4964, 6689, 7606 }, - { 0, 5283, 9929, 13377, 15212 }, { 0, 10566, 19858, 26755, 30424 }, - { 0, 21133, 39716, 53510, 60849 } -}; - -void av1_round_shift_array_c(int32_t *arr, int size, int bit) { - int i; - if (bit == 0) { - return; - } else { - if (bit > 0) { - for (i = 0; i < size; i++) { - arr[i] = round_shift(arr[i], bit); - } - } else { - for (i = 0; i < size; i++) { - arr[i] = (int32_t)clamp64(((int64_t)1 << (-bit)) * arr[i], INT32_MIN, - INT32_MAX); - } - } - } -} - -const TXFM_TYPE av1_txfm_type_ls[5][TX_TYPES_1D] = { - { TXFM_TYPE_DCT4, TXFM_TYPE_ADST4, TXFM_TYPE_ADST4, TXFM_TYPE_IDENTITY4 }, - { TXFM_TYPE_DCT8, TXFM_TYPE_ADST8, TXFM_TYPE_ADST8, TXFM_TYPE_IDENTITY8 }, - { TXFM_TYPE_DCT16, TXFM_TYPE_ADST16, TXFM_TYPE_ADST16, TXFM_TYPE_IDENTITY16 }, - { TXFM_TYPE_DCT32, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID, - TXFM_TYPE_IDENTITY32 }, - { TXFM_TYPE_DCT64, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID } -}; - -const int8_t av1_txfm_stage_num_list[TXFM_TYPES] = { - 4, // TXFM_TYPE_DCT4 - 6, // TXFM_TYPE_DCT8 - 8, // TXFM_TYPE_DCT16 - 10, // TXFM_TYPE_DCT32 - 12, // TXFM_TYPE_DCT64 - 7, // TXFM_TYPE_ADST4 - 8, // TXFM_TYPE_ADST8 - 10, // TXFM_TYPE_ADST16 - 1, // TXFM_TYPE_IDENTITY4 - 1, // TXFM_TYPE_IDENTITY8 - 1, // TXFM_TYPE_IDENTITY16 - 1, // TXFM_TYPE_IDENTITY32 -}; - -void av1_range_check_buf(int32_t stage, const int32_t *input, - const int32_t *buf, int32_t size, int8_t bit) { -#if CONFIG_COEFFICIENT_RANGE_CHECKING - const int64_t max_value = (1LL << (bit - 1)) - 1; - const int64_t min_value = -(1LL << (bit - 1)); - - int in_range = 1; - - for (int i = 0; i < size; ++i) { - if (buf[i] < min_value || buf[i] > max_value) { - in_range = 0; - } - } - - if (!in_range) { - fprintf(stderr, "Error: coeffs contain out-of-range values\n"); - fprintf(stderr, "size: %d\n", size); - fprintf(stderr, "stage: %d\n", 
stage); - fprintf(stderr, "allowed range: [%" PRId64 ";%" PRId64 "]\n", min_value, - max_value); - - fprintf(stderr, "coeffs: "); - - fprintf(stderr, "["); - for (int j = 0; j < size; j++) { - if (j > 0) fprintf(stderr, ", "); - fprintf(stderr, "%d", input[j]); - } - fprintf(stderr, "]\n"); - - fprintf(stderr, " buf: "); - - fprintf(stderr, "["); - for (int j = 0; j < size; j++) { - if (j > 0) fprintf(stderr, ", "); - fprintf(stderr, "%d", buf[j]); - } - fprintf(stderr, "]\n\n"); - } - - assert(in_range); -#else - (void)stage; - (void)input; - (void)buf; - (void)size; - (void)bit; -#endif -} diff --git a/third_party/aom/av1/common/av1_txfm.h b/third_party/aom/av1/common/av1_txfm.h deleted file mode 100644 index 59d64ca4a..000000000 --- a/third_party/aom/av1/common/av1_txfm.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_AV1_TXFM_H_ -#define AOM_AV1_COMMON_AV1_TXFM_H_ - -#include <assert.h> -#include <math.h> -#include <stdio.h> - -#include "config/aom_config.h" - -#include "av1/common/enums.h" -#include "av1/common/blockd.h" -#include "aom/aom_integer.h" -#include "aom_dsp/aom_dsp_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if !defined(DO_RANGE_CHECK_CLAMP) -#define DO_RANGE_CHECK_CLAMP 0 -#endif - -extern const int32_t av1_cospi_arr_data[7][64]; -extern const int32_t av1_sinpi_arr_data[7][5]; - -#define MAX_TXFM_STAGE_NUM 12 - -static const int cos_bit_min = 10; -static const int cos_bit_max = 16; - -#define NewSqrt2Bits ((int32_t)12) -// 2^12 * sqrt(2) -static const int32_t NewSqrt2 = 5793; -// 2^12 / sqrt(2) -static const int32_t NewInvSqrt2 = 2896; - -static INLINE const int32_t *cospi_arr(int n) { - return av1_cospi_arr_data[n - cos_bit_min]; -} - -static INLINE const int32_t *sinpi_arr(int n) { - return av1_sinpi_arr_data[n - cos_bit_min]; -} - -static INLINE int32_t range_check_value(int32_t value, int8_t bit) { -#if CONFIG_COEFFICIENT_RANGE_CHECKING - const int64_t max_value = (1LL << (bit - 1)) - 1; - const int64_t min_value = -(1LL << (bit - 1)); - if (value < min_value || value > max_value) { - fprintf(stderr, "coeff out of bit range, value: %d bit %d\n", value, bit); - assert(0); - } -#endif // CONFIG_COEFFICIENT_RANGE_CHECKING -#if DO_RANGE_CHECK_CLAMP - bit = AOMMIN(bit, 31); - return clamp(value, -(1 << (bit - 1)), (1 << (bit - 1)) - 1); -#endif // DO_RANGE_CHECK_CLAMP - (void)bit; - return value; -} - -static INLINE int32_t round_shift(int64_t value, int bit) { - assert(bit >= 1); - return (int32_t)((value + (1ll << (bit - 1))) >> bit); -} - -static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1, - int bit) { - int64_t result_64 = (int64_t)(w0 * in0) + (int64_t)(w1 * in1); - int64_t intermediate = result_64 + (1LL << (bit - 1)); - // NOTE(david.barker): The value 'result_64' may not necessarily 
fit - // into 32 bits. However, the result of this function is nominally - // ROUND_POWER_OF_TWO_64(result_64, bit) - // and that is required to fit into stage_range[stage] many bits - // (checked by range_check_buf()). - // - // Here we've unpacked that rounding operation, and it can be shown - // that the value of 'intermediate' here *does* fit into 32 bits - // for any conformant bitstream. - // The upshot is that, if you do all this calculation using - // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic, - // then you'll still get the correct result. - // To provide a check on this logic, we assert that 'intermediate' - // would fit into an int32 if range checking is enabled. -#if CONFIG_COEFFICIENT_RANGE_CHECKING - assert(intermediate >= INT32_MIN && intermediate <= INT32_MAX); -#endif - return (int32_t)(intermediate >> bit); -} - -static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, - int bd) { - return clip_pixel_highbd(dest + (int)trans, bd); -} - -typedef void (*TxfmFunc)(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); - -typedef void (*FwdTxfm2dFunc)(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd); - -typedef enum TXFM_TYPE { - TXFM_TYPE_DCT4, - TXFM_TYPE_DCT8, - TXFM_TYPE_DCT16, - TXFM_TYPE_DCT32, - TXFM_TYPE_DCT64, - TXFM_TYPE_ADST4, - TXFM_TYPE_ADST8, - TXFM_TYPE_ADST16, - TXFM_TYPE_IDENTITY4, - TXFM_TYPE_IDENTITY8, - TXFM_TYPE_IDENTITY16, - TXFM_TYPE_IDENTITY32, - TXFM_TYPES, - TXFM_TYPE_INVALID, -} TXFM_TYPE; - -typedef struct TXFM_2D_FLIP_CFG { - TX_SIZE tx_size; - int ud_flip; // flip upside down - int lr_flip; // flip left to right - const int8_t *shift; - int8_t cos_bit_col; - int8_t cos_bit_row; - int8_t stage_range_col[MAX_TXFM_STAGE_NUM]; - int8_t stage_range_row[MAX_TXFM_STAGE_NUM]; - TXFM_TYPE txfm_type_col; - TXFM_TYPE txfm_type_row; - int stage_num_col; - int stage_num_row; -} TXFM_2D_FLIP_CFG; - -static INLINE void 
get_flip_cfg(TX_TYPE tx_type, int *ud_flip, int *lr_flip) { - switch (tx_type) { - case DCT_DCT: - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - *ud_flip = 0; - *lr_flip = 0; - break; - case IDTX: - case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - *ud_flip = 0; - *lr_flip = 0; - break; - case FLIPADST_DCT: - case FLIPADST_ADST: - case V_FLIPADST: - *ud_flip = 1; - *lr_flip = 0; - break; - case DCT_FLIPADST: - case ADST_FLIPADST: - case H_FLIPADST: - *ud_flip = 0; - *lr_flip = 1; - break; - case FLIPADST_FLIPADST: - *ud_flip = 1; - *lr_flip = 1; - break; - default: - *ud_flip = 0; - *lr_flip = 0; - assert(0); - } -} - -static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) { - get_flip_cfg(tx_type, &cfg->ud_flip, &cfg->lr_flip); -} - -// Utility function that returns the log of the ratio of the col and row -// sizes. -static INLINE int get_rect_tx_log_ratio(int col, int row) { - if (col == row) return 0; - if (col > row) { - if (col == row * 2) return 1; - if (col == row * 4) return 2; - assert(0 && "Unsupported transform size"); - } else { - if (row == col * 2) return -1; - if (row == col * 4) return -2; - assert(0 && "Unsupported transform size"); - } - return 0; // Invalid -} - -void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row, - const TXFM_2D_FLIP_CFG *cfg, int bd); - -void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row, - const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size, - int bd); - -void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size, - TXFM_2D_FLIP_CFG *cfg); -void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size, - TXFM_2D_FLIP_CFG *cfg); -extern const TXFM_TYPE av1_txfm_type_ls[5][TX_TYPES_1D]; -extern const int8_t av1_txfm_stage_num_list[TXFM_TYPES]; -static INLINE int get_txw_idx(TX_SIZE tx_size) { - return tx_size_wide_log2[tx_size] - tx_size_wide_log2[0]; -} -static INLINE int get_txh_idx(TX_SIZE tx_size) { - return tx_size_high_log2[tx_size] - 
tx_size_high_log2[0]; -} - -void av1_range_check_buf(int32_t stage, const int32_t *input, - const int32_t *buf, int32_t size, int8_t bit); -#define MAX_TXWH_IDX 5 -#ifdef __cplusplus -} -#endif // __cplusplus - -#endif // AOM_AV1_COMMON_AV1_TXFM_H_ diff --git a/third_party/aom/av1/common/blockd.c b/third_party/aom/av1/common/blockd.c deleted file mode 100644 index 2e796b656..000000000 --- a/third_party/aom/av1/common/blockd.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <math.h> - -#include "aom_ports/system_state.h" - -#include "av1/common/blockd.h" -#include "av1/common/onyxc_int.h" - -PREDICTION_MODE av1_left_block_mode(const MB_MODE_INFO *left_mi) { - if (!left_mi) return DC_PRED; - assert(!is_inter_block(left_mi) || is_intrabc_block(left_mi)); - return left_mi->mode; -} - -PREDICTION_MODE av1_above_block_mode(const MB_MODE_INFO *above_mi) { - if (!above_mi) return DC_PRED; - assert(!is_inter_block(above_mi) || is_intrabc_block(above_mi)); - return above_mi->mode; -} - -void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, - int plane, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - int has_eob, int aoff, int loff) { - ENTROPY_CONTEXT *const a = pd->above_context + aoff; - ENTROPY_CONTEXT *const l = pd->left_context + loff; - const int txs_wide = tx_size_wide_unit[tx_size]; - const int txs_high = tx_size_high_unit[tx_size]; - - // above - if (has_eob && xd->mb_to_right_edge < 0) { - const int blocks_wide = 
max_block_wide(xd, plane_bsize, plane); - const int above_contexts = AOMMIN(txs_wide, blocks_wide - aoff); - memset(a, has_eob, sizeof(*a) * above_contexts); - memset(a + above_contexts, 0, sizeof(*a) * (txs_wide - above_contexts)); - } else { - memset(a, has_eob, sizeof(*a) * txs_wide); - } - - // left - if (has_eob && xd->mb_to_bottom_edge < 0) { - const int blocks_high = max_block_high(xd, plane_bsize, plane); - const int left_contexts = AOMMIN(txs_high, blocks_high - loff); - memset(l, has_eob, sizeof(*l) * left_contexts); - memset(l + left_contexts, 0, sizeof(*l) * (txs_high - left_contexts)); - } else { - memset(l, has_eob, sizeof(*l) * txs_high); - } -} -void av1_reset_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize, const int num_planes) { - int i; - int nplanes; - int chroma_ref; - chroma_ref = - is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x, - xd->plane[1].subsampling_y); - nplanes = 1 + (num_planes - 1) * chroma_ref; - for (i = 0; i < nplanes; i++) { - struct macroblockd_plane *const pd = &xd->plane[i]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - const int txs_wide = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int txs_high = block_size_high[plane_bsize] >> tx_size_high_log2[0]; - memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * txs_wide); - memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * txs_high); - } -} - -void av1_reset_loop_filter_delta(MACROBLOCKD *xd, int num_planes) { - xd->delta_lf_from_base = 0; - const int frame_lf_count = - num_planes > 1 ? 
FRAME_LF_COUNT : FRAME_LF_COUNT - 2; - for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) xd->delta_lf[lf_id] = 0; -} - -void av1_reset_loop_restoration(MACROBLOCKD *xd, const int num_planes) { - for (int p = 0; p < num_planes; ++p) { - set_default_wiener(xd->wiener_info + p); - set_default_sgrproj(xd->sgrproj_info + p); - } -} - -void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y, - const int num_planes) { - int i; - - for (i = 0; i < num_planes; i++) { - xd->plane[i].plane_type = get_plane_type(i); - xd->plane[i].subsampling_x = i ? ss_x : 0; - xd->plane[i].subsampling_y = i ? ss_y : 0; - } - for (i = num_planes; i < MAX_MB_PLANE; i++) { - xd->plane[i].subsampling_x = 1; - xd->plane[i].subsampling_y = 1; - } -} - -const int16_t dr_intra_derivative[90] = { - // More evenly spread out angles and limited to 10-bit - // Values that are 0 will never be used - // Approx angle - 0, 0, 0, // - 1023, 0, 0, // 3, ... - 547, 0, 0, // 6, ... - 372, 0, 0, 0, 0, // 9, ... - 273, 0, 0, // 14, ... - 215, 0, 0, // 17, ... - 178, 0, 0, // 20, ... - 151, 0, 0, // 23, ... (113 & 203 are base angles) - 132, 0, 0, // 26, ... - 116, 0, 0, // 29, ... - 102, 0, 0, 0, // 32, ... - 90, 0, 0, // 36, ... - 80, 0, 0, // 39, ... - 71, 0, 0, // 42, ... - 64, 0, 0, // 45, ... (45 & 135 are base angles) - 57, 0, 0, // 48, ... - 51, 0, 0, // 51, ... - 45, 0, 0, 0, // 54, ... - 40, 0, 0, // 58, ... - 35, 0, 0, // 61, ... - 31, 0, 0, // 64, ... - 27, 0, 0, // 67, ... (67 & 157 are base angles) - 23, 0, 0, // 70, ... - 19, 0, 0, // 73, ... - 15, 0, 0, 0, 0, // 76, ... - 11, 0, 0, // 81, ... - 7, 0, 0, // 84, ... - 3, 0, 0, // 87, ... -}; diff --git a/third_party/aom/av1/common/blockd.h b/third_party/aom/av1/common/blockd.h deleted file mode 100644 index a2311c1b0..000000000 --- a/third_party/aom/av1/common/blockd.h +++ /dev/null @@ -1,1176 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_BLOCKD_H_ -#define AOM_AV1_COMMON_BLOCKD_H_ - -#include "config/aom_config.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_ports/mem.h" -#include "aom_scale/yv12config.h" - -#include "av1/common/common_data.h" -#include "av1/common/quant_common.h" -#include "av1/common/entropy.h" -#include "av1/common/entropymode.h" -#include "av1/common/mv.h" -#include "av1/common/scale.h" -#include "av1/common/seg_common.h" -#include "av1/common/tile_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define USE_B_QUANT_NO_TRELLIS 1 - -#define MAX_MB_PLANE 3 - -#define MAX_DIFFWTD_MASK_BITS 1 - -// DIFFWTD_MASK_TYPES should not surpass 1 << MAX_DIFFWTD_MASK_BITS -typedef enum ATTRIBUTE_PACKED { - DIFFWTD_38 = 0, - DIFFWTD_38_INV, - DIFFWTD_MASK_TYPES, -} DIFFWTD_MASK_TYPE; - -typedef enum ATTRIBUTE_PACKED { - KEY_FRAME = 0, - INTER_FRAME = 1, - INTRA_ONLY_FRAME = 2, // replaces intra-only - S_FRAME = 3, - FRAME_TYPES, -} FRAME_TYPE; - -static INLINE int is_comp_ref_allowed(BLOCK_SIZE bsize) { - return AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8; -} - -static INLINE int is_inter_mode(PREDICTION_MODE mode) { - return mode >= INTER_MODE_START && mode < INTER_MODE_END; -} - -typedef struct { - uint8_t *plane[MAX_MB_PLANE]; - int stride[MAX_MB_PLANE]; -} BUFFER_SET; - -static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) { - return mode >= SINGLE_INTER_MODE_START && mode < SINGLE_INTER_MODE_END; -} -static INLINE int 
is_inter_compound_mode(PREDICTION_MODE mode) { - return mode >= COMP_INTER_MODE_START && mode < COMP_INTER_MODE_END; -} - -static INLINE PREDICTION_MODE compound_ref0_mode(PREDICTION_MODE mode) { - static PREDICTION_MODE lut[] = { - MB_MODE_COUNT, // DC_PRED - MB_MODE_COUNT, // V_PRED - MB_MODE_COUNT, // H_PRED - MB_MODE_COUNT, // D45_PRED - MB_MODE_COUNT, // D135_PRED - MB_MODE_COUNT, // D113_PRED - MB_MODE_COUNT, // D157_PRED - MB_MODE_COUNT, // D203_PRED - MB_MODE_COUNT, // D67_PRED - MB_MODE_COUNT, // SMOOTH_PRED - MB_MODE_COUNT, // SMOOTH_V_PRED - MB_MODE_COUNT, // SMOOTH_H_PRED - MB_MODE_COUNT, // PAETH_PRED - MB_MODE_COUNT, // NEARESTMV - MB_MODE_COUNT, // NEARMV - MB_MODE_COUNT, // GLOBALMV - MB_MODE_COUNT, // NEWMV - NEARESTMV, // NEAREST_NEARESTMV - NEARMV, // NEAR_NEARMV - NEARESTMV, // NEAREST_NEWMV - NEWMV, // NEW_NEARESTMV - NEARMV, // NEAR_NEWMV - NEWMV, // NEW_NEARMV - GLOBALMV, // GLOBAL_GLOBALMV - NEWMV, // NEW_NEWMV - }; - assert(NELEMENTS(lut) == MB_MODE_COUNT); - assert(is_inter_compound_mode(mode)); - return lut[mode]; -} - -static INLINE PREDICTION_MODE compound_ref1_mode(PREDICTION_MODE mode) { - static PREDICTION_MODE lut[] = { - MB_MODE_COUNT, // DC_PRED - MB_MODE_COUNT, // V_PRED - MB_MODE_COUNT, // H_PRED - MB_MODE_COUNT, // D45_PRED - MB_MODE_COUNT, // D135_PRED - MB_MODE_COUNT, // D113_PRED - MB_MODE_COUNT, // D157_PRED - MB_MODE_COUNT, // D203_PRED - MB_MODE_COUNT, // D67_PRED - MB_MODE_COUNT, // SMOOTH_PRED - MB_MODE_COUNT, // SMOOTH_V_PRED - MB_MODE_COUNT, // SMOOTH_H_PRED - MB_MODE_COUNT, // PAETH_PRED - MB_MODE_COUNT, // NEARESTMV - MB_MODE_COUNT, // NEARMV - MB_MODE_COUNT, // GLOBALMV - MB_MODE_COUNT, // NEWMV - NEARESTMV, // NEAREST_NEARESTMV - NEARMV, // NEAR_NEARMV - NEWMV, // NEAREST_NEWMV - NEARESTMV, // NEW_NEARESTMV - NEWMV, // NEAR_NEWMV - NEARMV, // NEW_NEARMV - GLOBALMV, // GLOBAL_GLOBALMV - NEWMV, // NEW_NEWMV - }; - assert(NELEMENTS(lut) == MB_MODE_COUNT); - assert(is_inter_compound_mode(mode)); - return lut[mode]; -} 
- -static INLINE int have_nearmv_in_inter_mode(PREDICTION_MODE mode) { - return (mode == NEARMV || mode == NEAR_NEARMV || mode == NEAR_NEWMV || - mode == NEW_NEARMV); -} - -static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) { - return (mode == NEWMV || mode == NEW_NEWMV || mode == NEAREST_NEWMV || - mode == NEW_NEARESTMV || mode == NEAR_NEWMV || mode == NEW_NEARMV); -} - -static INLINE int is_masked_compound_type(COMPOUND_TYPE type) { - return (type == COMPOUND_WEDGE || type == COMPOUND_DIFFWTD); -} - -/* For keyframes, intra block modes are predicted by the (already decoded) - modes for the Y blocks to the left and above us; for interframes, there - is a single probability table. */ - -typedef int8_t MV_REFERENCE_FRAME; - -typedef struct { - // Number of base colors for Y (0) and UV (1) - uint8_t palette_size[2]; - // Value of base colors for Y, U, and V - uint16_t palette_colors[3 * PALETTE_MAX_SIZE]; -} PALETTE_MODE_INFO; - -typedef struct { - uint8_t use_filter_intra; - FILTER_INTRA_MODE filter_intra_mode; -} FILTER_INTRA_MODE_INFO; - -static const PREDICTION_MODE fimode_to_intradir[FILTER_INTRA_MODES] = { - DC_PRED, V_PRED, H_PRED, D157_PRED, DC_PRED -}; - -#if CONFIG_RD_DEBUG -#define TXB_COEFF_COST_MAP_SIZE (MAX_MIB_SIZE) -#endif - -typedef struct RD_STATS { - int rate; - int64_t dist; - // Please be careful of using rdcost, it's not guaranteed to be set all the - // time. - // TODO(angiebird): Create a set of functions to manipulate the RD_STATS. In - // these functions, make sure rdcost is always up-to-date according to - // rate/dist. 
- int64_t rdcost; - int64_t sse; - int skip; // sse should equal to dist when skip == 1 - int64_t ref_rdcost; - int zero_rate; - uint8_t invalid_rate; -#if CONFIG_RD_DEBUG - int txb_coeff_cost[MAX_MB_PLANE]; - int txb_coeff_cost_map[MAX_MB_PLANE][TXB_COEFF_COST_MAP_SIZE] - [TXB_COEFF_COST_MAP_SIZE]; -#endif // CONFIG_RD_DEBUG -} RD_STATS; - -// This struct is used to group function args that are commonly -// sent together in functions related to interinter compound modes -typedef struct { - int wedge_index; - int wedge_sign; - DIFFWTD_MASK_TYPE mask_type; - uint8_t *seg_mask; - COMPOUND_TYPE type; -} INTERINTER_COMPOUND_DATA; - -#define INTER_TX_SIZE_BUF_LEN 16 -#define TXK_TYPE_BUF_LEN 64 -// This structure now relates to 4x4 block regions. -typedef struct MB_MODE_INFO { - // Common for both INTER and INTRA blocks - BLOCK_SIZE sb_type; - PREDICTION_MODE mode; - TX_SIZE tx_size; - uint8_t inter_tx_size[INTER_TX_SIZE_BUF_LEN]; - int8_t skip; - int8_t skip_mode; - int8_t segment_id; - int8_t seg_id_predicted; // valid only when temporal_update is enabled - - // Only for INTRA blocks - UV_PREDICTION_MODE uv_mode; - - PALETTE_MODE_INFO palette_mode_info; - uint8_t use_intrabc; - - // Only for INTER blocks - InterpFilters interp_filters; - MV_REFERENCE_FRAME ref_frame[2]; - - TX_TYPE txk_type[TXK_TYPE_BUF_LEN]; - - FILTER_INTRA_MODE_INFO filter_intra_mode_info; - - // The actual prediction angle is the base angle + (angle_delta * step). 
- int8_t angle_delta[PLANE_TYPES]; - - // interintra members - INTERINTRA_MODE interintra_mode; - // TODO(debargha): Consolidate these flags - int use_wedge_interintra; - int interintra_wedge_index; - int interintra_wedge_sign; - // interinter members - INTERINTER_COMPOUND_DATA interinter_comp; - MOTION_MODE motion_mode; - int overlappable_neighbors[2]; - int_mv mv[2]; - uint8_t ref_mv_idx; - PARTITION_TYPE partition; - /* deringing gain *per-superblock* */ - int8_t cdef_strength; - int current_qindex; - int delta_lf_from_base; - int delta_lf[FRAME_LF_COUNT]; -#if CONFIG_RD_DEBUG - RD_STATS rd_stats; - int mi_row; - int mi_col; -#endif - int num_proj_ref; - WarpedMotionParams wm_params; - - // Index of the alpha Cb and alpha Cr combination - int cfl_alpha_idx; - // Joint sign of alpha Cb and alpha Cr - int cfl_alpha_signs; - - int compound_idx; - int comp_group_idx; -} MB_MODE_INFO; - -static INLINE int is_intrabc_block(const MB_MODE_INFO *mbmi) { - return mbmi->use_intrabc; -} - -static INLINE PREDICTION_MODE get_uv_mode(UV_PREDICTION_MODE mode) { - assert(mode < UV_INTRA_MODES); - static const PREDICTION_MODE uv2y[] = { - DC_PRED, // UV_DC_PRED - V_PRED, // UV_V_PRED - H_PRED, // UV_H_PRED - D45_PRED, // UV_D45_PRED - D135_PRED, // UV_D135_PRED - D113_PRED, // UV_D113_PRED - D157_PRED, // UV_D157_PRED - D203_PRED, // UV_D203_PRED - D67_PRED, // UV_D67_PRED - SMOOTH_PRED, // UV_SMOOTH_PRED - SMOOTH_V_PRED, // UV_SMOOTH_V_PRED - SMOOTH_H_PRED, // UV_SMOOTH_H_PRED - PAETH_PRED, // UV_PAETH_PRED - DC_PRED, // UV_CFL_PRED - INTRA_INVALID, // UV_INTRA_MODES - INTRA_INVALID, // UV_MODE_INVALID - }; - return uv2y[mode]; -} - -static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) { - return is_intrabc_block(mbmi) || mbmi->ref_frame[0] > INTRA_FRAME; -} - -static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) { - return mbmi->ref_frame[1] > INTRA_FRAME; -} - -static INLINE int has_uni_comp_refs(const MB_MODE_INFO *mbmi) { - return has_second_ref(mbmi) && 
(!((mbmi->ref_frame[0] >= BWDREF_FRAME) ^ - (mbmi->ref_frame[1] >= BWDREF_FRAME))); -} - -static INLINE MV_REFERENCE_FRAME comp_ref0(int ref_idx) { - static const MV_REFERENCE_FRAME lut[] = { - LAST_FRAME, // LAST_LAST2_FRAMES, - LAST_FRAME, // LAST_LAST3_FRAMES, - LAST_FRAME, // LAST_GOLDEN_FRAMES, - BWDREF_FRAME, // BWDREF_ALTREF_FRAMES, - LAST2_FRAME, // LAST2_LAST3_FRAMES - LAST2_FRAME, // LAST2_GOLDEN_FRAMES, - LAST3_FRAME, // LAST3_GOLDEN_FRAMES, - BWDREF_FRAME, // BWDREF_ALTREF2_FRAMES, - ALTREF2_FRAME, // ALTREF2_ALTREF_FRAMES, - }; - assert(NELEMENTS(lut) == TOTAL_UNIDIR_COMP_REFS); - return lut[ref_idx]; -} - -static INLINE MV_REFERENCE_FRAME comp_ref1(int ref_idx) { - static const MV_REFERENCE_FRAME lut[] = { - LAST2_FRAME, // LAST_LAST2_FRAMES, - LAST3_FRAME, // LAST_LAST3_FRAMES, - GOLDEN_FRAME, // LAST_GOLDEN_FRAMES, - ALTREF_FRAME, // BWDREF_ALTREF_FRAMES, - LAST3_FRAME, // LAST2_LAST3_FRAMES - GOLDEN_FRAME, // LAST2_GOLDEN_FRAMES, - GOLDEN_FRAME, // LAST3_GOLDEN_FRAMES, - ALTREF2_FRAME, // BWDREF_ALTREF2_FRAMES, - ALTREF_FRAME, // ALTREF2_ALTREF_FRAMES, - }; - assert(NELEMENTS(lut) == TOTAL_UNIDIR_COMP_REFS); - return lut[ref_idx]; -} - -PREDICTION_MODE av1_left_block_mode(const MB_MODE_INFO *left_mi); - -PREDICTION_MODE av1_above_block_mode(const MB_MODE_INFO *above_mi); - -static INLINE int is_global_mv_block(const MB_MODE_INFO *const mbmi, - TransformationType type) { - const PREDICTION_MODE mode = mbmi->mode; - const BLOCK_SIZE bsize = mbmi->sb_type; - const int block_size_allowed = - AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8; - return (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) && type > TRANSLATION && - block_size_allowed; -} - -#if CONFIG_MISMATCH_DEBUG -static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col, - int mi_row, int tx_blk_col, int tx_blk_row, - int subsampling_x, int subsampling_y) { - *pixel_c = ((mi_col >> subsampling_x) << MI_SIZE_LOG2) + - (tx_blk_col << tx_size_wide_log2[0]); - *pixel_r 
= ((mi_row >> subsampling_y) << MI_SIZE_LOG2) + - (tx_blk_row << tx_size_high_log2[0]); -} -#endif - -enum ATTRIBUTE_PACKED mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 }; - -struct buf_2d { - uint8_t *buf; - uint8_t *buf0; - int width; - int height; - int stride; -}; - -typedef struct eob_info { - uint16_t eob; - uint16_t max_scan_line; -} eob_info; - -typedef struct { - DECLARE_ALIGNED(32, tran_low_t, dqcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]); - eob_info eob_data[MAX_MB_PLANE] - [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; - DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]); -} CB_BUFFER; - -typedef struct macroblockd_plane { - tran_low_t *dqcoeff; - tran_low_t *dqcoeff_block; - eob_info *eob_data; - PLANE_TYPE plane_type; - int subsampling_x; - int subsampling_y; - struct buf_2d dst; - struct buf_2d pre[2]; - ENTROPY_CONTEXT *above_context; - ENTROPY_CONTEXT *left_context; - - // The dequantizers below are true dequntizers used only in the - // dequantization process. They have the same coefficient - // shift/scale as TX. - int16_t seg_dequant_QTX[MAX_SEGMENTS][2]; - uint8_t *color_index_map; - - // block size in pixels - uint8_t width, height; - - qm_val_t *seg_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL]; - qm_val_t *seg_qmatrix[MAX_SEGMENTS][TX_SIZES_ALL]; - - // the 'dequantizers' below are not literal dequantizer values. - // They're used by encoder RDO to generate ad-hoc lambda values. - // They use a hardwired Q3 coeff shift and do not necessarily match - // the TX scale in use. 
- const int16_t *dequant_Q3; -} MACROBLOCKD_PLANE; - -#define BLOCK_OFFSET(x, i) \ - ((x) + (i) * (1 << (tx_size_wide_log2[0] + tx_size_high_log2[0]))) - -typedef struct RefBuffer { - int idx; // frame buf idx - int map_idx; // frame map idx - YV12_BUFFER_CONFIG *buf; - struct scale_factors sf; -} RefBuffer; - -typedef struct { - DECLARE_ALIGNED(16, InterpKernel, vfilter); - DECLARE_ALIGNED(16, InterpKernel, hfilter); -} WienerInfo; - -typedef struct { - int ep; - int xqd[2]; -} SgrprojInfo; - -#if CONFIG_DEBUG -#define CFL_SUB8X8_VAL_MI_SIZE (4) -#define CFL_SUB8X8_VAL_MI_SQUARE \ - (CFL_SUB8X8_VAL_MI_SIZE * CFL_SUB8X8_VAL_MI_SIZE) -#endif // CONFIG_DEBUG -#define CFL_MAX_BLOCK_SIZE (BLOCK_32X32) -#define CFL_BUF_LINE (32) -#define CFL_BUF_LINE_I128 (CFL_BUF_LINE >> 3) -#define CFL_BUF_LINE_I256 (CFL_BUF_LINE >> 4) -#define CFL_BUF_SQUARE (CFL_BUF_LINE * CFL_BUF_LINE) -typedef struct cfl_ctx { - // Q3 reconstructed luma pixels (only Q2 is required, but Q3 is used to avoid - // shifts) - uint16_t recon_buf_q3[CFL_BUF_SQUARE]; - // Q3 AC contributions (reconstructed luma pixels - tx block avg) - int16_t ac_buf_q3[CFL_BUF_SQUARE]; - - // Cache the DC_PRED when performing RDO, so it does not have to be recomputed - // for every scaling parameter - int dc_pred_is_cached[CFL_PRED_PLANES]; - // The DC_PRED cache is disable when decoding - int use_dc_pred_cache; - // Only cache the first row of the DC_PRED - int16_t dc_pred_cache[CFL_PRED_PLANES][CFL_BUF_LINE]; - - // Height and width currently used in the CfL prediction buffer. 
- int buf_height, buf_width; - - int are_parameters_computed; - - // Chroma subsampling - int subsampling_x, subsampling_y; - - int mi_row, mi_col; - - // Whether the reconstructed luma pixels need to be stored - int store_y; - -#if CONFIG_DEBUG - int rate; -#endif // CONFIG_DEBUG - - int is_chroma_reference; -} CFL_CTX; - -typedef struct jnt_comp_params { - int use_jnt_comp_avg; - int fwd_offset; - int bck_offset; -} JNT_COMP_PARAMS; - -// Most/all of the pointers are mere pointers to actual arrays are allocated -// elsewhere. This is mostly for coding convenience. -typedef struct macroblockd { - struct macroblockd_plane plane[MAX_MB_PLANE]; - - TileInfo tile; - - int mi_stride; - - MB_MODE_INFO **mi; - MB_MODE_INFO *left_mbmi; - MB_MODE_INFO *above_mbmi; - MB_MODE_INFO *chroma_left_mbmi; - MB_MODE_INFO *chroma_above_mbmi; - - int up_available; - int left_available; - int chroma_up_available; - int chroma_left_available; - - /* Distance of MB away from frame edges in subpixels (1/8th pixel) */ - int mb_to_left_edge; - int mb_to_right_edge; - int mb_to_top_edge; - int mb_to_bottom_edge; - - /* pointers to reference frames */ - const RefBuffer *block_refs[2]; - - /* pointer to current frame */ - const YV12_BUFFER_CONFIG *cur_buf; - - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][MAX_MIB_SIZE]; - - PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[MAX_MIB_SIZE]; - - TXFM_CONTEXT *above_txfm_context; - TXFM_CONTEXT *left_txfm_context; - TXFM_CONTEXT left_txfm_context_buffer[MAX_MIB_SIZE]; - - WienerInfo wiener_info[MAX_MB_PLANE]; - SgrprojInfo sgrproj_info[MAX_MB_PLANE]; - - // block dimension in the unit of mode_info. - uint8_t n4_w, n4_h; - - uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]; - CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; - uint8_t is_sec_rect; - - // Counts of each reference frame in the above and left neighboring blocks. 
- // NOTE: Take into account both single and comp references. - uint8_t neighbors_ref_counts[REF_FRAMES]; - - FRAME_CONTEXT *tile_ctx; - /* Bit depth: 8, 10, 12 */ - int bd; - - int qindex[MAX_SEGMENTS]; - int lossless[MAX_SEGMENTS]; - int corrupted; - int cur_frame_force_integer_mv; - // same with that in AV1_COMMON - struct aom_internal_error_info *error_info; - const WarpedMotionParams *global_motion; - int delta_qindex; - int current_qindex; - // Since actual frame level loop filtering level value is not available - // at the beginning of the tile (only available during actual filtering) - // at encoder side.we record the delta_lf (against the frame level loop - // filtering level) and code the delta between previous superblock's delta - // lf and current delta lf. It is equivalent to the delta between previous - // superblock's actual lf and current lf. - int delta_lf_from_base; - // For this experiment, we have four frame filter levels for different plane - // and direction. So, to support the per superblock update, we need to add - // a few more params as below. 
- // 0: delta loop filter level for y plane vertical - // 1: delta loop filter level for y plane horizontal - // 2: delta loop filter level for u plane - // 3: delta loop filter level for v plane - // To make it consistent with the reference to each filter level in segment, - // we need to -1, since - // SEG_LVL_ALT_LF_Y_V = 1; - // SEG_LVL_ALT_LF_Y_H = 2; - // SEG_LVL_ALT_LF_U = 3; - // SEG_LVL_ALT_LF_V = 4; - int delta_lf[FRAME_LF_COUNT]; - int cdef_preset[4]; - - DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]); - uint8_t *mc_buf[2]; - CFL_CTX cfl; - - JNT_COMP_PARAMS jcp_param; - - uint16_t cb_offset[MAX_MB_PLANE]; - uint16_t txb_offset[MAX_MB_PLANE]; - uint16_t color_index_map_offset[2]; - - CONV_BUF_TYPE *tmp_conv_dst; - uint8_t *tmp_obmc_bufs[2]; -} MACROBLOCKD; - -static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) { - return xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? 1 : 0; -} - -static INLINE uint8_t *get_buf_by_bd(const MACROBLOCKD *xd, uint8_t *buf16) { - return (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - ? CONVERT_TO_BYTEPTR(buf16) - : buf16; -} - -static INLINE int get_sqr_bsize_idx(BLOCK_SIZE bsize) { - switch (bsize) { - case BLOCK_4X4: return 0; - case BLOCK_8X8: return 1; - case BLOCK_16X16: return 2; - case BLOCK_32X32: return 3; - case BLOCK_64X64: return 4; - case BLOCK_128X128: return 5; - default: return SQR_BLOCK_SIZES; - } -} - -// For a square block size 'bsize', returns the size of the sub-blocks used by -// the given partition type. If the partition produces sub-blocks of different -// sizes, then the function returns the largest sub-block size. -// Implements the Partition_Subsize lookup table in the spec (Section 9.3. -// Conversion tables). -// Note: the input block size should be square. -// Otherwise it's considered invalid. 
-static INLINE BLOCK_SIZE get_partition_subsize(BLOCK_SIZE bsize, - PARTITION_TYPE partition) { - if (partition == PARTITION_INVALID) { - return BLOCK_INVALID; - } else { - const int sqr_bsize_idx = get_sqr_bsize_idx(bsize); - return sqr_bsize_idx >= SQR_BLOCK_SIZES - ? BLOCK_INVALID - : subsize_lookup[partition][sqr_bsize_idx]; - } -} - -static TX_TYPE intra_mode_to_tx_type(const MB_MODE_INFO *mbmi, - PLANE_TYPE plane_type) { - static const TX_TYPE _intra_mode_to_tx_type[INTRA_MODES] = { - DCT_DCT, // DC - ADST_DCT, // V - DCT_ADST, // H - DCT_DCT, // D45 - ADST_ADST, // D135 - ADST_DCT, // D117 - DCT_ADST, // D153 - DCT_ADST, // D207 - ADST_DCT, // D63 - ADST_ADST, // SMOOTH - ADST_DCT, // SMOOTH_V - DCT_ADST, // SMOOTH_H - ADST_ADST, // PAETH - }; - const PREDICTION_MODE mode = - (plane_type == PLANE_TYPE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode); - assert(mode < INTRA_MODES); - return _intra_mode_to_tx_type[mode]; -} - -static INLINE int is_rect_tx(TX_SIZE tx_size) { return tx_size >= TX_SIZES; } - -static INLINE int block_signals_txsize(BLOCK_SIZE bsize) { - return bsize > BLOCK_4X4; -} - -// Number of transform types in each set type -static const int av1_num_ext_tx_set[EXT_TX_SET_TYPES] = { - 1, 2, 5, 7, 12, 16, -}; - -static const int av1_ext_tx_used[EXT_TX_SET_TYPES][TX_TYPES] = { - { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0 }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, -}; - -static const uint16_t av1_ext_tx_used_flag[EXT_TX_SET_TYPES] = { - 0x0001, // 0000 0000 0000 0001 - 0x0201, // 0000 0010 0000 0001 - 0x020F, // 0000 0010 0000 1111 - 0x0E0F, // 0000 1110 0000 1111 - 0x0FFF, // 0000 1111 1111 1111 - 0xFFFF, // 1111 1111 1111 1111 -}; - -static INLINE TxSetType av1_get_ext_tx_set_type(TX_SIZE tx_size, int is_inter, - int 
use_reduced_set) { - const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size]; - if (tx_size_sqr_up > TX_32X32) return EXT_TX_SET_DCTONLY; - if (tx_size_sqr_up == TX_32X32) - return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DCTONLY; - if (use_reduced_set) - return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX; - const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size]; - if (is_inter) { - return (tx_size_sqr == TX_16X16 ? EXT_TX_SET_DTT9_IDTX_1DDCT - : EXT_TX_SET_ALL16); - } else { - return (tx_size_sqr == TX_16X16 ? EXT_TX_SET_DTT4_IDTX - : EXT_TX_SET_DTT4_IDTX_1DDCT); - } -} - -// Maps tx set types to the indices. -static const int ext_tx_set_index[2][EXT_TX_SET_TYPES] = { - { // Intra - 0, -1, 2, 1, -1, -1 }, - { // Inter - 0, 3, -1, -1, 2, 1 }, -}; - -static INLINE int get_ext_tx_set(TX_SIZE tx_size, int is_inter, - int use_reduced_set) { - const TxSetType set_type = - av1_get_ext_tx_set_type(tx_size, is_inter, use_reduced_set); - return ext_tx_set_index[is_inter][set_type]; -} - -static INLINE int get_ext_tx_types(TX_SIZE tx_size, int is_inter, - int use_reduced_set) { - const int set_type = - av1_get_ext_tx_set_type(tx_size, is_inter, use_reduced_set); - return av1_num_ext_tx_set[set_type]; -} - -#define TXSIZEMAX(t1, t2) (tx_size_2d[(t1)] >= tx_size_2d[(t2)] ? (t1) : (t2)) -#define TXSIZEMIN(t1, t2) (tx_size_2d[(t1)] <= tx_size_2d[(t2)] ? 
(t1) : (t2)) - -static INLINE TX_SIZE tx_size_from_tx_mode(BLOCK_SIZE bsize, TX_MODE tx_mode) { - const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; - const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bsize]; - if (bsize == BLOCK_4X4) - return AOMMIN(max_txsize_lookup[bsize], largest_tx_size); - if (txsize_sqr_map[max_rect_tx_size] <= largest_tx_size) - return max_rect_tx_size; - else - return largest_tx_size; -} - -extern const int16_t dr_intra_derivative[90]; -static const uint8_t mode_to_angle_map[] = { - 0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0, -}; - -// Converts block_index for given transform size to index of the block in raster -// order. -static INLINE int av1_block_index_to_raster_order(TX_SIZE tx_size, - int block_idx) { - // For transform size 4x8, the possible block_idx values are 0 & 2, because - // block_idx values are incremented in steps of size 'tx_width_unit x - // tx_height_unit'. But, for this transform size, block_idx = 2 corresponds to - // block number 1 in raster order, inside an 8x8 MI block. - // For any other transform size, the two indices are equivalent. - return (tx_size == TX_4X8 && block_idx == 2) ? 1 : block_idx; -} - -// Inverse of above function. -// Note: only implemented for transform sizes 4x4, 4x8 and 8x4 right now. -static INLINE int av1_raster_order_to_block_index(TX_SIZE tx_size, - int raster_order) { - assert(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4); - // We ensure that block indices are 0 & 2 if tx size is 4x8 or 8x4. - return (tx_size == TX_4X4) ? raster_order : (raster_order > 0) ? 
2 : 0; -} - -static INLINE TX_TYPE get_default_tx_type(PLANE_TYPE plane_type, - const MACROBLOCKD *xd, - TX_SIZE tx_size) { - const MB_MODE_INFO *const mbmi = xd->mi[0]; - - if (is_inter_block(mbmi) || plane_type != PLANE_TYPE_Y || - xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) - return DCT_DCT; - - return intra_mode_to_tx_type(mbmi, plane_type); -} - -// Implements the get_plane_residual_size() function in the spec (Section -// 5.11.38. Get plane residual size function). -static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, - int subsampling_x, - int subsampling_y) { - if (bsize == BLOCK_INVALID) return BLOCK_INVALID; - return ss_size_lookup[bsize][subsampling_x][subsampling_y]; -} - -static INLINE int av1_get_txb_size_index(BLOCK_SIZE bsize, int blk_row, - int blk_col) { - TX_SIZE txs = max_txsize_rect_lookup[bsize]; - for (int level = 0; level < MAX_VARTX_DEPTH - 1; ++level) - txs = sub_tx_size_map[txs]; - const int tx_w_log2 = tx_size_wide_log2[txs] - MI_SIZE_LOG2; - const int tx_h_log2 = tx_size_high_log2[txs] - MI_SIZE_LOG2; - const int bw_log2 = mi_size_wide_log2[bsize]; - const int stride_log2 = bw_log2 - tx_w_log2; - const int index = - ((blk_row >> tx_h_log2) << stride_log2) + (blk_col >> tx_w_log2); - assert(index < INTER_TX_SIZE_BUF_LEN); - return index; -} - -static INLINE int av1_get_txk_type_index(BLOCK_SIZE bsize, int blk_row, - int blk_col) { - TX_SIZE txs = max_txsize_rect_lookup[bsize]; - for (int level = 0; level < MAX_VARTX_DEPTH; ++level) - txs = sub_tx_size_map[txs]; - const int tx_w_log2 = tx_size_wide_log2[txs] - MI_SIZE_LOG2; - const int tx_h_log2 = tx_size_high_log2[txs] - MI_SIZE_LOG2; - const int bw_uint_log2 = mi_size_wide_log2[bsize]; - const int stride_log2 = bw_uint_log2 - tx_w_log2; - const int index = - ((blk_row >> tx_h_log2) << stride_log2) + (blk_col >> tx_w_log2); - assert(index < TXK_TYPE_BUF_LEN); - return index; -} - -static INLINE void update_txk_array(TX_TYPE *txk_type, BLOCK_SIZE bsize, - int 
blk_row, int blk_col, TX_SIZE tx_size, - TX_TYPE tx_type) { - const int txk_type_idx = av1_get_txk_type_index(bsize, blk_row, blk_col); - txk_type[txk_type_idx] = tx_type; - - const int txw = tx_size_wide_unit[tx_size]; - const int txh = tx_size_high_unit[tx_size]; - // The 16x16 unit is due to the constraint from tx_64x64 which sets the - // maximum tx size for chroma as 32x32. Coupled with 4x1 transform block - // size, the constraint takes effect in 32x16 / 16x32 size too. To solve - // the intricacy, cover all the 16x16 units inside a 64 level transform. - if (txw == tx_size_wide_unit[TX_64X64] || - txh == tx_size_high_unit[TX_64X64]) { - const int tx_unit = tx_size_wide_unit[TX_16X16]; - for (int idy = 0; idy < txh; idy += tx_unit) { - for (int idx = 0; idx < txw; idx += tx_unit) { - const int this_index = - av1_get_txk_type_index(bsize, blk_row + idy, blk_col + idx); - txk_type[this_index] = tx_type; - } - } - } -} - -static INLINE TX_TYPE av1_get_tx_type(PLANE_TYPE plane_type, - const MACROBLOCKD *xd, int blk_row, - int blk_col, TX_SIZE tx_size, - int reduced_tx_set) { - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const struct macroblockd_plane *const pd = &xd->plane[plane_type]; - const TxSetType tx_set_type = - av1_get_ext_tx_set_type(tx_size, is_inter_block(mbmi), reduced_tx_set); - - TX_TYPE tx_type; - if (xd->lossless[mbmi->segment_id] || txsize_sqr_up_map[tx_size] > TX_32X32) { - tx_type = DCT_DCT; - } else { - if (plane_type == PLANE_TYPE_Y) { - const int txk_type_idx = - av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col); - tx_type = mbmi->txk_type[txk_type_idx]; - } else if (is_inter_block(mbmi)) { - // scale back to y plane's coordinate - blk_row <<= pd->subsampling_y; - blk_col <<= pd->subsampling_x; - const int txk_type_idx = - av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col); - tx_type = mbmi->txk_type[txk_type_idx]; - } else { - // In intra mode, uv planes don't share the same prediction mode as y - // plane, so the tx_type should 
not be shared - tx_type = intra_mode_to_tx_type(mbmi, PLANE_TYPE_UV); - } - } - assert(tx_type < TX_TYPES); - if (!av1_ext_tx_used[tx_set_type][tx_type]) return DCT_DCT; - return tx_type; -} - -void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y, - const int num_planes); - -static INLINE int bsize_to_max_depth(BLOCK_SIZE bsize) { - TX_SIZE tx_size = max_txsize_rect_lookup[bsize]; - int depth = 0; - while (depth < MAX_TX_DEPTH && tx_size != TX_4X4) { - depth++; - tx_size = sub_tx_size_map[tx_size]; - } - return depth; -} - -static INLINE int bsize_to_tx_size_cat(BLOCK_SIZE bsize) { - TX_SIZE tx_size = max_txsize_rect_lookup[bsize]; - assert(tx_size != TX_4X4); - int depth = 0; - while (tx_size != TX_4X4) { - depth++; - tx_size = sub_tx_size_map[tx_size]; - assert(depth < 10); - } - assert(depth <= MAX_TX_CATS); - return depth - 1; -} - -static INLINE TX_SIZE depth_to_tx_size(int depth, BLOCK_SIZE bsize) { - TX_SIZE max_tx_size = max_txsize_rect_lookup[bsize]; - TX_SIZE tx_size = max_tx_size; - for (int d = 0; d < depth; ++d) tx_size = sub_tx_size_map[tx_size]; - return tx_size; -} - -static INLINE TX_SIZE av1_get_adjusted_tx_size(TX_SIZE tx_size) { - switch (tx_size) { - case TX_64X64: - case TX_64X32: - case TX_32X64: return TX_32X32; - case TX_64X16: return TX_32X16; - case TX_16X64: return TX_16X32; - default: return tx_size; - } -} - -static INLINE TX_SIZE av1_get_max_uv_txsize(BLOCK_SIZE bsize, int subsampling_x, - int subsampling_y) { - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, subsampling_x, subsampling_y); - assert(plane_bsize < BLOCK_SIZES_ALL); - const TX_SIZE uv_tx = max_txsize_rect_lookup[plane_bsize]; - return av1_get_adjusted_tx_size(uv_tx); -} - -static INLINE TX_SIZE av1_get_tx_size(int plane, const MACROBLOCKD *xd) { - const MB_MODE_INFO *mbmi = xd->mi[0]; - if (xd->lossless[mbmi->segment_id]) return TX_4X4; - if (plane == 0) return mbmi->tx_size; - const MACROBLOCKD_PLANE *pd = &xd->plane[plane]; - return 
av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y); -} - -void av1_reset_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize, const int num_planes); - -void av1_reset_loop_filter_delta(MACROBLOCKD *xd, int num_planes); - -void av1_reset_loop_restoration(MACROBLOCKD *xd, const int num_planes); - -typedef void (*foreach_transformed_block_visitor)(int plane, int block, - int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg); - -void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, - int plane, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - int has_eob, int aoff, int loff); - -#define MAX_INTERINTRA_SB_SQUARE 32 * 32 -static INLINE int is_interintra_mode(const MB_MODE_INFO *mbmi) { - return (mbmi->ref_frame[0] > INTRA_FRAME && - mbmi->ref_frame[1] == INTRA_FRAME); -} - -static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) { - return (bsize >= BLOCK_8X8) && (bsize <= BLOCK_32X32); -} - -static INLINE int is_interintra_allowed_mode(const PREDICTION_MODE mode) { - return (mode >= SINGLE_INTER_MODE_START) && (mode < SINGLE_INTER_MODE_END); -} - -static INLINE int is_interintra_allowed_ref(const MV_REFERENCE_FRAME rf[2]) { - return (rf[0] > INTRA_FRAME) && (rf[1] <= INTRA_FRAME); -} - -static INLINE int is_interintra_allowed(const MB_MODE_INFO *mbmi) { - return is_interintra_allowed_bsize(mbmi->sb_type) && - is_interintra_allowed_mode(mbmi->mode) && - is_interintra_allowed_ref(mbmi->ref_frame); -} - -static INLINE int is_interintra_allowed_bsize_group(int group) { - int i; - for (i = 0; i < BLOCK_SIZES_ALL; i++) { - if (size_group_lookup[i] == group && - is_interintra_allowed_bsize((BLOCK_SIZE)i)) { - return 1; - } - } - return 0; -} - -static INLINE int is_interintra_pred(const MB_MODE_INFO *mbmi) { - return mbmi->ref_frame[0] > INTRA_FRAME && - mbmi->ref_frame[1] == INTRA_FRAME && is_interintra_allowed(mbmi); -} - -static INLINE int get_vartx_max_txsize(const 
MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane) { - if (xd->lossless[xd->mi[0]->segment_id]) return TX_4X4; - const TX_SIZE max_txsize = max_txsize_rect_lookup[bsize]; - if (plane == 0) return max_txsize; // luma - return av1_get_adjusted_tx_size(max_txsize); // chroma -} - -static INLINE int is_motion_variation_allowed_bsize(BLOCK_SIZE bsize) { - return AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8; -} - -static INLINE int is_motion_variation_allowed_compound( - const MB_MODE_INFO *mbmi) { - if (!has_second_ref(mbmi)) - return 1; - else - return 0; -} - -// input: log2 of length, 0(4), 1(8), ... -static const int max_neighbor_obmc[6] = { 0, 1, 2, 3, 4, 4 }; - -static INLINE int check_num_overlappable_neighbors(const MB_MODE_INFO *mbmi) { - return !(mbmi->overlappable_neighbors[0] == 0 && - mbmi->overlappable_neighbors[1] == 0); -} - -static INLINE MOTION_MODE -motion_mode_allowed(const WarpedMotionParams *gm_params, const MACROBLOCKD *xd, - const MB_MODE_INFO *mbmi, int allow_warped_motion) { - if (xd->cur_frame_force_integer_mv == 0) { - const TransformationType gm_type = gm_params[mbmi->ref_frame[0]].wmtype; - if (is_global_mv_block(mbmi, gm_type)) return SIMPLE_TRANSLATION; - } - if (is_motion_variation_allowed_bsize(mbmi->sb_type) && - is_inter_mode(mbmi->mode) && mbmi->ref_frame[1] != INTRA_FRAME && - is_motion_variation_allowed_compound(mbmi)) { - if (!check_num_overlappable_neighbors(mbmi)) return SIMPLE_TRANSLATION; - assert(!has_second_ref(mbmi)); - if (mbmi->num_proj_ref >= 1 && - (allow_warped_motion && !av1_is_scaled(&(xd->block_refs[0]->sf)))) { - if (xd->cur_frame_force_integer_mv) { - return OBMC_CAUSAL; - } - return WARPED_CAUSAL; - } - return OBMC_CAUSAL; - } else { - return SIMPLE_TRANSLATION; - } -} - -static INLINE void assert_motion_mode_valid(MOTION_MODE mode, - const WarpedMotionParams *gm_params, - const MACROBLOCKD *xd, - const MB_MODE_INFO *mbmi, - int allow_warped_motion) { - const MOTION_MODE last_motion_mode_allowed = - 
motion_mode_allowed(gm_params, xd, mbmi, allow_warped_motion); - - // Check that the input mode is not illegal - if (last_motion_mode_allowed < mode) - assert(0 && "Illegal motion mode selected"); -} - -static INLINE int is_neighbor_overlappable(const MB_MODE_INFO *mbmi) { - return (is_inter_block(mbmi)); -} - -static INLINE int av1_allow_palette(int allow_screen_content_tools, - BLOCK_SIZE sb_type) { - return allow_screen_content_tools && block_size_wide[sb_type] <= 64 && - block_size_high[sb_type] <= 64 && sb_type >= BLOCK_8X8; -} - -// Returns sub-sampled dimensions of the given block. -// The output values for 'rows_within_bounds' and 'cols_within_bounds' will -// differ from 'height' and 'width' when part of the block is outside the -// right -// and/or bottom image boundary. -static INLINE void av1_get_block_dimensions(BLOCK_SIZE bsize, int plane, - const MACROBLOCKD *xd, int *width, - int *height, - int *rows_within_bounds, - int *cols_within_bounds) { - const int block_height = block_size_high[bsize]; - const int block_width = block_size_wide[bsize]; - const int block_rows = (xd->mb_to_bottom_edge >= 0) - ? block_height - : (xd->mb_to_bottom_edge >> 3) + block_height; - const int block_cols = (xd->mb_to_right_edge >= 0) - ? block_width - : (xd->mb_to_right_edge >> 3) + block_width; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - assert(IMPLIES(plane == PLANE_TYPE_Y, pd->subsampling_x == 0)); - assert(IMPLIES(plane == PLANE_TYPE_Y, pd->subsampling_y == 0)); - assert(block_width >= block_cols); - assert(block_height >= block_rows); - const int plane_block_width = block_width >> pd->subsampling_x; - const int plane_block_height = block_height >> pd->subsampling_y; - // Special handling for chroma sub8x8. 
- const int is_chroma_sub8_x = plane > 0 && plane_block_width < 4; - const int is_chroma_sub8_y = plane > 0 && plane_block_height < 4; - if (width) *width = plane_block_width + 2 * is_chroma_sub8_x; - if (height) *height = plane_block_height + 2 * is_chroma_sub8_y; - if (rows_within_bounds) { - *rows_within_bounds = - (block_rows >> pd->subsampling_y) + 2 * is_chroma_sub8_y; - } - if (cols_within_bounds) { - *cols_within_bounds = - (block_cols >> pd->subsampling_x) + 2 * is_chroma_sub8_x; - } -} - -/* clang-format off */ -typedef aom_cdf_prob (*MapCdf)[PALETTE_COLOR_INDEX_CONTEXTS] - [CDF_SIZE(PALETTE_COLORS)]; -typedef const int (*ColorCost)[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS] - [PALETTE_COLORS]; -/* clang-format on */ - -typedef struct { - int rows; - int cols; - int n_colors; - int plane_width; - int plane_height; - uint8_t *color_map; - MapCdf map_cdf; - ColorCost color_cost; -} Av1ColorMapParam; - -static INLINE int is_nontrans_global_motion(const MACROBLOCKD *xd, - const MB_MODE_INFO *mbmi) { - int ref; - - // First check if all modes are GLOBALMV - if (mbmi->mode != GLOBALMV && mbmi->mode != GLOBAL_GLOBALMV) return 0; - - if (AOMMIN(mi_size_wide[mbmi->sb_type], mi_size_high[mbmi->sb_type]) < 2) - return 0; - - // Now check if all global motion is non translational - for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { - if (xd->global_motion[mbmi->ref_frame[ref]].wmtype == TRANSLATION) return 0; - } - return 1; -} - -static INLINE PLANE_TYPE get_plane_type(int plane) { - return (plane == 0) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV; -} - -static INLINE int av1_get_max_eob(TX_SIZE tx_size) { - if (tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64) { - return 1024; - } - if (tx_size == TX_16X64 || tx_size == TX_64X16) { - return 512; - } - return tx_size_2d[tx_size]; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_BLOCKD_H_ diff --git a/third_party/aom/av1/common/cdef.c b/third_party/aom/av1/common/cdef.c deleted file mode 100644 index e9e2b0e42..000000000 --- a/third_party/aom/av1/common/cdef.c +++ /dev/null @@ -1,403 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <assert.h> -#include <math.h> -#include <string.h> - -#include "config/aom_scale_rtcd.h" - -#include "aom/aom_integer.h" -#include "av1/common/cdef.h" -#include "av1/common/cdef_block.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/reconinter.h" - -int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) { - int maxc, maxr; - int skip = 1; - maxc = cm->mi_cols - mi_col; - maxr = cm->mi_rows - mi_row; - - maxr = AOMMIN(maxr, MI_SIZE_64X64); - maxc = AOMMIN(maxc, MI_SIZE_64X64); - - for (int r = 0; r < maxr; r++) { - for (int c = 0; c < maxc; c++) { - skip = - skip && - cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c]->skip; - } - } - return skip; -} - -static int is_8x8_block_skip(MB_MODE_INFO **grid, int mi_row, int mi_col, - int mi_stride) { - int is_skip = 1; - for (int r = 0; r < mi_size_high[BLOCK_8X8]; ++r) - for (int c = 0; c < mi_size_wide[BLOCK_8X8]; ++c) - is_skip &= grid[(mi_row + r) * mi_stride + (mi_col + c)]->skip; - - return is_skip; -} - -int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col, - cdef_list *dlist, BLOCK_SIZE bs) { - MB_MODE_INFO **grid = cm->mi_grid_visible; - int maxc = cm->mi_cols - mi_col; - int maxr = cm->mi_rows - mi_row; - - if (bs == BLOCK_128X128 || bs == BLOCK_128X64) - maxc = AOMMIN(maxc, MI_SIZE_128X128); - else - maxc = AOMMIN(maxc, MI_SIZE_64X64); - if (bs == BLOCK_128X128 || bs == BLOCK_64X128) - maxr = AOMMIN(maxr, MI_SIZE_128X128); - else - maxr = AOMMIN(maxr, MI_SIZE_64X64); - - const int r_step = mi_size_high[BLOCK_8X8]; - const int c_step = mi_size_wide[BLOCK_8X8]; - const int r_shift = (r_step == 2); - const int c_shift = (c_step == 2); - - assert(r_step == 1 || r_step == 2); - assert(c_step == 1 || c_step == 2); - - int count = 0; - - for (int r = 0; r < maxr; r += r_step) { - for (int c = 0; c < maxc; c += c_step) { - if (!is_8x8_block_skip(grid, mi_row + r, mi_col + c, cm->mi_stride)) { - dlist[count].by = r >> r_shift; - dlist[count].bx 
= c >> c_shift; - dlist[count].skip = 0; - count++; - } - } - } - return count; -} - -void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, - int sstride, int v, int h) { - for (int i = 0; i < v; i++) { - for (int j = 0; j < h; j++) { - dst[i * dstride + j] = src[i * sstride + j]; - } - } -} - -void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, - const uint16_t *src, int sstride, int v, - int h) { - for (int i = 0; i < v; i++) { - for (int j = 0; j < h; j++) { - dst[i * dstride + j] = src[i * sstride + j]; - } - } -} - -static void copy_sb8_16(AOM_UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride, - const uint8_t *src, int src_voffset, int src_hoffset, - int sstride, int vsize, int hsize) { - if (cm->seq_params.use_highbitdepth) { - const uint16_t *base = - &CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset]; - copy_rect8_16bit_to_16bit(dst, dstride, base, sstride, vsize, hsize); - } else { - const uint8_t *base = &src[src_voffset * sstride + src_hoffset]; - copy_rect8_8bit_to_16bit(dst, dstride, base, sstride, vsize, hsize); - } -} - -static INLINE void fill_rect(uint16_t *dst, int dstride, int v, int h, - uint16_t x) { - for (int i = 0; i < v; i++) { - for (int j = 0; j < h; j++) { - dst[i * dstride + j] = x; - } - } -} - -static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src, - int sstride, int v, int h) { - for (int i = 0; i < v; i++) { - for (int j = 0; j < h; j++) { - dst[i * dstride + j] = src[i * sstride + j]; - } - } -} - -void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, - MACROBLOCKD *xd) { - const int num_planes = av1_num_planes(cm); - DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]); - uint16_t *linebuf[3]; - uint16_t *colbuf[3]; - cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64]; - unsigned char *row_cdef, *prev_row_cdef, *curr_row_cdef; - int cdef_count; - int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; - int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; - int 
mi_wide_l2[3]; - int mi_high_l2[3]; - int xdec[3]; - int ydec[3]; - int coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0); - const int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; - const int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; - av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0, - num_planes); - row_cdef = aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2); - memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2); - prev_row_cdef = row_cdef + 1; - curr_row_cdef = prev_row_cdef + nhfb + 2; - for (int pli = 0; pli < num_planes; pli++) { - xdec[pli] = xd->plane[pli].subsampling_x; - ydec[pli] = xd->plane[pli].subsampling_y; - mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x; - mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y; - } - const int stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER; - for (int pli = 0; pli < num_planes; pli++) { - linebuf[pli] = aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride); - colbuf[pli] = - aom_malloc(sizeof(*colbuf) * - ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) * - CDEF_HBORDER); - } - for (int fbr = 0; fbr < nvfb; fbr++) { - for (int pli = 0; pli < num_planes; pli++) { - const int block_height = - (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER; - fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER, - CDEF_VERY_LARGE); - } - int cdef_left = 1; - for (int fbc = 0; fbc < nhfb; fbc++) { - int level, sec_strength; - int uv_level, uv_sec_strength; - int nhb, nvb; - int cstart = 0; - curr_row_cdef[fbc] = 0; - if (cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride + - MI_SIZE_64X64 * fbc] == NULL || - cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride + - MI_SIZE_64X64 * fbc] - ->cdef_strength == -1) { - cdef_left = 0; - continue; - } - if (!cdef_left) cstart = -CDEF_HBORDER; - nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc); - nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr); 
- int frame_top, frame_left, frame_bottom, frame_right; - - int mi_row = MI_SIZE_64X64 * fbr; - int mi_col = MI_SIZE_64X64 * fbc; - // for the current filter block, it's top left corner mi structure (mi_tl) - // is first accessed to check whether the top and left boundaries are - // frame boundaries. Then bottom-left and top-right mi structures are - // accessed to check whether the bottom and right boundaries - // (respectively) are frame boundaries. - // - // Note that we can't just check the bottom-right mi structure - eg. if - // we're at the right-hand edge of the frame but not the bottom, then - // the bottom-right mi is NULL but the bottom-left is not. - frame_top = (mi_row == 0) ? 1 : 0; - frame_left = (mi_col == 0) ? 1 : 0; - - if (fbr != nvfb - 1) - frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0; - else - frame_bottom = 1; - - if (fbc != nhfb - 1) - frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0; - else - frame_right = 1; - - const int mbmi_cdef_strength = - cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride + - MI_SIZE_64X64 * fbc] - ->cdef_strength; - level = cm->cdef_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS; - sec_strength = - cm->cdef_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS; - sec_strength += sec_strength == 3; - uv_level = cm->cdef_uv_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS; - uv_sec_strength = - cm->cdef_uv_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS; - uv_sec_strength += uv_sec_strength == 3; - if ((level == 0 && sec_strength == 0 && uv_level == 0 && - uv_sec_strength == 0) || - (cdef_count = sb_compute_cdef_list(cm, fbr * MI_SIZE_64X64, - fbc * MI_SIZE_64X64, dlist, - BLOCK_64X64)) == 0) { - cdef_left = 0; - continue; - } - - curr_row_cdef[fbc] = 1; - for (int pli = 0; pli < num_planes; pli++) { - int coffset; - int rend, cend; - int pri_damping = cm->cdef_pri_damping; - int sec_damping = cm->cdef_sec_damping; - int hsize = nhb << mi_wide_l2[pli]; - int vsize = nvb << 
mi_high_l2[pli]; - - if (pli) { - level = uv_level; - sec_strength = uv_sec_strength; - } - - if (fbc == nhfb - 1) - cend = hsize; - else - cend = hsize + CDEF_HBORDER; - - if (fbr == nvfb - 1) - rend = vsize; - else - rend = vsize + CDEF_VBORDER; - - coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli]; - if (fbc == nhfb - 1) { - /* On the last superblock column, fill in the right border with - CDEF_VERY_LARGE to avoid filtering with the outside. */ - fill_rect(&src[cend + CDEF_HBORDER], CDEF_BSTRIDE, - rend + CDEF_VBORDER, hsize + CDEF_HBORDER - cend, - CDEF_VERY_LARGE); - } - if (fbr == nvfb - 1) { - /* On the last superblock row, fill in the bottom border with - CDEF_VERY_LARGE to avoid filtering with the outside. */ - fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE, - CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE); - } - /* Copy in the pixels we need from the current superblock for - deringing.*/ - copy_sb8_16(cm, - &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart], - CDEF_BSTRIDE, xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr, coffset + cstart, - xd->plane[pli].dst.stride, rend, cend - cstart); - if (!prev_row_cdef[fbc]) { - copy_sb8_16(cm, &src[CDEF_HBORDER], CDEF_BSTRIDE, - xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER, - coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize); - } else if (fbr > 0) { - copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset], - stride, CDEF_VBORDER, hsize); - } else { - fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize, - CDEF_VERY_LARGE); - } - if (!prev_row_cdef[fbc - 1]) { - copy_sb8_16(cm, src, CDEF_BSTRIDE, xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER, - coffset - CDEF_HBORDER, xd->plane[pli].dst.stride, - CDEF_VBORDER, CDEF_HBORDER); - } else if (fbr > 0 && fbc > 0) { - copy_rect(src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER], - stride, CDEF_VBORDER, 
CDEF_HBORDER); - } else { - fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, - CDEF_VERY_LARGE); - } - if (!prev_row_cdef[fbc + 1]) { - copy_sb8_16(cm, &src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])], - CDEF_BSTRIDE, xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER, - coffset + hsize, xd->plane[pli].dst.stride, CDEF_VBORDER, - CDEF_HBORDER); - } else if (fbr > 0 && fbc < nhfb - 1) { - copy_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, - &linebuf[pli][coffset + hsize], stride, CDEF_VBORDER, - CDEF_HBORDER); - } else { - fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, - CDEF_HBORDER, CDEF_VERY_LARGE); - } - if (cdef_left) { - /* If we deringed the superblock on the left then we need to copy in - saved pixels. */ - copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER, - rend + CDEF_VBORDER, CDEF_HBORDER); - } - /* Saving pixels in case we need to dering the superblock on the - right. */ - copy_rect(colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE, - rend + CDEF_VBORDER, CDEF_HBORDER); - copy_sb8_16( - cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf, - (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER, - coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize); - - if (frame_top) { - fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, - CDEF_VERY_LARGE); - } - if (frame_left) { - fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, - CDEF_VERY_LARGE); - } - if (frame_bottom) { - fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE, - CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE); - } - if (frame_right) { - fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, - vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE); - } - - if (cm->seq_params.use_highbitdepth) { - cdef_filter_fb( - NULL, - &CONVERT_TO_SHORTPTR( - xd->plane[pli] - .dst.buf)[xd->plane[pli].dst.stride * - (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + - (fbc * 
MI_SIZE_64X64 << mi_wide_l2[pli])], - xd->plane[pli].dst.stride, - &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli], - ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level, - sec_strength, pri_damping, sec_damping, coeff_shift); - } else { - cdef_filter_fb( - &xd->plane[pli] - .dst.buf[xd->plane[pli].dst.stride * - (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + - (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])], - NULL, xd->plane[pli].dst.stride, - &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli], - ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level, - sec_strength, pri_damping, sec_damping, coeff_shift); - } - } - cdef_left = 1; - } - { - unsigned char *tmp = prev_row_cdef; - prev_row_cdef = curr_row_cdef; - curr_row_cdef = tmp; - } - } - aom_free(row_cdef); - for (int pli = 0; pli < num_planes; pli++) { - aom_free(linebuf[pli]); - aom_free(colbuf[pli]); - } -} diff --git a/third_party/aom/av1/common/cdef.h b/third_party/aom/av1/common/cdef.h deleted file mode 100644 index 3b2eac8a5..000000000 --- a/third_party/aom/av1/common/cdef.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ -#ifndef AOM_AV1_COMMON_CDEF_H_ -#define AOM_AV1_COMMON_CDEF_H_ - -#define CDEF_STRENGTH_BITS 6 - -#define CDEF_PRI_STRENGTHS 16 -#define CDEF_SEC_STRENGTHS 4 - -#include "config/aom_config.h" - -#include "aom/aom_integer.h" -#include "aom_ports/mem.h" -#include "av1/common/cdef_block.h" -#include "av1/common/onyxc_int.h" - -static INLINE int sign(int i) { return i < 0 ? -1 : 1; } - -static INLINE int constrain(int diff, int threshold, int damping) { - if (!threshold) return 0; - - const int shift = AOMMAX(0, damping - get_msb(threshold)); - return sign(diff) * - AOMMIN(abs(diff), AOMMAX(0, threshold - (abs(diff) >> shift))); -} - -#ifdef __cplusplus -extern "C" { -#endif - -int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col); -int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col, - cdef_list *dlist, BLOCK_SIZE bsize); -void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd); - -void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, - AV1_COMMON *cm, MACROBLOCKD *xd, int fast); - -#ifdef __cplusplus -} // extern "C" -#endif -#endif // AOM_AV1_COMMON_CDEF_H_ diff --git a/third_party/aom/av1/common/cdef_block.c b/third_party/aom/av1/common/cdef_block.c deleted file mode 100644 index df1de89be..000000000 --- a/third_party/aom/av1/common/cdef_block.c +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <math.h> -#include <stdlib.h> - -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "av1/common/cdef.h" - -/* Generated from gen_filter_tables.c. */ -DECLARE_ALIGNED(16, const int, cdef_directions[8][2]) = { - { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 }, - { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 }, - { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 }, - { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2 }, - { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2 }, - { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1 }, - { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 }, - { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 } -}; - -/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on. - The search minimizes the weighted variance along all the lines in a - particular direction, i.e. the squared error between the input and a - "predicted" block where each pixel is replaced by the average along a line - in a particular direction. Since each direction have the same sum(x^2) term, - that term is never computed. See Section 2, step 2, of: - http://jmvalin.ca/notes/intra_paint.pdf */ -int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, - int coeff_shift) { - int i; - int32_t cost[8] = { 0 }; - int partial[8][15] = { { 0 } }; - int32_t best_cost = 0; - int best_dir = 0; - /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n. - The output is then 840 times larger, but we don't care for finding - the max. */ - static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 }; - for (i = 0; i < 8; i++) { - int j; - for (j = 0; j < 8; j++) { - int x; - /* We subtract 128 here to reduce the maximum range of the squared - partial sums. 
*/ - x = (img[i * stride + j] >> coeff_shift) - 128; - partial[0][i + j] += x; - partial[1][i + j / 2] += x; - partial[2][i] += x; - partial[3][3 + i - j / 2] += x; - partial[4][7 + i - j] += x; - partial[5][3 - i / 2 + j] += x; - partial[6][j] += x; - partial[7][i / 2 + j] += x; - } - } - for (i = 0; i < 8; i++) { - cost[2] += partial[2][i] * partial[2][i]; - cost[6] += partial[6][i] * partial[6][i]; - } - cost[2] *= div_table[8]; - cost[6] *= div_table[8]; - for (i = 0; i < 7; i++) { - cost[0] += (partial[0][i] * partial[0][i] + - partial[0][14 - i] * partial[0][14 - i]) * - div_table[i + 1]; - cost[4] += (partial[4][i] * partial[4][i] + - partial[4][14 - i] * partial[4][14 - i]) * - div_table[i + 1]; - } - cost[0] += partial[0][7] * partial[0][7] * div_table[8]; - cost[4] += partial[4][7] * partial[4][7] * div_table[8]; - for (i = 1; i < 8; i += 2) { - int j; - for (j = 0; j < 4 + 1; j++) { - cost[i] += partial[i][3 + j] * partial[i][3 + j]; - } - cost[i] *= div_table[8]; - for (j = 0; j < 4 - 1; j++) { - cost[i] += (partial[i][j] * partial[i][j] + - partial[i][10 - j] * partial[i][10 - j]) * - div_table[2 * j + 2]; - } - } - for (i = 0; i < 8; i++) { - if (cost[i] > best_cost) { - best_cost = cost[i]; - best_dir = i; - } - } - /* Difference between the optimal variance and the variance along the - orthogonal direction. Again, the sum(x^2) terms cancel out. */ - *var = best_cost - cost[(best_dir + 4) & 7]; - /* We'd normally divide by 840, but dividing by 1024 is close enough - for what we're going to do with this. */ - *var >>= 10; - return best_dir; -} - -const int cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } }; -const int cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } }; - -/* Smooth in the direction detected. 
*/ -void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, - const uint16_t *in, int pri_strength, int sec_strength, - int dir, int pri_damping, int sec_damping, int bsize, - AOM_UNUSED int max_unused, int coeff_shift) { - int i, j, k; - const int s = CDEF_BSTRIDE; - const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1]; - const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1]; - for (i = 0; i < 4 << (bsize == BLOCK_8X8 || bsize == BLOCK_4X8); i++) { - for (j = 0; j < 4 << (bsize == BLOCK_8X8 || bsize == BLOCK_8X4); j++) { - int16_t sum = 0; - int16_t y; - int16_t x = in[i * s + j]; - int max = x; - int min = x; - for (k = 0; k < 2; k++) { - int16_t p0 = in[i * s + j + cdef_directions[dir][k]]; - int16_t p1 = in[i * s + j - cdef_directions[dir][k]]; - sum += pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping); - sum += pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping); - if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max); - if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max); - min = AOMMIN(p0, min); - min = AOMMIN(p1, min); - int16_t s0 = in[i * s + j + cdef_directions[(dir + 2) & 7][k]]; - int16_t s1 = in[i * s + j - cdef_directions[(dir + 2) & 7][k]]; - int16_t s2 = in[i * s + j + cdef_directions[(dir + 6) & 7][k]]; - int16_t s3 = in[i * s + j - cdef_directions[(dir + 6) & 7][k]]; - if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max); - if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max); - if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max); - if (s3 != CDEF_VERY_LARGE) max = AOMMAX(s3, max); - min = AOMMIN(s0, min); - min = AOMMIN(s1, min); - min = AOMMIN(s2, min); - min = AOMMIN(s3, min); - sum += sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping); - sum += sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping); - sum += sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping); - sum += sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping); - } - y = clamp((int16_t)x + ((8 + sum - (sum < 0)) 
>> 4), min, max); - if (dst8) - dst8[i * dstride + j] = (uint8_t)y; - else - dst16[i * dstride + j] = (uint16_t)y; - } - } -} - -/* Compute the primary filter strength for an 8x8 block based on the - directional variance difference. A high variance difference means - that we have a highly directional pattern (e.g. a high contrast - edge), so we can apply more deringing. A low variance means that we - either have a low contrast edge, or a non-directional texture, so - we want to be careful not to blur. */ -static INLINE int adjust_strength(int strength, int32_t var) { - const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0; - /* We use the variance of 8x8 blocks to adjust the strength. */ - return var ? (strength * (4 + i) + 8) >> 4 : 0; -} - -void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in, - int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], - int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli, - cdef_list *dlist, int cdef_count, int level, - int sec_strength, int pri_damping, int sec_damping, - int coeff_shift) { - int bi; - int bx; - int by; - int bsize, bsizex, bsizey; - - int pri_strength = level << coeff_shift; - sec_strength <<= coeff_shift; - sec_damping += coeff_shift - (pli != AOM_PLANE_Y); - pri_damping += coeff_shift - (pli != AOM_PLANE_Y); - bsize = - ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8); - bsizex = 3 - xdec; - bsizey = 3 - ydec; - if (dirinit && pri_strength == 0 && sec_strength == 0) { - // If we're here, both primary and secondary strengths are 0, and - // we still haven't written anything to y[] yet, so we just copy - // the input to y[]. This is necessary only for av1_cdef_search() - // and only av1_cdef_search() sets dirinit. 
- for (bi = 0; bi < cdef_count; bi++) { - by = dlist[bi].by; - bx = dlist[bi].bx; - int iy, ix; - // TODO(stemidts/jmvalin): SIMD optimisations - for (iy = 0; iy < 1 << bsizey; iy++) - for (ix = 0; ix < 1 << bsizex; ix++) - dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] = - in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix]; - } - return; - } - - if (pli == 0) { - if (!dirinit || !*dirinit) { - for (bi = 0; bi < cdef_count; bi++) { - by = dlist[bi].by; - bx = dlist[bi].bx; - dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx], - CDEF_BSTRIDE, &var[by][bx], coeff_shift); - } - if (dirinit) *dirinit = 1; - } - } - if (pli == 1 && xdec != ydec) { - for (bi = 0; bi < cdef_count; bi++) { - static const int conv422[8] = { 7, 0, 2, 4, 5, 6, 6, 6 }; - static const int conv440[8] = { 1, 2, 2, 2, 3, 4, 6, 0 }; - by = dlist[bi].by; - bx = dlist[bi].bx; - dir[by][bx] = (xdec ? conv422 : conv440)[dir[by][bx]]; - } - } - - for (bi = 0; bi < cdef_count; bi++) { - int t = dlist[bi].skip ? 0 : pri_strength; - int s = dlist[bi].skip ? 0 : sec_strength; - by = dlist[bi].by; - bx = dlist[bi].bx; - if (dst8) - cdef_filter_block(&dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL, - dstride, - &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)], - (pli ? t : adjust_strength(t, var[by][bx])), s, - t ? dir[by][bx] : 0, pri_damping, sec_damping, bsize, - (256 << coeff_shift) - 1, coeff_shift); - else - cdef_filter_block( - NULL, - &dst16[dirinit ? bi << (bsizex + bsizey) - : (by << bsizey) * dstride + (bx << bsizex)], - dirinit ? 1 << bsizex : dstride, - &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)], - (pli ? t : adjust_strength(t, var[by][bx])), s, t ? 
dir[by][bx] : 0, - pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1, - coeff_shift); - } -} diff --git a/third_party/aom/av1/common/cdef_block.h b/third_party/aom/av1/common/cdef_block.h deleted file mode 100644 index 6b4452cd6..000000000 --- a/third_party/aom/av1/common/cdef_block.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_CDEF_BLOCK_H_ -#define AOM_AV1_COMMON_CDEF_BLOCK_H_ - -#include "av1/common/odintrin.h" - -#define CDEF_BLOCKSIZE 64 -#define CDEF_BLOCKSIZE_LOG2 6 -#define CDEF_NBLOCKS ((1 << MAX_SB_SIZE_LOG2) / 8) -#define CDEF_SB_SHIFT (MAX_SB_SIZE_LOG2 - CDEF_BLOCKSIZE_LOG2) - -/* We need to buffer three vertical lines. */ -#define CDEF_VBORDER (3) -/* We only need to buffer three horizontal pixels too, but let's align to - 16 bytes (8 x 16 bits) to make vectorization easier. 
*/ -#define CDEF_HBORDER (8) -#define CDEF_BSTRIDE \ - ALIGN_POWER_OF_TWO((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_HBORDER, 3) - -#define CDEF_VERY_LARGE (30000) -#define CDEF_INBUF_SIZE \ - (CDEF_BSTRIDE * ((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER)) - -extern const int cdef_pri_taps[2][2]; -extern const int cdef_sec_taps[2][2]; -DECLARE_ALIGNED(16, extern const int, cdef_directions[8][2]); - -typedef struct { - uint8_t by; - uint8_t bx; - uint8_t skip; -} cdef_list; - -typedef void (*cdef_filter_block_func)(uint8_t *dst8, uint16_t *dst16, - int dstride, const uint16_t *in, - int pri_strength, int sec_strength, - int dir, int pri_damping, - int sec_damping, int bsize, int max, - int coeff_shift); -void copy_cdef_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src, - cdef_list *dlist, int cdef_count, int bsize); - -void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in, - int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], - int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli, - cdef_list *dlist, int cdef_count, int level, - int sec_strength, int pri_damping, int sec_damping, - int coeff_shift); -#endif // AOM_AV1_COMMON_CDEF_BLOCK_H_ diff --git a/third_party/aom/av1/common/cdef_block_avx2.c b/third_party/aom/av1/common/cdef_block_avx2.c deleted file mode 100644 index e2b85b3e2..000000000 --- a/third_party/aom/av1/common/cdef_block_avx2.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "aom_dsp/aom_simd.h" -#define SIMD_FUNC(name) name##_avx2 -#include "av1/common/cdef_block_simd.h" diff --git a/third_party/aom/av1/common/cdef_block_neon.c b/third_party/aom/av1/common/cdef_block_neon.c deleted file mode 100644 index 2d6bc65e3..000000000 --- a/third_party/aom/av1/common/cdef_block_neon.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "aom_dsp/aom_simd.h" -#define SIMD_FUNC(name) name##_neon -#include "av1/common/cdef_block_simd.h" diff --git a/third_party/aom/av1/common/cdef_block_simd.h b/third_party/aom/av1/common/cdef_block_simd.h deleted file mode 100644 index 14587a023..000000000 --- a/third_party/aom/av1/common/cdef_block_simd.h +++ /dev/null @@ -1,920 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_ -#define AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_ - -#include "config/av1_rtcd.h" - -#include "av1/common/cdef_block.h" - -/* partial A is a 16-bit vector of the form: - [x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form: - [0 y1 y2 y3 y4 y5 y6 y7]. - This function computes (x1^2+y1^2)*C1 + (x2^2+y2^2)*C2 + ... - (x7^2+y2^7)*C7 + (x8^2+0^2)*C8 where the C1..C8 constants are in const1 - and const2. */ -static INLINE v128 fold_mul_and_sum(v128 partiala, v128 partialb, v128 const1, - v128 const2) { - v128 tmp; - /* Reverse partial B. */ - partialb = v128_shuffle_8( - partialb, v128_from_32(0x0f0e0100, 0x03020504, 0x07060908, 0x0b0a0d0c)); - /* Interleave the x and y values of identical indices and pair x8 with 0. */ - tmp = partiala; - partiala = v128_ziplo_16(partialb, partiala); - partialb = v128_ziphi_16(partialb, tmp); - /* Square and add the corresponding x and y values. */ - partiala = v128_madd_s16(partiala, partiala); - partialb = v128_madd_s16(partialb, partialb); - /* Multiply by constant. */ - partiala = v128_mullo_s32(partiala, const1); - partialb = v128_mullo_s32(partialb, const2); - /* Sum all results. */ - partiala = v128_add_32(partiala, partialb); - return partiala; -} - -static INLINE v128 hsum4(v128 x0, v128 x1, v128 x2, v128 x3) { - v128 t0, t1, t2, t3; - t0 = v128_ziplo_32(x1, x0); - t1 = v128_ziplo_32(x3, x2); - t2 = v128_ziphi_32(x1, x0); - t3 = v128_ziphi_32(x3, x2); - x0 = v128_ziplo_64(t1, t0); - x1 = v128_ziphi_64(t1, t0); - x2 = v128_ziplo_64(t3, t2); - x3 = v128_ziphi_64(t3, t2); - return v128_add_32(v128_add_32(x0, x1), v128_add_32(x2, x3)); -} - -/* Computes cost for directions 0, 5, 6 and 7. We can call this function again - to compute the remaining directions. */ -static INLINE v128 compute_directions(v128 lines[8], int32_t tmp_cost1[4]) { - v128 partial4a, partial4b, partial5a, partial5b, partial7a, partial7b; - v128 partial6; - v128 tmp; - /* Partial sums for lines 0 and 1. 
*/ - partial4a = v128_shl_n_byte(lines[0], 14); - partial4b = v128_shr_n_byte(lines[0], 2); - partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[1], 12)); - partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[1], 4)); - tmp = v128_add_16(lines[0], lines[1]); - partial5a = v128_shl_n_byte(tmp, 10); - partial5b = v128_shr_n_byte(tmp, 6); - partial7a = v128_shl_n_byte(tmp, 4); - partial7b = v128_shr_n_byte(tmp, 12); - partial6 = tmp; - - /* Partial sums for lines 2 and 3. */ - partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[2], 10)); - partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[2], 6)); - partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[3], 8)); - partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[3], 8)); - tmp = v128_add_16(lines[2], lines[3]); - partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 8)); - partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 8)); - partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 6)); - partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 10)); - partial6 = v128_add_16(partial6, tmp); - - /* Partial sums for lines 4 and 5. */ - partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[4], 6)); - partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[4], 10)); - partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[5], 4)); - partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[5], 12)); - tmp = v128_add_16(lines[4], lines[5]); - partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 6)); - partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 10)); - partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 8)); - partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 8)); - partial6 = v128_add_16(partial6, tmp); - - /* Partial sums for lines 6 and 7. 
*/ - partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[6], 2)); - partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[6], 14)); - partial4a = v128_add_16(partial4a, lines[7]); - tmp = v128_add_16(lines[6], lines[7]); - partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 4)); - partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 12)); - partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 10)); - partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 6)); - partial6 = v128_add_16(partial6, tmp); - - /* Compute costs in terms of partial sums. */ - partial4a = - fold_mul_and_sum(partial4a, partial4b, v128_from_32(210, 280, 420, 840), - v128_from_32(105, 120, 140, 168)); - partial7a = - fold_mul_and_sum(partial7a, partial7b, v128_from_32(210, 420, 0, 0), - v128_from_32(105, 105, 105, 140)); - partial5a = - fold_mul_and_sum(partial5a, partial5b, v128_from_32(210, 420, 0, 0), - v128_from_32(105, 105, 105, 140)); - partial6 = v128_madd_s16(partial6, partial6); - partial6 = v128_mullo_s32(partial6, v128_dup_32(105)); - - partial4a = hsum4(partial4a, partial5a, partial6, partial7a); - v128_store_unaligned(tmp_cost1, partial4a); - return partial4a; -} - -/* transpose and reverse the order of the lines -- equivalent to a 90-degree - counter-clockwise rotation of the pixels. 
*/
static INLINE void array_reverse_transpose_8x8(v128 *in, v128 *res) {
  /* Stage 1: interleave adjacent rows at 16-bit lane granularity. */
  const v128 tr0_0 = v128_ziplo_16(in[1], in[0]);
  const v128 tr0_1 = v128_ziplo_16(in[3], in[2]);
  const v128 tr0_2 = v128_ziphi_16(in[1], in[0]);
  const v128 tr0_3 = v128_ziphi_16(in[3], in[2]);
  const v128 tr0_4 = v128_ziplo_16(in[5], in[4]);
  const v128 tr0_5 = v128_ziplo_16(in[7], in[6]);
  const v128 tr0_6 = v128_ziphi_16(in[5], in[4]);
  const v128 tr0_7 = v128_ziphi_16(in[7], in[6]);

  /* Stage 2: interleave the stage-1 results at 32-bit granularity. */
  const v128 tr1_0 = v128_ziplo_32(tr0_1, tr0_0);
  const v128 tr1_1 = v128_ziplo_32(tr0_5, tr0_4);
  const v128 tr1_2 = v128_ziphi_32(tr0_1, tr0_0);
  const v128 tr1_3 = v128_ziphi_32(tr0_5, tr0_4);
  const v128 tr1_4 = v128_ziplo_32(tr0_3, tr0_2);
  const v128 tr1_5 = v128_ziplo_32(tr0_7, tr0_6);
  const v128 tr1_6 = v128_ziphi_32(tr0_3, tr0_2);
  const v128 tr1_7 = v128_ziphi_32(tr0_7, tr0_6);

  /* Stage 3: 64-bit interleave; writing res[] in descending index order
     supplies the row reversal of the rotation. */
  res[7] = v128_ziplo_64(tr1_1, tr1_0);
  res[6] = v128_ziphi_64(tr1_1, tr1_0);
  res[5] = v128_ziplo_64(tr1_3, tr1_2);
  res[4] = v128_ziphi_64(tr1_3, tr1_2);
  res[3] = v128_ziplo_64(tr1_5, tr1_4);
  res[2] = v128_ziphi_64(tr1_5, tr1_4);
  res[1] = v128_ziplo_64(tr1_7, tr1_6);
  res[0] = v128_ziphi_64(tr1_7, tr1_6);
}

/* Estimate the dominant edge direction (0..7) of an 8x8 pixel block.
   img:         top-left sample of the block (stride in uint16_t units).
   var:         out-parameter; receives the scaled difference between the
                winning direction's cost and the orthogonal direction's cost.
   coeff_shift: right-shift applied to samples (compensates high bit depth).
   Returns the best direction index. */
int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
                             int coeff_shift) {
  int i;
  int32_t cost[8];
  int32_t best_cost = 0;
  int best_dir = 0;
  v128 lines[8];
  for (i = 0; i < 8; i++) {
    /* Load one row and center the samples around zero (subtract 128). */
    lines[i] = v128_load_unaligned(&img[i * stride]);
    lines[i] =
        v128_sub_16(v128_shr_s16(lines[i], coeff_shift), v128_dup_16(128));
  }

  /* Compute "mostly vertical" directions. */
  v128 dir47 = compute_directions(lines, cost + 4);

  array_reverse_transpose_8x8(lines, lines);

  /* Compute "mostly horizontal" directions. */
  v128 dir03 = compute_directions(lines, cost);

  /* Horizontal max over all eight costs to find the winning cost. */
  v128 max = v128_max_s32(dir03, dir47);
  max = v128_max_s32(max, v128_align(max, max, 8));
  max = v128_max_s32(max, v128_align(max, max, 4));
  best_cost = v128_low_u32(max);
  /* Build an equality mask against the winning cost, then take the index
     of the lowest matching lane as the best direction. */
  v128 t =
      v128_pack_s32_s16(v128_cmpeq_32(max, dir47), v128_cmpeq_32(max, dir03));
  best_dir = v128_movemask_8(v128_pack_s16_s8(t, t));
  best_dir = get_msb(best_dir ^ (best_dir - 1));  // Count trailing zeros

  /* Difference between the optimal variance and the variance along the
     orthogonal direction. Again, the sum(x^2) terms cancel out. */
  *var = best_cost - cost[(best_dir + 4) & 7];
  /* We'd normally divide by 840, but dividing by 1024 is close enough
     for what we're going to do with this. */
  *var >>= 10;
  return best_dir;
}

// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
// 16-bit-per-lane variant used by the high bit-depth kernels below.
SIMD_INLINE v256 constrain16(v256 a, v256 b, unsigned int threshold,
                             unsigned int adjdamp) {
  v256 diff = v256_sub_16(a, b);
  const v256 sign = v256_shr_n_s16(diff, 15);
  diff = v256_abs_s16(diff);
  /* Saturating subtract clamps the damped term at zero. */
  const v256 s =
      v256_ssub_u16(v256_dup_16(threshold), v256_shr_u16(diff, adjdamp));
  /* xor/add against the sign mask restores the sign of (a - b). */
  return v256_xor(v256_add_16(sign, v256_min_s16(diff, s)), sign);
}

// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
// 8-bit result variant: the 16-bit difference is narrowed to bytes first.
SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
                           unsigned int adjdamp) {
  const v256 diff16 = v256_sub_16(a, b);
  v128 diff = v128_pack_s16_s8(v256_high_v128(diff16), v256_low_v128(diff16));
  const v128 sign = v128_cmplt_s8(diff, v128_zero());
  diff = v128_abs_s8(diff);
  return v128_xor(
      v128_add_8(sign,
                 v128_min_u8(diff, v128_ssub_u8(v128_dup_8(strength),
                                                v128_shr_u8(diff, adjdamp)))),
      sign);
}

/* CDEF filter for an 8-bit 4x4 block. All four rows are processed at once:
   one 64-bit lane group per row inside a single 256-bit register. */
void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
                                        const uint16_t *in, int pri_strength,
                                        int sec_strength, int dir,
                                        int pri_damping, int sec_damping,
                                        AOM_UNUSED int max_unused,
                                        int coeff_shift) {
  v128 p0, p1, p2, p3;
  v256 sum, row, tap,
res;
  v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
  /* Tap offsets (in uint16_t units into the padded source) for the primary
     direction and the two secondary directions at +/-45 degrees from it. */
  int po1 = cdef_directions[dir][0];
  int po2 = cdef_directions[dir][1];
  int s1o1 = cdef_directions[(dir + 2) & 7][0];
  int s1o2 = cdef_directions[(dir + 2) & 7][1];
  int s2o1 = cdef_directions[(dir + 6) & 7][0];
  int s2o2 = cdef_directions[(dir + 6) & 7][1];

  const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
  const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];

  /* Reduce damping so each strength still has an effect after the shift. */
  if (pri_strength)
    pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
  if (sec_strength)
    sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));

  sum = v256_zero();
  /* Load the four 4-pixel rows into one 256-bit register. */
  row = v256_from_v64(v64_load_aligned(&in[0 * CDEF_BSTRIDE]),
                      v64_load_aligned(&in[1 * CDEF_BSTRIDE]),
                      v64_load_aligned(&in[2 * CDEF_BSTRIDE]),
                      v64_load_aligned(&in[3 * CDEF_BSTRIDE]));
  max = min = row;

  if (pri_strength) {
    // Primary near taps
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + po1]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po1]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po1]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po1]));
    /* CDEF_VERY_LARGE marks out-of-frame samples; exclude them from max. */
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p0 = constrain(tap, row, pri_strength, pri_damping);
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po1]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po1]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po1]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p1 = constrain(tap, row, pri_strength, pri_damping);

    // sum += pri_taps[0] * (p0 + p1)
    sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[0]),
                                         v256_from_v128(v128_ziphi_8(p0, p1),
                                                        v128_ziplo_8(p0, p1))));

    // Primary far taps
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + po2]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po2]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po2]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p0 = constrain(tap, row, pri_strength, pri_damping);
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po2]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po2]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po2]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p1 = constrain(tap, row, pri_strength, pri_damping);

    // sum += pri_taps[1] * (p0 + p1)
    sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[1]),
                                         v256_from_v128(v128_ziphi_8(p0, p1),
                                                        v128_ziplo_8(p0, p1))));
  }

  if (sec_strength) {
    // Secondary near taps
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s1o1]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s1o1]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s1o1]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s1o1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p0 = constrain(tap, row, sec_strength, sec_damping);
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s1o1]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s1o1]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s1o1]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s1o1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p1 = constrain(tap, row, sec_strength, sec_damping);
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s2o1]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s2o1]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s2o1]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s2o1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p2 = constrain(tap, row, sec_strength, sec_damping);
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s2o1]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s2o1]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s2o1]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s2o1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p3 = constrain(tap, row, sec_strength, sec_damping);

    // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
    p0 = v128_add_8(p0, p1);
    p2 = v128_add_8(p2, p3);
    sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[0]),
                                         v256_from_v128(v128_ziphi_8(p0, p2),
                                                        v128_ziplo_8(p0, p2))));

    // Secondary far taps
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s1o2]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s1o2]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s1o2]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s1o2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p0 = constrain(tap, row, sec_strength, sec_damping);
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s1o2]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s1o2]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s1o2]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s1o2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p1 = constrain(tap, row, sec_strength, sec_damping);
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s2o2]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s2o2]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s2o2]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s2o2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p2 = constrain(tap, row, sec_strength, sec_damping);
    tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s2o2]),
                        v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s2o2]),
                        v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s2o2]),
                        v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s2o2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p3 = constrain(tap, row, sec_strength, sec_damping);

    // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
    p0 = v128_add_8(p0, p1);
    p2 = v128_add_8(p2, p3);

    sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[1]),
                                         v256_from_v128(v128_ziphi_8(p0, p2),
                                                        v128_ziplo_8(p0, p2))));
  }

  // res = row + ((sum - (sum < 0) + 8) >> 4)
  sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
  res = v256_add_16(sum, v256_dup_16(8));
  res = v256_shr_n_s16(res, 4);
  res = v256_add_16(row, res);
  /* Clamp to the range spanned by the source and tap samples. */
  res = v256_min_s16(v256_max_s16(res, min), max);
  res = v256_pack_s16_u8(res, res);

  /* Store one 32-bit group (4 pixels) per destination row. */
  p0 = v256_low_v128(res);
  u32_store_aligned(&dst[0 * dstride], v64_high_u32(v128_high_v64(p0)));
  u32_store_aligned(&dst[1 * dstride], v64_low_u32(v128_high_v64(p0)));
  u32_store_aligned(&dst[2 * dstride], v64_high_u32(v128_low_v64(p0)));
  u32_store_aligned(&dst[3 * dstride], v64_low_u32(v128_low_v64(p0)));
}

/* CDEF filter for an 8-bit 8x8 block: two rows per loop iteration, one
   128-bit half of the 256-bit register per row. */
void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
                                        const uint16_t *in, int pri_strength,
                                        int sec_strength, int dir,
                                        int pri_damping, int sec_damping,
                                        AOM_UNUSED int max_unused,
                                        int coeff_shift) {
  int i;
  v128 p0, p1, p2, p3;
  v256 sum, row, res, tap;
  v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
  /* Tap offsets for the primary direction and the two secondary ones. */
  int po1 = cdef_directions[dir][0];
  int po2 = cdef_directions[dir][1];
  int s1o1 = cdef_directions[(dir + 2) & 7][0];
  int s1o2 = cdef_directions[(dir + 2) & 7][1];
  int s2o1 = cdef_directions[(dir + 6) & 7][0];
  int s2o2 = cdef_directions[(dir + 6) & 7][1];

  const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
  const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];

  if (pri_strength)
    pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
  if (sec_strength)
    sec_damping = AOMMAX(0, sec_damping -
get_msb(sec_strength));
  /* Two rows per iteration: row i in the high 128 bits, row i+1 low. */
  for (i = 0; i < 8; i += 2) {
    sum = v256_zero();
    row = v256_from_v128(v128_load_aligned(&in[i * CDEF_BSTRIDE]),
                         v128_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));

    max = min = row;
    // Primary near taps
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]));
    /* CDEF_VERY_LARGE marks out-of-frame samples; exclude them from max. */
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p0 = constrain(tap, row, pri_strength, pri_damping);
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p1 = constrain(tap, row, pri_strength, pri_damping);

    // sum += pri_taps[0] * (p0 + p1)
    sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[0]),
                                         v256_from_v128(v128_ziphi_8(p0, p1),
                                                        v128_ziplo_8(p0, p1))));

    // Primary far taps
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p0 = constrain(tap, row, pri_strength, pri_damping);
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p1 = constrain(tap, row, pri_strength, pri_damping);

    // sum += pri_taps[1] * (p0 + p1)
    sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[1]),
                                         v256_from_v128(v128_ziphi_8(p0, p1),
                                                        v128_ziplo_8(p0, p1))));

    // Secondary near taps
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p0 = constrain(tap, row, sec_strength, sec_damping);
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p1 = constrain(tap, row, sec_strength, sec_damping);
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p2 = constrain(tap, row, sec_strength, sec_damping);
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p3 = constrain(tap, row, sec_strength, sec_damping);

    // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
    p0 = v128_add_8(p0, p1);
    p2 = v128_add_8(p2, p3);
    sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[0]),
                                         v256_from_v128(v128_ziphi_8(p0, p2),
                                                        v128_ziplo_8(p0, p2))));

    // Secondary far taps
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p0 = constrain(tap, row, sec_strength, sec_damping);
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p1 = constrain(tap, row, sec_strength, sec_damping);
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p2 = constrain(tap, row, sec_strength, sec_damping);
    tap =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]));
    max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
    min = v256_min_s16(min, tap);
    p3 = constrain(tap, row, sec_strength, sec_damping);

    // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
    p0 = v128_add_8(p0, p1);
    p2 = v128_add_8(p2, p3);
    sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[1]),
                                         v256_from_v128(v128_ziphi_8(p0, p2),
                                                        v128_ziplo_8(p0, p2))));

    // res = row + ((sum - (sum < 0) + 8) >> 4)
    sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
    res = v256_add_16(sum, v256_dup_16(8));
    res = v256_shr_n_s16(res, 4);
    res = v256_add_16(row, res);
    /* Clamp to the range spanned by the source and tap samples. */
    res = v256_min_s16(v256_max_s16(res, min), max);
    res = v256_pack_s16_u8(res, res);

    p0 = v256_low_v128(res);
    v64_store_aligned(&dst[i * dstride], v128_high_v64(p0));
    v64_store_aligned(&dst[(i + 1) * dstride], v128_low_v64(p0));
  }
}

/* CDEF filter for a high bit-depth (uint16_t destination) 4x4 block. */
void SIMD_FUNC(cdef_filter_block_4x4_16)(uint16_t *dst, int dstride,
                                         const uint16_t *in, int pri_strength,
                                         int sec_strength, int dir,
                                         int pri_damping, int sec_damping,
                                         AOM_UNUSED int max_unused,
                                         int coeff_shift) {
  int i;
  v256 p0, p1, p2, p3, sum, row, res;
  v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
  /* Tap offsets for the primary direction and the two secondary ones. */
  int po1 = cdef_directions[dir][0];
  int po2 = cdef_directions[dir][1];
  int s1o1 = cdef_directions[(dir + 2) & 7][0];
  int s1o2 = cdef_directions[(dir + 2) & 7][1];
  int s2o1 = cdef_directions[(dir + 6) & 7][0];
  int s2o2 = cdef_directions[(dir + 6) & 7][1];

  const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
  const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];

  /* Reduce damping so each strength still has an effect after the shift. */
  if (pri_strength)
    pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
  if (sec_strength)
    sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));

/* Single iteration: all four rows fit in one 256-bit register. */
  for (i = 0; i < 4; i += 4) {
    sum = v256_zero();
    row = v256_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
                        v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]),
                        v64_load_aligned(&in[(i + 2) * CDEF_BSTRIDE]),
                        v64_load_aligned(&in[(i + 3) * CDEF_BSTRIDE]));
    min = max = row;

    // Primary near taps
    p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + po1]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + po1]));
    p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - po1]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - po1]));
    /* CDEF_VERY_LARGE marks out-of-frame samples; exclude them from max. */
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
                     v256_andn(p1, v256_cmpeq_16(p1, large)));
    min = v256_min_s16(v256_min_s16(min, p0), p1);
    p0 = constrain16(p0, row, pri_strength, pri_damping);
    p1 = constrain16(p1, row, pri_strength, pri_damping);

    // sum += pri_taps[0] * (p0 + p1)
    sum = v256_add_16(
        sum, v256_mullo_s16(v256_dup_16(pri_taps[0]), v256_add_16(p0, p1)));

    // Primary far taps
    p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + po2]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + po2]));
    p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - po2]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - po2]));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
                     v256_andn(p1, v256_cmpeq_16(p1, large)));
    min = v256_min_s16(v256_min_s16(min, p0), p1);
    p0 = constrain16(p0, row, pri_strength, pri_damping);
    p1 = constrain16(p1, row, pri_strength, pri_damping);

    // sum += pri_taps[1] * (p0 + p1)
    sum = v256_add_16(
        sum, v256_mullo_s16(v256_dup_16(pri_taps[1]), v256_add_16(p0, p1)));

    // Secondary near taps
    p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s1o1]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s1o1]));
    p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s1o1]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s1o1]));
    p2 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s2o1]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s2o1]));
    p3 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s2o1]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s2o1]));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
                     v256_andn(p1, v256_cmpeq_16(p1, large)));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
                     v256_andn(p3, v256_cmpeq_16(p3, large)));
    min = v256_min_s16(
        v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
    p0 = constrain16(p0, row, sec_strength, sec_damping);
    p1 = constrain16(p1, row, sec_strength, sec_damping);
    p2 = constrain16(p2, row, sec_strength, sec_damping);
    p3 = constrain16(p3, row, sec_strength, sec_damping);

    // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
    sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[0]),
                                          v256_add_16(v256_add_16(p0, p1),
                                                      v256_add_16(p2, p3))));

    // Secondary far taps
    p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s1o2]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s1o2]));
    p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s1o2]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s1o2]));
    p2 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s2o2]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s2o2]));
    p3 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]),
                       v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s2o2]),
                       v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s2o2]));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
                     v256_andn(p1, v256_cmpeq_16(p1, large)));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
                     v256_andn(p3, v256_cmpeq_16(p3, large)));
    min = v256_min_s16(
        v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
    p0 = constrain16(p0, row, sec_strength, sec_damping);
    p1 = constrain16(p1, row, sec_strength, sec_damping);
    p2 = constrain16(p2, row, sec_strength, sec_damping);
    p3 = constrain16(p3, row, sec_strength, sec_damping);

    // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
    sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[1]),
                                          v256_add_16(v256_add_16(p0, p1),
                                                      v256_add_16(p2, p3))));

    // res = row + ((sum - (sum < 0) + 8) >> 4)
    sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
    res = v256_add_16(sum, v256_dup_16(8));
    res = v256_shr_n_s16(res, 4);
    res = v256_add_16(row, res);
    /* Clamp to the range spanned by the source and tap samples. */
    res = v256_min_s16(v256_max_s16(res, min), max);

    /* Store one 64-bit group (4 uint16_t pixels) per destination row. */
    v64_store_aligned(&dst[i * dstride], v128_high_v64(v256_high_v128(res)));
    v64_store_aligned(&dst[(i + 1) * dstride],
                      v128_low_v64(v256_high_v128(res)));
    v64_store_aligned(&dst[(i + 2) * dstride],
                      v128_high_v64(v256_low_v128(res)));
    v64_store_aligned(&dst[(i + 3) * dstride],
                      v128_low_v64(v256_low_v128(res)));
  }
}

/* CDEF filter for a high bit-depth (uint16_t destination) 8x8 block:
   two rows per loop iteration, one 128-bit half per row. */
void SIMD_FUNC(cdef_filter_block_8x8_16)(uint16_t *dst, int dstride,
                                         const uint16_t *in, int pri_strength,
                                         int sec_strength, int dir,
                                         int pri_damping, int sec_damping,
                                         AOM_UNUSED int max_unused,
                                         int coeff_shift) {
  int i;
  v256 sum, p0, p1, p2, p3, row, res;
  v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
  /* Tap offsets for the primary direction and the two secondary ones. */
  int po1 = cdef_directions[dir][0];
  int po2 = cdef_directions[dir][1];
  int s1o1 = cdef_directions[(dir + 2) & 7][0];
  int s1o2 = cdef_directions[(dir + 2) & 7][1];
  int s2o1 = cdef_directions[(dir + 6) & 7][0];
  int s2o2 = cdef_directions[(dir + 6) & 7][1];

  const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
  const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];

  /* Reduce damping so each strength still has an effect after the shift. */
  if (pri_strength)
    pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
  if (sec_strength)
    sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));

  for (i = 0; i < 8; i += 2) {
    sum = v256_zero();
    row = v256_from_v128(v128_load_aligned(&in[i * CDEF_BSTRIDE]),
                         v128_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));

    min = max = row;
    // Primary near taps
    p0 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
                        v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]));
    p1 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
                        v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]));
    /* CDEF_VERY_LARGE marks out-of-frame samples; exclude them from max. */
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
                     v256_andn(p1, v256_cmpeq_16(p1, large)));
    min = v256_min_s16(v256_min_s16(min, p0), p1);
    p0 = constrain16(p0, row, pri_strength, pri_damping);
    p1 = constrain16(p1, row, pri_strength, pri_damping);

    // sum += pri_taps[0] * (p0 + p1)
    sum = v256_add_16(
        sum,
v256_mullo_s16(v256_dup_16(pri_taps[0]), v256_add_16(p0, p1)));

    // Primary far taps
    p0 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
                        v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]));
    p1 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
                        v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
                     v256_andn(p1, v256_cmpeq_16(p1, large)));
    min = v256_min_s16(v256_min_s16(min, p0), p1);
    p0 = constrain16(p0, row, pri_strength, pri_damping);
    p1 = constrain16(p1, row, pri_strength, pri_damping);

    // sum += pri_taps[1] * (p0 + p1)
    sum = v256_add_16(
        sum, v256_mullo_s16(v256_dup_16(pri_taps[1]), v256_add_16(p0, p1)));

    // Secondary near taps
    p0 =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]));
    p1 =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]));
    p2 =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]));
    p3 =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
                     v256_andn(p1, v256_cmpeq_16(p1, large)));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
                     v256_andn(p3, v256_cmpeq_16(p3, large)));
    min = v256_min_s16(
        v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
    p0 = constrain16(p0, row, sec_strength, sec_damping);
    p1 = constrain16(p1, row, sec_strength, sec_damping);
    p2 = constrain16(p2, row, sec_strength, sec_damping);
    p3 = constrain16(p3, row, sec_strength, sec_damping);

    // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
    sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[0]),
                                          v256_add_16(v256_add_16(p0, p1),
                                                      v256_add_16(p2, p3))));

    // Secondary far taps
    p0 =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]));
    p1 =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]));
    p2 =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]));
    p3 =
        v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
                       v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
                     v256_andn(p1, v256_cmpeq_16(p1, large)));
    max =
        v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
                     v256_andn(p3, v256_cmpeq_16(p3, large)));
    min = v256_min_s16(
        v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
    p0 = constrain16(p0, row, sec_strength, sec_damping);
    p1 = constrain16(p1, row, sec_strength, sec_damping);
    p2 = constrain16(p2, row, sec_strength, sec_damping);
    p3 = constrain16(p3, row, sec_strength, sec_damping);

    // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
    sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[1]),
                                          v256_add_16(v256_add_16(p0, p1),
                                                      v256_add_16(p2, p3))));

    // res = row + ((sum - (sum < 0) + 8) >> 4)
    sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
    res = v256_add_16(sum, v256_dup_16(8));
    res = v256_shr_n_s16(res, 4);
    res = v256_add_16(row, res);
    /* Clamp to the range spanned by the source and tap samples. */
    res = v256_min_s16(v256_max_s16(res, min), max);
    v128_store_unaligned(&dst[i * dstride], v256_high_v128(res));
    v128_store_unaligned(&dst[(i + 1) * dstride], v256_low_v128(res));
  }
}

/* Dispatcher: routes a CDEF block of the given bsize to the matching 4x4 /
   8x8 kernel, choosing the 8-bit (dst8) or 16-bit (dst16) variants. */
void SIMD_FUNC(cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride,
                                  const uint16_t *in, int pri_strength,
                                  int sec_strength, int dir, int pri_damping,
                                  int sec_damping,
int bsize, int max, - int coeff_shift) { - if (dst8) { - if (bsize == BLOCK_8X8) { - SIMD_FUNC(cdef_filter_block_8x8_8) - (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - } else if (bsize == BLOCK_4X8) { - SIMD_FUNC(cdef_filter_block_4x4_8) - (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - SIMD_FUNC(cdef_filter_block_4x4_8) - (dst8 + 4 * dstride, dstride, in + 4 * CDEF_BSTRIDE, pri_strength, - sec_strength, dir, pri_damping, sec_damping, max, coeff_shift); - } else if (bsize == BLOCK_8X4) { - SIMD_FUNC(cdef_filter_block_4x4_8) - (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - SIMD_FUNC(cdef_filter_block_4x4_8) - (dst8 + 4, dstride, in + 4, pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - } else { - SIMD_FUNC(cdef_filter_block_4x4_8) - (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - } - } else { - if (bsize == BLOCK_8X8) { - SIMD_FUNC(cdef_filter_block_8x8_16) - (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - } else if (bsize == BLOCK_4X8) { - SIMD_FUNC(cdef_filter_block_4x4_16) - (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - SIMD_FUNC(cdef_filter_block_4x4_16) - (dst16 + 4 * dstride, dstride, in + 4 * CDEF_BSTRIDE, pri_strength, - sec_strength, dir, pri_damping, sec_damping, max, coeff_shift); - } else if (bsize == BLOCK_8X4) { - SIMD_FUNC(cdef_filter_block_4x4_16) - (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - SIMD_FUNC(cdef_filter_block_4x4_16) - (dst16 + 4, dstride, in + 4, pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - } else { - assert(bsize == BLOCK_4X4); - SIMD_FUNC(cdef_filter_block_4x4_16) - (dst16, dstride, in, 
pri_strength, sec_strength, dir, pri_damping, - sec_damping, max, coeff_shift); - } - } -} - -void SIMD_FUNC(copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, - const uint8_t *src, int sstride, int v, - int h) { - int i, j; - for (i = 0; i < v; i++) { - for (j = 0; j < (h & ~0x7); j += 8) { - v64 row = v64_load_unaligned(&src[i * sstride + j]); - v128_store_unaligned(&dst[i * dstride + j], v128_unpack_u8_s16(row)); - } - for (; j < h; j++) { - dst[i * dstride + j] = src[i * sstride + j]; - } - } -} - -void SIMD_FUNC(copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, - const uint16_t *src, int sstride, - int v, int h) { - int i, j; - for (i = 0; i < v; i++) { - for (j = 0; j < (h & ~0x7); j += 8) { - v128 row = v128_load_unaligned(&src[i * sstride + j]); - v128_store_unaligned(&dst[i * dstride + j], row); - } - for (; j < h; j++) { - dst[i * dstride + j] = src[i * sstride + j]; - } - } -} - -#endif // AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_ diff --git a/third_party/aom/av1/common/cdef_block_sse2.c b/third_party/aom/av1/common/cdef_block_sse2.c deleted file mode 100644 index 73f115d17..000000000 --- a/third_party/aom/av1/common/cdef_block_sse2.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "aom_dsp/aom_simd.h" -#define SIMD_FUNC(name) name##_sse2 -#include "av1/common/cdef_block_simd.h" diff --git a/third_party/aom/av1/common/cdef_block_sse4.c b/third_party/aom/av1/common/cdef_block_sse4.c deleted file mode 100644 index 349329af6..000000000 --- a/third_party/aom/av1/common/cdef_block_sse4.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "aom_dsp/aom_simd.h" -#define SIMD_FUNC(name) name##_sse4_1 -#include "av1/common/cdef_block_simd.h" diff --git a/third_party/aom/av1/common/cdef_block_ssse3.c b/third_party/aom/av1/common/cdef_block_ssse3.c deleted file mode 100644 index 3a93b150f..000000000 --- a/third_party/aom/av1/common/cdef_block_ssse3.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "aom_dsp/aom_simd.h" -#define SIMD_FUNC(name) name##_ssse3 -#include "av1/common/cdef_block_simd.h" diff --git a/third_party/aom/av1/common/cfl.c b/third_party/aom/av1/common/cfl.c deleted file mode 100644 index ccc59b4eb..000000000 --- a/third_party/aom/av1/common/cfl.c +++ /dev/null @@ -1,448 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "av1/common/cfl.h" -#include "av1/common/common_data.h" -#include "av1/common/onyxc_int.h" - -#include "config/av1_rtcd.h" - -void cfl_init(CFL_CTX *cfl, const SequenceHeader *seq_params) { - assert(block_size_wide[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE); - assert(block_size_high[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE); - - memset(&cfl->recon_buf_q3, 0, sizeof(cfl->recon_buf_q3)); - memset(&cfl->ac_buf_q3, 0, sizeof(cfl->ac_buf_q3)); - cfl->subsampling_x = seq_params->subsampling_x; - cfl->subsampling_y = seq_params->subsampling_y; - cfl->are_parameters_computed = 0; - cfl->store_y = 0; - // The DC_PRED cache is disabled by default and is only enabled in - // cfl_rd_pick_alpha - cfl->use_dc_pred_cache = 0; - cfl->dc_pred_is_cached[CFL_PRED_U] = 0; - cfl->dc_pred_is_cached[CFL_PRED_V] = 0; -} - -void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input, - CFL_PRED_TYPE pred_plane, int width) { - assert(pred_plane < CFL_PRED_PLANES); - assert(width <= CFL_BUF_LINE); - - if (get_bitdepth_data_path_index(xd)) { - uint16_t *const input_16 = CONVERT_TO_SHORTPTR(input); - memcpy(xd->cfl.dc_pred_cache[pred_plane], 
input_16, width << 1); - return; - } - - memcpy(xd->cfl.dc_pred_cache[pred_plane], input, width); -} - -static void cfl_load_dc_pred_lbd(const int16_t *dc_pred_cache, uint8_t *dst, - int dst_stride, int width, int height) { - for (int j = 0; j < height; j++) { - memcpy(dst, dc_pred_cache, width); - dst += dst_stride; - } -} - -static void cfl_load_dc_pred_hbd(const int16_t *dc_pred_cache, uint16_t *dst, - int dst_stride, int width, int height) { - const size_t num_bytes = width << 1; - for (int j = 0; j < height; j++) { - memcpy(dst, dc_pred_cache, num_bytes); - dst += dst_stride; - } -} -void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, - TX_SIZE tx_size, CFL_PRED_TYPE pred_plane) { - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - assert(pred_plane < CFL_PRED_PLANES); - assert(width <= CFL_BUF_LINE); - assert(height <= CFL_BUF_LINE); - if (get_bitdepth_data_path_index(xd)) { - uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst); - cfl_load_dc_pred_hbd(xd->cfl.dc_pred_cache[pred_plane], dst_16, dst_stride, - width, height); - return; - } - cfl_load_dc_pred_lbd(xd->cfl.dc_pred_cache[pred_plane], dst, dst_stride, - width, height); -} - -// Due to frame boundary issues, it is possible that the total area covered by -// chroma exceeds that of luma. When this happens, we fill the missing pixels by -// repeating the last columns and/or rows. 
-static INLINE void cfl_pad(CFL_CTX *cfl, int width, int height) { - const int diff_width = width - cfl->buf_width; - const int diff_height = height - cfl->buf_height; - - if (diff_width > 0) { - const int min_height = height - diff_height; - uint16_t *recon_buf_q3 = cfl->recon_buf_q3 + (width - diff_width); - for (int j = 0; j < min_height; j++) { - const uint16_t last_pixel = recon_buf_q3[-1]; - assert(recon_buf_q3 + diff_width <= cfl->recon_buf_q3 + CFL_BUF_SQUARE); - for (int i = 0; i < diff_width; i++) { - recon_buf_q3[i] = last_pixel; - } - recon_buf_q3 += CFL_BUF_LINE; - } - cfl->buf_width = width; - } - if (diff_height > 0) { - uint16_t *recon_buf_q3 = - cfl->recon_buf_q3 + ((height - diff_height) * CFL_BUF_LINE); - for (int j = 0; j < diff_height; j++) { - const uint16_t *last_row_q3 = recon_buf_q3 - CFL_BUF_LINE; - assert(recon_buf_q3 + width <= cfl->recon_buf_q3 + CFL_BUF_SQUARE); - for (int i = 0; i < width; i++) { - recon_buf_q3[i] = last_row_q3[i]; - } - recon_buf_q3 += CFL_BUF_LINE; - } - cfl->buf_height = height; - } -} - -static void subtract_average_c(const uint16_t *src, int16_t *dst, int width, - int height, int round_offset, int num_pel_log2) { - int sum = round_offset; - const uint16_t *recon = src; - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - sum += recon[i]; - } - recon += CFL_BUF_LINE; - } - const int avg = sum >> num_pel_log2; - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - dst[i] = src[i] - avg; - } - src += CFL_BUF_LINE; - dst += CFL_BUF_LINE; - } -} - -CFL_SUB_AVG_FN(c) - -static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign, - CFL_PRED_TYPE pred_type) { - const int alpha_sign = (pred_type == CFL_PRED_U) ? CFL_SIGN_U(joint_sign) - : CFL_SIGN_V(joint_sign); - if (alpha_sign == CFL_SIGN_ZERO) return 0; - const int abs_alpha_q3 = - (pred_type == CFL_PRED_U) ? CFL_IDX_U(alpha_idx) : CFL_IDX_V(alpha_idx); - return (alpha_sign == CFL_SIGN_POS) ? 
abs_alpha_q3 + 1 : -abs_alpha_q3 - 1; -} - -static INLINE void cfl_predict_lbd_c(const int16_t *ac_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3, int width, - int height) { - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - dst[i] = clip_pixel(get_scaled_luma_q0(alpha_q3, ac_buf_q3[i]) + dst[i]); - } - dst += dst_stride; - ac_buf_q3 += CFL_BUF_LINE; - } -} - -// Null function used for invalid tx_sizes -void cfl_predict_lbd_null(const int16_t *ac_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3) { - (void)ac_buf_q3; - (void)dst; - (void)dst_stride; - (void)alpha_q3; - assert(0); -} - -CFL_PREDICT_FN(c, lbd) - -void cfl_predict_hbd_c(const int16_t *ac_buf_q3, uint16_t *dst, int dst_stride, - int alpha_q3, int bit_depth, int width, int height) { - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - dst[i] = clip_pixel_highbd( - get_scaled_luma_q0(alpha_q3, ac_buf_q3[i]) + dst[i], bit_depth); - } - dst += dst_stride; - ac_buf_q3 += CFL_BUF_LINE; - } -} - -// Null function used for invalid tx_sizes -void cfl_predict_hbd_null(const int16_t *ac_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd) { - (void)ac_buf_q3; - (void)dst; - (void)dst_stride; - (void)alpha_q3; - (void)bd; - assert(0); -} - -CFL_PREDICT_FN(c, hbd) - -static void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) { - CFL_CTX *const cfl = &xd->cfl; - // Do not call cfl_compute_parameters multiple time on the same values. 
- assert(cfl->are_parameters_computed == 0); - - cfl_pad(cfl, tx_size_wide[tx_size], tx_size_high[tx_size]); - get_subtract_average_fn(tx_size)(cfl->recon_buf_q3, cfl->ac_buf_q3); - cfl->are_parameters_computed = 1; -} - -void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, - TX_SIZE tx_size, int plane) { - CFL_CTX *const cfl = &xd->cfl; - MB_MODE_INFO *mbmi = xd->mi[0]; - assert(is_cfl_allowed(xd)); - - if (!cfl->are_parameters_computed) cfl_compute_parameters(xd, tx_size); - - const int alpha_q3 = - cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1); - assert((tx_size_high[tx_size] - 1) * CFL_BUF_LINE + tx_size_wide[tx_size] <= - CFL_BUF_SQUARE); - if (get_bitdepth_data_path_index(xd)) { - uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst); - get_predict_hbd_fn(tx_size)(cfl->ac_buf_q3, dst_16, dst_stride, alpha_q3, - xd->bd); - return; - } - get_predict_lbd_fn(tx_size)(cfl->ac_buf_q3, dst, dst_stride, alpha_q3); -} - -// Null function used for invalid tx_sizes -void cfl_subsample_lbd_null(const uint8_t *input, int input_stride, - uint16_t *output_q3) { - (void)input; - (void)input_stride; - (void)output_q3; - assert(0); -} - -// Null function used for invalid tx_sizes -void cfl_subsample_hbd_null(const uint16_t *input, int input_stride, - uint16_t *output_q3) { - (void)input; - (void)input_stride; - (void)output_q3; - assert(0); -} - -static void cfl_luma_subsampling_420_lbd_c(const uint8_t *input, - int input_stride, - uint16_t *output_q3, int width, - int height) { - for (int j = 0; j < height; j += 2) { - for (int i = 0; i < width; i += 2) { - const int bot = i + input_stride; - output_q3[i >> 1] = - (input[i] + input[i + 1] + input[bot] + input[bot + 1]) << 1; - } - input += input_stride << 1; - output_q3 += CFL_BUF_LINE; - } -} - -static void cfl_luma_subsampling_422_lbd_c(const uint8_t *input, - int input_stride, - uint16_t *output_q3, int width, - int height) { - assert((height - 1) * CFL_BUF_LINE + width <= 
CFL_BUF_SQUARE); - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i += 2) { - output_q3[i >> 1] = (input[i] + input[i + 1]) << 2; - } - input += input_stride; - output_q3 += CFL_BUF_LINE; - } -} - -static void cfl_luma_subsampling_444_lbd_c(const uint8_t *input, - int input_stride, - uint16_t *output_q3, int width, - int height) { - assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE); - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - output_q3[i] = input[i] << 3; - } - input += input_stride; - output_q3 += CFL_BUF_LINE; - } -} - -static void cfl_luma_subsampling_420_hbd_c(const uint16_t *input, - int input_stride, - uint16_t *output_q3, int width, - int height) { - for (int j = 0; j < height; j += 2) { - for (int i = 0; i < width; i += 2) { - const int bot = i + input_stride; - output_q3[i >> 1] = - (input[i] + input[i + 1] + input[bot] + input[bot + 1]) << 1; - } - input += input_stride << 1; - output_q3 += CFL_BUF_LINE; - } -} - -static void cfl_luma_subsampling_422_hbd_c(const uint16_t *input, - int input_stride, - uint16_t *output_q3, int width, - int height) { - assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE); - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i += 2) { - output_q3[i >> 1] = (input[i] + input[i + 1]) << 2; - } - input += input_stride; - output_q3 += CFL_BUF_LINE; - } -} - -static void cfl_luma_subsampling_444_hbd_c(const uint16_t *input, - int input_stride, - uint16_t *output_q3, int width, - int height) { - assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE); - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - output_q3[i] = input[i] << 3; - } - input += input_stride; - output_q3 += CFL_BUF_LINE; - } -} - -CFL_GET_SUBSAMPLE_FUNCTION(c) - -static INLINE cfl_subsample_hbd_fn cfl_subsampling_hbd(TX_SIZE tx_size, - int sub_x, int sub_y) { - if (sub_x == 1) { - if (sub_y == 1) { - return cfl_get_luma_subsampling_420_hbd(tx_size); - } - 
return cfl_get_luma_subsampling_422_hbd(tx_size); - } - return cfl_get_luma_subsampling_444_hbd(tx_size); -} - -static INLINE cfl_subsample_lbd_fn cfl_subsampling_lbd(TX_SIZE tx_size, - int sub_x, int sub_y) { - if (sub_x == 1) { - if (sub_y == 1) { - return cfl_get_luma_subsampling_420_lbd(tx_size); - } - return cfl_get_luma_subsampling_422_lbd(tx_size); - } - return cfl_get_luma_subsampling_444_lbd(tx_size); -} - -static void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, - int row, int col, TX_SIZE tx_size, int use_hbd) { - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const int tx_off_log2 = tx_size_wide_log2[0]; - const int sub_x = cfl->subsampling_x; - const int sub_y = cfl->subsampling_y; - const int store_row = row << (tx_off_log2 - sub_y); - const int store_col = col << (tx_off_log2 - sub_x); - const int store_height = height >> sub_y; - const int store_width = width >> sub_x; - - // Invalidate current parameters - cfl->are_parameters_computed = 0; - - // Store the surface of the pixel buffer that was written to, this way we - // can manage chroma overrun (e.g. when the chroma surfaces goes beyond the - // frame boundary) - if (col == 0 && row == 0) { - cfl->buf_width = store_width; - cfl->buf_height = store_height; - } else { - cfl->buf_width = OD_MAXI(store_col + store_width, cfl->buf_width); - cfl->buf_height = OD_MAXI(store_row + store_height, cfl->buf_height); - } - - // Check that we will remain inside the pixel buffer. 
- assert(store_row + store_height <= CFL_BUF_LINE); - assert(store_col + store_width <= CFL_BUF_LINE); - - // Store the input into the CfL pixel buffer - uint16_t *recon_buf_q3 = - cfl->recon_buf_q3 + (store_row * CFL_BUF_LINE + store_col); - - if (use_hbd) { - cfl_subsampling_hbd(tx_size, sub_x, sub_y)(CONVERT_TO_SHORTPTR(input), - input_stride, recon_buf_q3); - } else { - cfl_subsampling_lbd(tx_size, sub_x, sub_y)(input, input_stride, - recon_buf_q3); - } -} - -// Adjust the row and column of blocks smaller than 8X8, as chroma-referenced -// and non-chroma-referenced blocks are stored together in the CfL buffer. -static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out, - int *col_out) { - // Increment row index for bottom: 8x4, 16x4 or both bottom 4x4s. - if ((cfl->mi_row & 0x01) && cfl->subsampling_y) { - assert(*row_out == 0); - (*row_out)++; - } - - // Increment col index for right: 4x8, 4x16 or both right 4x4s. - if ((cfl->mi_col & 0x01) && cfl->subsampling_x) { - assert(*col_out == 0); - (*col_out)++; - } -} - -void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size, - BLOCK_SIZE bsize) { - CFL_CTX *const cfl = &xd->cfl; - struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y]; - uint8_t *dst = - &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]]; - - if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) { - // Only dimensions of size 4 can have an odd offset. 
- assert(!((col & 1) && tx_size_wide[tx_size] != 4)); - assert(!((row & 1) && tx_size_high[tx_size] != 4)); - sub8x8_adjust_offset(cfl, &row, &col); - } - cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size, - get_bitdepth_data_path_index(xd)); -} - -void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) { - CFL_CTX *const cfl = &xd->cfl; - struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y]; - int row = 0; - int col = 0; - - if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) { - sub8x8_adjust_offset(cfl, &row, &col); - } - const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size); - const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size); - tx_size = get_tx_size(width, height); - cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, tx_size, - get_bitdepth_data_path_index(xd)); -} diff --git a/third_party/aom/av1/common/cfl.h b/third_party/aom/av1/common/cfl.h deleted file mode 100644 index d627891bf..000000000 --- a/third_party/aom/av1/common/cfl.h +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_CFL_H_ -#define AOM_AV1_COMMON_CFL_H_ - -#include "av1/common/blockd.h" -#include "av1/common/onyxc_int.h" - -// Can we use CfL for the current block? 
-static INLINE CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) { - const MB_MODE_INFO *mbmi = xd->mi[0]; - const BLOCK_SIZE bsize = mbmi->sb_type; - assert(bsize < BLOCK_SIZES_ALL); - if (xd->lossless[mbmi->segment_id]) { - // In lossless, CfL is available when the partition size is equal to the - // transform size. - const int ssx = xd->plane[AOM_PLANE_U].subsampling_x; - const int ssy = xd->plane[AOM_PLANE_U].subsampling_y; - const int plane_bsize = get_plane_block_size(bsize, ssx, ssy); - return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4); - } - // Spec: CfL is available to luma partitions lesser than or equal to 32x32 - return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 && - block_size_high[bsize] <= 32); -} - -// Do we need to save the luma pixels from the current block, -// for a possible future CfL prediction? -static INLINE CFL_ALLOWED_TYPE store_cfl_required(const AV1_COMMON *cm, - const MACROBLOCKD *xd) { - const MB_MODE_INFO *mbmi = xd->mi[0]; - - if (cm->seq_params.monochrome) return CFL_DISALLOWED; - - if (!xd->cfl.is_chroma_reference) { - // For non-chroma-reference blocks, we should always store the luma pixels, - // in case the corresponding chroma-reference block uses CfL. - // Note that this can only happen for block sizes which are <8 on - // their shortest side, as otherwise they would be chroma reference - // blocks. 
- return CFL_ALLOWED; - } - - // If this block has chroma information, we know whether we're - // actually going to perform a CfL prediction - return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) && - mbmi->uv_mode == UV_CFL_PRED); -} - -static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) { - int scaled_luma_q6 = alpha_q3 * pred_buf_q3; - return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6); -} - -static INLINE CFL_PRED_TYPE get_cfl_pred_type(PLANE_TYPE plane) { - assert(plane > 0); - return (CFL_PRED_TYPE)(plane - 1); -} - -void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, - TX_SIZE tx_size, int plane); - -void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size); - -void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size, - BLOCK_SIZE bsize); - -void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input, - CFL_PRED_TYPE pred_plane, int width); - -void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, - TX_SIZE tx_size, CFL_PRED_TYPE pred_plane); - -// Null function used for invalid tx_sizes -void cfl_subsample_lbd_null(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -// Null function used for invalid tx_sizes -void cfl_subsample_hbd_null(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -// Allows the CFL_SUBSAMPLE function to switch types depending on the bitdepth. -#define CFL_lbd_TYPE uint8_t *cfl_type -#define CFL_hbd_TYPE uint16_t *cfl_type - -// Declare a size-specific wrapper for the size-generic function. The compiler -// will inline the size generic function in here, the advantage is that the size -// will be constant allowing for loop unrolling and other constant propagated -// goodness. 
-#define CFL_SUBSAMPLE(arch, sub, bd, width, height) \ - void subsample_##bd##_##sub##_##width##x##height##_##arch( \ - const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ - cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ - output_q3, width, height); \ - } - -// Declare size-specific wrappers for all valid CfL sizes. -#define CFL_SUBSAMPLE_FUNCTIONS(arch, sub, bd) \ - CFL_SUBSAMPLE(arch, sub, bd, 4, 4) \ - CFL_SUBSAMPLE(arch, sub, bd, 8, 8) \ - CFL_SUBSAMPLE(arch, sub, bd, 16, 16) \ - CFL_SUBSAMPLE(arch, sub, bd, 32, 32) \ - CFL_SUBSAMPLE(arch, sub, bd, 4, 8) \ - CFL_SUBSAMPLE(arch, sub, bd, 8, 4) \ - CFL_SUBSAMPLE(arch, sub, bd, 8, 16) \ - CFL_SUBSAMPLE(arch, sub, bd, 16, 8) \ - CFL_SUBSAMPLE(arch, sub, bd, 16, 32) \ - CFL_SUBSAMPLE(arch, sub, bd, 32, 16) \ - CFL_SUBSAMPLE(arch, sub, bd, 4, 16) \ - CFL_SUBSAMPLE(arch, sub, bd, 16, 4) \ - CFL_SUBSAMPLE(arch, sub, bd, 8, 32) \ - CFL_SUBSAMPLE(arch, sub, bd, 32, 8) \ - cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_##arch( \ - TX_SIZE tx_size) { \ - CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd) \ - return subfn_##sub[tx_size]; \ - } - -// Declare an architecture-specific array of function pointers for size-specific -// wrappers. 
-#define CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd) \ - static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = { \ - subsample_##bd##_##sub##_4x4_##arch, /* 4x4 */ \ - subsample_##bd##_##sub##_8x8_##arch, /* 8x8 */ \ - subsample_##bd##_##sub##_16x16_##arch, /* 16x16 */ \ - subsample_##bd##_##sub##_32x32_##arch, /* 32x32 */ \ - cfl_subsample_##bd##_null, /* 64x64 (invalid CFL size) */ \ - subsample_##bd##_##sub##_4x8_##arch, /* 4x8 */ \ - subsample_##bd##_##sub##_8x4_##arch, /* 8x4 */ \ - subsample_##bd##_##sub##_8x16_##arch, /* 8x16 */ \ - subsample_##bd##_##sub##_16x8_##arch, /* 16x8 */ \ - subsample_##bd##_##sub##_16x32_##arch, /* 16x32 */ \ - subsample_##bd##_##sub##_32x16_##arch, /* 32x16 */ \ - cfl_subsample_##bd##_null, /* 32x64 (invalid CFL size) */ \ - cfl_subsample_##bd##_null, /* 64x32 (invalid CFL size) */ \ - subsample_##bd##_##sub##_4x16_##arch, /* 4x16 */ \ - subsample_##bd##_##sub##_16x4_##arch, /* 16x4 */ \ - subsample_##bd##_##sub##_8x32_##arch, /* 8x32 */ \ - subsample_##bd##_##sub##_32x8_##arch, /* 32x8 */ \ - cfl_subsample_##bd##_null, /* 16x64 (invalid CFL size) */ \ - cfl_subsample_##bd##_null, /* 64x16 (invalid CFL size) */ \ - }; - -// The RTCD script does not support passing in an array, so we wrap it in this -// function. -#define CFL_GET_SUBSAMPLE_FUNCTION(arch) \ - CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \ - CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \ - CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd) \ - CFL_SUBSAMPLE_FUNCTIONS(arch, 420, hbd) \ - CFL_SUBSAMPLE_FUNCTIONS(arch, 422, hbd) \ - CFL_SUBSAMPLE_FUNCTIONS(arch, 444, hbd) - -// Null function used for invalid tx_sizes -static INLINE void cfl_subtract_average_null(const uint16_t *src, - int16_t *dst) { - (void)dst; - (void)src; - assert(0); -} - -// Declare a size-specific wrapper for the size-generic function. 
The compiler -// will inline the size generic function in here, the advantage is that the size -// will be constant allowing for loop unrolling and other constant propagated -// goodness. -#define CFL_SUB_AVG_X(arch, width, height, round_offset, num_pel_log2) \ - void subtract_average_##width##x##height##_##arch(const uint16_t *src, \ - int16_t *dst) { \ - subtract_average_##arch(src, dst, width, height, round_offset, \ - num_pel_log2); \ - } - -// Declare size-specific wrappers for all valid CfL sizes. -#define CFL_SUB_AVG_FN(arch) \ - CFL_SUB_AVG_X(arch, 4, 4, 8, 4) \ - CFL_SUB_AVG_X(arch, 4, 8, 16, 5) \ - CFL_SUB_AVG_X(arch, 4, 16, 32, 6) \ - CFL_SUB_AVG_X(arch, 8, 4, 16, 5) \ - CFL_SUB_AVG_X(arch, 8, 8, 32, 6) \ - CFL_SUB_AVG_X(arch, 8, 16, 64, 7) \ - CFL_SUB_AVG_X(arch, 8, 32, 128, 8) \ - CFL_SUB_AVG_X(arch, 16, 4, 32, 6) \ - CFL_SUB_AVG_X(arch, 16, 8, 64, 7) \ - CFL_SUB_AVG_X(arch, 16, 16, 128, 8) \ - CFL_SUB_AVG_X(arch, 16, 32, 256, 9) \ - CFL_SUB_AVG_X(arch, 32, 8, 128, 8) \ - CFL_SUB_AVG_X(arch, 32, 16, 256, 9) \ - CFL_SUB_AVG_X(arch, 32, 32, 512, 10) \ - cfl_subtract_average_fn get_subtract_average_fn_##arch(TX_SIZE tx_size) { \ - static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = { \ - subtract_average_4x4_##arch, /* 4x4 */ \ - subtract_average_8x8_##arch, /* 8x8 */ \ - subtract_average_16x16_##arch, /* 16x16 */ \ - subtract_average_32x32_##arch, /* 32x32 */ \ - cfl_subtract_average_null, /* 64x64 (invalid CFL size) */ \ - subtract_average_4x8_##arch, /* 4x8 */ \ - subtract_average_8x4_##arch, /* 8x4 */ \ - subtract_average_8x16_##arch, /* 8x16 */ \ - subtract_average_16x8_##arch, /* 16x8 */ \ - subtract_average_16x32_##arch, /* 16x32 */ \ - subtract_average_32x16_##arch, /* 32x16 */ \ - cfl_subtract_average_null, /* 32x64 (invalid CFL size) */ \ - cfl_subtract_average_null, /* 64x32 (invalid CFL size) */ \ - subtract_average_4x16_##arch, /* 4x16 (invalid CFL size) */ \ - subtract_average_16x4_##arch, /* 16x4 (invalid CFL size) */ \ - 
subtract_average_8x32_##arch, /* 8x32 (invalid CFL size) */ \ - subtract_average_32x8_##arch, /* 32x8 (invalid CFL size) */ \ - cfl_subtract_average_null, /* 16x64 (invalid CFL size) */ \ - cfl_subtract_average_null, /* 64x16 (invalid CFL size) */ \ - }; \ - /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \ - /* index the function pointer array out of bounds. */ \ - return sub_avg[tx_size % TX_SIZES_ALL]; \ - } - -// For VSX SIMD optimization, the C versions of width == 4 subtract are -// faster than the VSX. As such, the VSX code calls the C versions. -void subtract_average_4x4_c(const uint16_t *src, int16_t *dst); -void subtract_average_4x8_c(const uint16_t *src, int16_t *dst); -void subtract_average_4x16_c(const uint16_t *src, int16_t *dst); - -#define CFL_PREDICT_lbd(arch, width, height) \ - void predict_lbd_##width##x##height##_##arch(const int16_t *pred_buf_q3, \ - uint8_t *dst, int dst_stride, \ - int alpha_q3) { \ - cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ - height); \ - } - -#define CFL_PREDICT_hbd(arch, width, height) \ - void predict_hbd_##width##x##height##_##arch(const int16_t *pred_buf_q3, \ - uint16_t *dst, int dst_stride, \ - int alpha_q3, int bd) { \ - cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ - height); \ - } - -// This wrapper exists because clang format does not like calling macros with -// lowercase letters. 
-#define CFL_PREDICT_X(arch, width, height, bd) \ - CFL_PREDICT_##bd(arch, width, height) - -// Null function used for invalid tx_sizes -void cfl_predict_lbd_null(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); - -// Null function used for invalid tx_sizes -void cfl_predict_hbd_null(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); - -#define CFL_PREDICT_FN(arch, bd) \ - CFL_PREDICT_X(arch, 4, 4, bd) \ - CFL_PREDICT_X(arch, 4, 8, bd) \ - CFL_PREDICT_X(arch, 4, 16, bd) \ - CFL_PREDICT_X(arch, 8, 4, bd) \ - CFL_PREDICT_X(arch, 8, 8, bd) \ - CFL_PREDICT_X(arch, 8, 16, bd) \ - CFL_PREDICT_X(arch, 8, 32, bd) \ - CFL_PREDICT_X(arch, 16, 4, bd) \ - CFL_PREDICT_X(arch, 16, 8, bd) \ - CFL_PREDICT_X(arch, 16, 16, bd) \ - CFL_PREDICT_X(arch, 16, 32, bd) \ - CFL_PREDICT_X(arch, 32, 8, bd) \ - CFL_PREDICT_X(arch, 32, 16, bd) \ - CFL_PREDICT_X(arch, 32, 32, bd) \ - cfl_predict_##bd##_fn get_predict_##bd##_fn_##arch(TX_SIZE tx_size) { \ - static const cfl_predict_##bd##_fn pred[TX_SIZES_ALL] = { \ - predict_##bd##_4x4_##arch, /* 4x4 */ \ - predict_##bd##_8x8_##arch, /* 8x8 */ \ - predict_##bd##_16x16_##arch, /* 16x16 */ \ - predict_##bd##_32x32_##arch, /* 32x32 */ \ - cfl_predict_##bd##_null, /* 64x64 (invalid CFL size) */ \ - predict_##bd##_4x8_##arch, /* 4x8 */ \ - predict_##bd##_8x4_##arch, /* 8x4 */ \ - predict_##bd##_8x16_##arch, /* 8x16 */ \ - predict_##bd##_16x8_##arch, /* 16x8 */ \ - predict_##bd##_16x32_##arch, /* 16x32 */ \ - predict_##bd##_32x16_##arch, /* 32x16 */ \ - cfl_predict_##bd##_null, /* 32x64 (invalid CFL size) */ \ - cfl_predict_##bd##_null, /* 64x32 (invalid CFL size) */ \ - predict_##bd##_4x16_##arch, /* 4x16 */ \ - predict_##bd##_16x4_##arch, /* 16x4 */ \ - predict_##bd##_8x32_##arch, /* 8x32 */ \ - predict_##bd##_32x8_##arch, /* 32x8 */ \ - cfl_predict_##bd##_null, /* 16x64 (invalid CFL size) */ \ - cfl_predict_##bd##_null, /* 64x16 (invalid CFL size) */ \ - }; \ - /* Modulo TX_SIZES_ALL to 
ensure that an attacker won't be able to */ \ - /* index the function pointer array out of bounds. */ \ - return pred[tx_size % TX_SIZES_ALL]; \ - } - -#endif // AOM_AV1_COMMON_CFL_H_ diff --git a/third_party/aom/av1/common/common.h b/third_party/aom/av1/common/common.h deleted file mode 100644 index bed6083db..000000000 --- a/third_party/aom/av1/common/common.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_COMMON_H_ -#define AOM_AV1_COMMON_COMMON_H_ - -/* Interface header for common constant data structures and lookup tables */ - -#include <assert.h> - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom/aom_integer.h" -#include "aom_ports/bitops.h" -#include "config/aom_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define PI 3.141592653589793238462643383279502884 - -// Only need this for fixed-size arrays, for structs just assign. -#define av1_copy(dest, src) \ - { \ - assert(sizeof(dest) == sizeof(src)); \ - memcpy(dest, src, sizeof(src)); \ - } - -// Use this for variably-sized arrays. -#define av1_copy_array(dest, src, n) \ - { \ - assert(sizeof(*(dest)) == sizeof(*(src))); \ - memcpy(dest, src, n * sizeof(*(src))); \ - } - -#define av1_zero(dest) memset(&(dest), 0, sizeof(dest)) -#define av1_zero_array(dest, n) memset(dest, 0, n * sizeof(*(dest))) - -static INLINE int get_unsigned_bits(unsigned int num_values) { - return num_values > 0 ? 
get_msb(num_values) + 1 : 0; -} - -#define CHECK_MEM_ERROR(cm, lval, expr) \ - AOM_CHECK_MEM_ERROR(&cm->error, lval, expr) - -#define AOM_FRAME_MARKER 0x2 - -#define AV1_MIN_TILE_SIZE_BYTES 1 - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_COMMON_H_ diff --git a/third_party/aom/av1/common/common_data.h b/third_party/aom/av1/common/common_data.h deleted file mode 100644 index 46e455fdb..000000000 --- a/third_party/aom/av1/common/common_data.h +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_COMMON_DATA_H_ -#define AOM_AV1_COMMON_COMMON_DATA_H_ - -#include "av1/common/enums.h" -#include "aom/aom_integer.h" -#include "aom_dsp/aom_dsp_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Log 2 conversion lookup tables in units of mode info (4x4). -// The Mi_Width_Log2 table in the spec (Section 9.3. Conversion tables). -static const uint8_t mi_size_wide_log2[BLOCK_SIZES_ALL] = { - 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 0, 2, 1, 3, 2, 4 -}; -// The Mi_Height_Log2 table in the spec (Section 9.3. Conversion tables). -static const uint8_t mi_size_high_log2[BLOCK_SIZES_ALL] = { - 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 2, 0, 3, 1, 4, 2 -}; - -// Width/height lookup tables in units of mode info (4x4). -// The Num_4x4_Blocks_Wide table in the spec (Section 9.3. Conversion tables). 
-static const uint8_t mi_size_wide[BLOCK_SIZES_ALL] = { - 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 1, 4, 2, 8, 4, 16 -}; - -// The Num_4x4_Blocks_High table in the spec (Section 9.3. Conversion tables). -static const uint8_t mi_size_high[BLOCK_SIZES_ALL] = { - 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 4, 1, 8, 2, 16, 4 -}; - -// Width/height lookup tables in units of samples. -// The Block_Width table in the spec (Section 9.3. Conversion tables). -static const uint8_t block_size_wide[BLOCK_SIZES_ALL] = { - 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, - 64, 64, 64, 128, 128, 4, 16, 8, 32, 16, 64 -}; - -// The Block_Height table in the spec (Section 9.3. Conversion tables). -static const uint8_t block_size_high[BLOCK_SIZES_ALL] = { - 4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, - 32, 64, 128, 64, 128, 16, 4, 32, 8, 64, 16 -}; - -// Maps a block size to a context. -// The Size_Group table in the spec (Section 9.3. Conversion tables). -// AOMMIN(3, AOMMIN(mi_size_wide_log2(bsize), mi_size_high_log2(bsize))) -static const uint8_t size_group_lookup[BLOCK_SIZES_ALL] = { - 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 0, 0, 1, 1, 2, 2 -}; - -static const uint8_t num_pels_log2_lookup[BLOCK_SIZES_ALL] = { - 4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13, 13, 14, 6, 6, 8, 8, 10, 10 -}; - -// A compressed version of the Partition_Subsize table in the spec (9.3. -// Conversion tables), for square block sizes only. 
-/* clang-format off */ -static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][SQR_BLOCK_SIZES] = { - { // PARTITION_NONE - BLOCK_4X4, BLOCK_8X8, BLOCK_16X16, - BLOCK_32X32, BLOCK_64X64, BLOCK_128X128 - }, { // PARTITION_HORZ - BLOCK_INVALID, BLOCK_8X4, BLOCK_16X8, - BLOCK_32X16, BLOCK_64X32, BLOCK_128X64 - }, { // PARTITION_VERT - BLOCK_INVALID, BLOCK_4X8, BLOCK_8X16, - BLOCK_16X32, BLOCK_32X64, BLOCK_64X128 - }, { // PARTITION_SPLIT - BLOCK_INVALID, BLOCK_4X4, BLOCK_8X8, - BLOCK_16X16, BLOCK_32X32, BLOCK_64X64 - }, { // PARTITION_HORZ_A - BLOCK_INVALID, BLOCK_8X4, BLOCK_16X8, - BLOCK_32X16, BLOCK_64X32, BLOCK_128X64 - }, { // PARTITION_HORZ_B - BLOCK_INVALID, BLOCK_8X4, BLOCK_16X8, - BLOCK_32X16, BLOCK_64X32, BLOCK_128X64 - }, { // PARTITION_VERT_A - BLOCK_INVALID, BLOCK_4X8, BLOCK_8X16, - BLOCK_16X32, BLOCK_32X64, BLOCK_64X128 - }, { // PARTITION_VERT_B - BLOCK_INVALID, BLOCK_4X8, BLOCK_8X16, - BLOCK_16X32, BLOCK_32X64, BLOCK_64X128 - }, { // PARTITION_HORZ_4 - BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X4, - BLOCK_32X8, BLOCK_64X16, BLOCK_INVALID - }, { // PARTITION_VERT_4 - BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16, - BLOCK_8X32, BLOCK_16X64, BLOCK_INVALID - } -}; - -static const TX_SIZE max_txsize_lookup[BLOCK_SIZES_ALL] = { - // 4X4 - TX_4X4, - // 4X8, 8X4, 8X8 - TX_4X4, TX_4X4, TX_8X8, - // 8X16, 16X8, 16X16 - TX_8X8, TX_8X8, TX_16X16, - // 16X32, 32X16, 32X32 - TX_16X16, TX_16X16, TX_32X32, - // 32X64, 64X32, - TX_32X32, TX_32X32, - // 64X64 - TX_64X64, - // 64x128, 128x64, 128x128 - TX_64X64, TX_64X64, TX_64X64, - // 4x16, 16x4, 8x32 - TX_4X4, TX_4X4, TX_8X8, - // 32x8, 16x64 64x16 - TX_8X8, TX_16X16, TX_16X16 -}; - -static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES_ALL] = { - // 4X4 - TX_4X4, - // 4X8, 8X4, 8X8 - TX_4X8, TX_8X4, TX_8X8, - // 8X16, 16X8, 16X16 - TX_8X16, TX_16X8, TX_16X16, - // 16X32, 32X16, 32X32 - TX_16X32, TX_32X16, TX_32X32, - // 32X64, 64X32, - TX_32X64, TX_64X32, - // 64X64 - TX_64X64, - // 64x128, 128x64, 128x128 - TX_64X64, 
TX_64X64, TX_64X64, - // 4x16, 16x4, - TX_4X16, TX_16X4, - // 8x32, 32x8 - TX_8X32, TX_32X8, - // 16x64, 64x16 - TX_16X64, TX_64X16 -}; - -static const TX_TYPE_1D vtx_tab[TX_TYPES] = { - DCT_1D, ADST_1D, DCT_1D, ADST_1D, - FLIPADST_1D, DCT_1D, FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D, - DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, IDTX_1D, -}; - -static const TX_TYPE_1D htx_tab[TX_TYPES] = { - DCT_1D, DCT_1D, ADST_1D, ADST_1D, - DCT_1D, FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D, - IDTX_1D, DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, -}; - -#define TXSIZE_CAT_INVALID (-1) - -/* clang-format on */ - -static const TX_SIZE sub_tx_size_map[TX_SIZES_ALL] = { - TX_4X4, // TX_4X4 - TX_4X4, // TX_8X8 - TX_8X8, // TX_16X16 - TX_16X16, // TX_32X32 - TX_32X32, // TX_64X64 - TX_4X4, // TX_4X8 - TX_4X4, // TX_8X4 - TX_8X8, // TX_8X16 - TX_8X8, // TX_16X8 - TX_16X16, // TX_16X32 - TX_16X16, // TX_32X16 - TX_32X32, // TX_32X64 - TX_32X32, // TX_64X32 - TX_4X8, // TX_4X16 - TX_8X4, // TX_16X4 - TX_8X16, // TX_8X32 - TX_16X8, // TX_32X8 - TX_16X32, // TX_16X64 - TX_32X16, // TX_64X16 -}; - -static const TX_SIZE txsize_horz_map[TX_SIZES_ALL] = { - TX_4X4, // TX_4X4 - TX_8X8, // TX_8X8 - TX_16X16, // TX_16X16 - TX_32X32, // TX_32X32 - TX_64X64, // TX_64X64 - TX_4X4, // TX_4X8 - TX_8X8, // TX_8X4 - TX_8X8, // TX_8X16 - TX_16X16, // TX_16X8 - TX_16X16, // TX_16X32 - TX_32X32, // TX_32X16 - TX_32X32, // TX_32X64 - TX_64X64, // TX_64X32 - TX_4X4, // TX_4X16 - TX_16X16, // TX_16X4 - TX_8X8, // TX_8X32 - TX_32X32, // TX_32X8 - TX_16X16, // TX_16X64 - TX_64X64, // TX_64X16 -}; - -static const TX_SIZE txsize_vert_map[TX_SIZES_ALL] = { - TX_4X4, // TX_4X4 - TX_8X8, // TX_8X8 - TX_16X16, // TX_16X16 - TX_32X32, // TX_32X32 - TX_64X64, // TX_64X64 - TX_8X8, // TX_4X8 - TX_4X4, // TX_8X4 - TX_16X16, // TX_8X16 - TX_8X8, // TX_16X8 - TX_32X32, // TX_16X32 - TX_16X16, // TX_32X16 - TX_64X64, // TX_32X64 - TX_32X32, // TX_64X32 - TX_16X16, // TX_4X16 - TX_4X4, // TX_16X4 - 
TX_32X32, // TX_8X32 - TX_8X8, // TX_32X8 - TX_64X64, // TX_16X64 - TX_16X16, // TX_64X16 -}; - -#define TX_SIZE_W_MIN 4 - -// Transform block width in pixels -static const int tx_size_wide[TX_SIZES_ALL] = { - 4, 8, 16, 32, 64, 4, 8, 8, 16, 16, 32, 32, 64, 4, 16, 8, 32, 16, 64, -}; - -#define TX_SIZE_H_MIN 4 - -// Transform block height in pixels -static const int tx_size_high[TX_SIZES_ALL] = { - 4, 8, 16, 32, 64, 8, 4, 16, 8, 32, 16, 64, 32, 16, 4, 32, 8, 64, 16, -}; - -// Transform block width in unit -static const int tx_size_wide_unit[TX_SIZES_ALL] = { - 1, 2, 4, 8, 16, 1, 2, 2, 4, 4, 8, 8, 16, 1, 4, 2, 8, 4, 16, -}; - -// Transform block height in unit -static const int tx_size_high_unit[TX_SIZES_ALL] = { - 1, 2, 4, 8, 16, 2, 1, 4, 2, 8, 4, 16, 8, 4, 1, 8, 2, 16, 4, -}; - -// Transform block width in log2 -static const int tx_size_wide_log2[TX_SIZES_ALL] = { - 2, 3, 4, 5, 6, 2, 3, 3, 4, 4, 5, 5, 6, 2, 4, 3, 5, 4, 6, -}; - -// Transform block height in log2 -static const int tx_size_high_log2[TX_SIZES_ALL] = { - 2, 3, 4, 5, 6, 3, 2, 4, 3, 5, 4, 6, 5, 4, 2, 5, 3, 6, 4, -}; - -static const int tx_size_2d[TX_SIZES_ALL + 1] = { - 16, 64, 256, 1024, 4096, 32, 32, 128, 128, 512, - 512, 2048, 2048, 64, 64, 256, 256, 1024, 1024, -}; - -static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = { - BLOCK_4X4, // TX_4X4 - BLOCK_8X8, // TX_8X8 - BLOCK_16X16, // TX_16X16 - BLOCK_32X32, // TX_32X32 - BLOCK_64X64, // TX_64X64 - BLOCK_4X8, // TX_4X8 - BLOCK_8X4, // TX_8X4 - BLOCK_8X16, // TX_8X16 - BLOCK_16X8, // TX_16X8 - BLOCK_16X32, // TX_16X32 - BLOCK_32X16, // TX_32X16 - BLOCK_32X64, // TX_32X64 - BLOCK_64X32, // TX_64X32 - BLOCK_4X16, // TX_4X16 - BLOCK_16X4, // TX_16X4 - BLOCK_8X32, // TX_8X32 - BLOCK_32X8, // TX_32X8 - BLOCK_16X64, // TX_16X64 - BLOCK_64X16, // TX_64X16 -}; - -static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = { - TX_4X4, // TX_4X4 - TX_8X8, // TX_8X8 - TX_16X16, // TX_16X16 - TX_32X32, // TX_32X32 - TX_64X64, // TX_64X64 - TX_4X4, // TX_4X8 - TX_4X4, 
// TX_8X4 - TX_8X8, // TX_8X16 - TX_8X8, // TX_16X8 - TX_16X16, // TX_16X32 - TX_16X16, // TX_32X16 - TX_32X32, // TX_32X64 - TX_32X32, // TX_64X32 - TX_4X4, // TX_4X16 - TX_4X4, // TX_16X4 - TX_8X8, // TX_8X32 - TX_8X8, // TX_32X8 - TX_16X16, // TX_16X64 - TX_16X16, // TX_64X16 -}; - -static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = { - TX_4X4, // TX_4X4 - TX_8X8, // TX_8X8 - TX_16X16, // TX_16X16 - TX_32X32, // TX_32X32 - TX_64X64, // TX_64X64 - TX_8X8, // TX_4X8 - TX_8X8, // TX_8X4 - TX_16X16, // TX_8X16 - TX_16X16, // TX_16X8 - TX_32X32, // TX_16X32 - TX_32X32, // TX_32X16 - TX_64X64, // TX_32X64 - TX_64X64, // TX_64X32 - TX_16X16, // TX_4X16 - TX_16X16, // TX_16X4 - TX_32X32, // TX_8X32 - TX_32X32, // TX_32X8 - TX_64X64, // TX_16X64 - TX_64X64, // TX_64X16 -}; - -static const int8_t txsize_log2_minus4[TX_SIZES_ALL] = { - 0, // TX_4X4 - 2, // TX_8X8 - 4, // TX_16X16 - 6, // TX_32X32 - 6, // TX_64X64 - 1, // TX_4X8 - 1, // TX_8X4 - 3, // TX_8X16 - 3, // TX_16X8 - 5, // TX_16X32 - 5, // TX_32X16 - 6, // TX_32X64 - 6, // TX_64X32 - 2, // TX_4X16 - 2, // TX_16X4 - 4, // TX_8X32 - 4, // TX_32X8 - 5, // TX_16X64 - 5, // TX_64X16 -}; - -/* clang-format off */ -static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = { - TX_4X4, // ONLY_4X4 - TX_64X64, // TX_MODE_LARGEST - TX_64X64, // TX_MODE_SELECT -}; - -// The Subsampled_Size table in the spec (Section 5.11.38. Get plane residual -// size function). 
-static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES_ALL][2][2] = { - // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 - // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 - { { BLOCK_4X4, BLOCK_4X4 }, { BLOCK_4X4, BLOCK_4X4 } }, - { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_4X4 } }, - { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_4X4 } }, - { { BLOCK_8X8, BLOCK_8X4 }, { BLOCK_4X8, BLOCK_4X4 } }, - { { BLOCK_8X16, BLOCK_8X8 }, { BLOCK_INVALID, BLOCK_4X8 } }, - { { BLOCK_16X8, BLOCK_INVALID }, { BLOCK_8X8, BLOCK_8X4 } }, - { { BLOCK_16X16, BLOCK_16X8 }, { BLOCK_8X16, BLOCK_8X8 } }, - { { BLOCK_16X32, BLOCK_16X16 }, { BLOCK_INVALID, BLOCK_8X16 } }, - { { BLOCK_32X16, BLOCK_INVALID }, { BLOCK_16X16, BLOCK_16X8 } }, - { { BLOCK_32X32, BLOCK_32X16 }, { BLOCK_16X32, BLOCK_16X16 } }, - { { BLOCK_32X64, BLOCK_32X32 }, { BLOCK_INVALID, BLOCK_16X32 } }, - { { BLOCK_64X32, BLOCK_INVALID }, { BLOCK_32X32, BLOCK_32X16 } }, - { { BLOCK_64X64, BLOCK_64X32 }, { BLOCK_32X64, BLOCK_32X32 } }, - { { BLOCK_64X128, BLOCK_64X64 }, { BLOCK_INVALID, BLOCK_32X64 } }, - { { BLOCK_128X64, BLOCK_INVALID }, { BLOCK_64X64, BLOCK_64X32 } }, - { { BLOCK_128X128, BLOCK_128X64 }, { BLOCK_64X128, BLOCK_64X64 } }, - { { BLOCK_4X16, BLOCK_4X8 }, { BLOCK_INVALID, BLOCK_4X8 } }, - { { BLOCK_16X4, BLOCK_INVALID }, { BLOCK_8X4, BLOCK_8X4 } }, - { { BLOCK_8X32, BLOCK_8X16 }, { BLOCK_INVALID, BLOCK_4X16 } }, - { { BLOCK_32X8, BLOCK_INVALID }, { BLOCK_16X8, BLOCK_16X4 } }, - { { BLOCK_16X64, BLOCK_16X32 }, { BLOCK_INVALID, BLOCK_8X32 } }, - { { BLOCK_64X16, BLOCK_INVALID }, { BLOCK_32X16, BLOCK_32X8 } } -}; -/* clang-format on */ - -// Generates 5 bit field in which each bit set to 1 represents -// a blocksize partition 11111 means we split 128x128, 64x64, 32x32, 16x16 -// and 8x8. 
10000 means we just split the 128x128 to 64x64 -/* clang-format off */ -static const struct { - PARTITION_CONTEXT above; - PARTITION_CONTEXT left; -} partition_context_lookup[BLOCK_SIZES_ALL] = { - { 31, 31 }, // 4X4 - {0b11111, 0b11111} - { 31, 30 }, // 4X8 - {0b11111, 0b11110} - { 30, 31 }, // 8X4 - {0b11110, 0b11111} - { 30, 30 }, // 8X8 - {0b11110, 0b11110} - { 30, 28 }, // 8X16 - {0b11110, 0b11100} - { 28, 30 }, // 16X8 - {0b11100, 0b11110} - { 28, 28 }, // 16X16 - {0b11100, 0b11100} - { 28, 24 }, // 16X32 - {0b11100, 0b11000} - { 24, 28 }, // 32X16 - {0b11000, 0b11100} - { 24, 24 }, // 32X32 - {0b11000, 0b11000} - { 24, 16 }, // 32X64 - {0b11000, 0b10000} - { 16, 24 }, // 64X32 - {0b10000, 0b11000} - { 16, 16 }, // 64X64 - {0b10000, 0b10000} - { 16, 0 }, // 64X128- {0b10000, 0b00000} - { 0, 16 }, // 128X64- {0b00000, 0b10000} - { 0, 0 }, // 128X128-{0b00000, 0b00000} - { 31, 28 }, // 4X16 - {0b11111, 0b11100} - { 28, 31 }, // 16X4 - {0b11100, 0b11111} - { 30, 24 }, // 8X32 - {0b11110, 0b11000} - { 24, 30 }, // 32X8 - {0b11000, 0b11110} - { 28, 16 }, // 16X64 - {0b11100, 0b10000} - { 16, 28 }, // 64X16 - {0b10000, 0b11100} -}; -/* clang-format on */ - -static const int intra_mode_context[INTRA_MODES] = { - 0, 1, 2, 3, 4, 4, 4, 4, 3, 0, 1, 2, 0, -}; - -// Note: this is also used in unit tests. So whenever one changes the table, -// the unit tests need to be changed accordingly. 
-static const int quant_dist_weight[4][2] = { - { 2, 3 }, { 2, 5 }, { 2, 7 }, { 1, MAX_FRAME_DISTANCE } -}; -static const int quant_dist_lookup_table[2][4][2] = { - { { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 } }, - { { 7, 9 }, { 5, 11 }, { 4, 12 }, { 3, 13 } }, -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_COMMON_DATA_H_ diff --git a/third_party/aom/av1/common/convolve.c b/third_party/aom/av1/common/convolve.c deleted file mode 100644 index 1f11126fc..000000000 --- a/third_party/aom/av1/common/convolve.c +++ /dev/null @@ -1,1295 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <assert.h> -#include <string.h> - -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "av1/common/blockd.h" -#include "av1/common/convolve.h" -#include "av1/common/filter.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/resize.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_ports/mem.h" - -void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const int16_t *x_filters, int x0_qn, - int x_step_qn) { - src -= UPSCALE_NORMATIVE_TAPS / 2 - 1; - for (int y = 0; y < h; ++y) { - int x_qn = x0_qn; - for (int x = 0; x < w; ++x) { - const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS]; - const int x_filter_idx = - (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - assert(x_filter_idx <= RS_SUBPEL_MASK); - const int16_t *const x_filter = - &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS]; - int sum = 0; - for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - x_qn += x_step_qn; - } - src += src_stride; - dst += dst_stride; - } -} - -void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, int w, int h, - const int16_t *x_filters, int x0_qn, - int x_step_qn, int bd) { - src -= UPSCALE_NORMATIVE_TAPS / 2 - 1; - for (int y = 0; y < h; ++y) { - int x_qn = x0_qn; - for (int x = 0; x < w; ++x) { - const uint16_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS]; - const int x_filter_idx = - (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - assert(x_filter_idx <= RS_SUBPEL_MASK); - const int16_t *const x_filter = - &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS]; - int sum = 0; - for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - x_qn += x_step_qn; - } - src += src_stride; - dst += dst_stride; - } -} - 
-void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; - int im_h = h + filter_params_y->taps - 1; - int im_stride = w; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int bd = 8; - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - - // horizontal filter - const uint8_t *src_horiz = src - fo_vert * src_stride; - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - for (int y = 0; y < im_h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t sum = (1 << (bd + FILTER_BITS - 1)); - for (int k = 0; k < filter_params_x->taps; ++k) { - sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; - } - assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); - im_block[y * im_stride + x] = - (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); - } - } - - // vertical filter - int16_t *src_vert = im_block + fo_vert * im_stride; - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t sum = 1 << offset_bits; - for (int k = 0; k < filter_params_y->taps; ++k) { - sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; - } - assert(0 <= sum && sum < (1 << (offset_bits + 2))); - int16_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - - ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(res, bits)); - } - } -} 
- -void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int fo_vert = filter_params_y->taps / 2 - 1; - (void)filter_params_x; - (void)subpel_x_q4; - (void)conv_params; - - assert(conv_params->round_0 <= FILTER_BITS); - assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) || - ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS))); - - // vertical filter - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t res = 0; - for (int k = 0; k < filter_params_y->taps; ++k) { - res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; - } - dst[y * dst_stride + x] = - clip_pixel(ROUND_POWER_OF_TWO(res, FILTER_BITS)); - } - } -} - -void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int bits = FILTER_BITS - conv_params->round_0; - (void)filter_params_y; - (void)subpel_y_q4; - (void)conv_params; - - assert(bits >= 0); - assert((FILTER_BITS - conv_params->round_1) >= 0 || - ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS)); - - // horizontal filter - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t res = 0; - for (int k = 0; k < filter_params_x->taps; ++k) { - res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; - } - res = 
ROUND_POWER_OF_TWO(res, conv_params->round_0); - dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(res, bits)); - } - } -} - -void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - (void)conv_params; - - for (int y = 0; y < h; ++y) { - memcpy(dst + y * dst_stride, src + y * src_stride, w * sizeof(src[0])); - } -} - -void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8, - int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; - int im_h = h + filter_params_y->taps - 1; - int im_stride = w; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int bd = 8; - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - - // horizontal filter - const uint8_t *src_horiz = src - fo_vert * src_stride; - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - for (int y = 0; y < im_h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t sum = (1 << (bd + FILTER_BITS - 1)); - for (int k = 0; k < filter_params_x->taps; ++k) { - sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; - } - assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); - im_block[y * im_stride + x] = - (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); - } - } - - // vertical filter - int16_t *src_vert 
= im_block + fo_vert * im_stride; - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t sum = 1 << offset_bits; - for (int k = 0; k < filter_params_y->taps; ++k) { - sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; - } - assert(0 <= sum && sum < (1 << (offset_bits + 2))); - CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); - if (conv_params->do_average) { - int32_t tmp = dst[y * dst_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - tmp -= (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - dst8[y * dst8_stride + x] = - clip_pixel(ROUND_POWER_OF_TWO(tmp, round_bits)); - } else { - dst[y * dst_stride + x] = res; - } - } - } -} - -void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst8, - int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int bits = FILTER_BITS - conv_params->round_0; - const int bd = 8; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int round_offset = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - (void)filter_params_x; - (void)subpel_x_q4; - - // vertical filter - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - 
filter_params_y, subpel_y_q4 & SUBPEL_MASK); - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t res = 0; - for (int k = 0; k < filter_params_y->taps; ++k) { - res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; - } - res *= (1 << bits); - res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset; - - if (conv_params->do_average) { - int32_t tmp = dst[y * dst_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - tmp -= round_offset; - dst8[y * dst8_stride + x] = - clip_pixel(ROUND_POWER_OF_TWO(tmp, round_bits)); - } else { - dst[y * dst_stride + x] = res; - } - } - } -} - -void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst8, - int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int bits = FILTER_BITS - conv_params->round_1; - const int bd = 8; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int round_offset = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - (void)filter_params_y; - (void)subpel_y_q4; - - // horizontal filter - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t res = 0; - for (int k = 0; k < filter_params_x->taps; ++k) { - res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; - } - res = (1 << bits) * ROUND_POWER_OF_TWO(res, 
conv_params->round_0); - res += round_offset; - - if (conv_params->do_average) { - int32_t tmp = dst[y * dst_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - tmp -= round_offset; - dst8[y * dst8_stride + x] = - clip_pixel(ROUND_POWER_OF_TWO(tmp, round_bits)); - } else { - dst[y * dst_stride + x] = res; - } - } - } -} - -void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, - uint8_t *dst8, int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int bits = - FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; - const int bd = 8; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int round_offset = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - CONV_BUF_TYPE res = src[y * src_stride + x] << bits; - res += round_offset; - - if (conv_params->do_average) { - int32_t tmp = dst[y * dst_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - tmp -= round_offset; - dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits)); - } else { - dst[y * dst_stride + x] = res; - } - } - } -} - -void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8, - int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams 
*filter_params_y, - const int subpel_x_qn, const int x_step_qn, - const int subpel_y_qn, const int y_step_qn, - ConvolveParams *conv_params) { - int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]; - int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) + - filter_params_y->taps; - CONV_BUF_TYPE *dst16 = conv_params->dst; - const int dst16_stride = conv_params->dst_stride; - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - assert(bits >= 0); - int im_stride = w; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int bd = 8; - - // horizontal filter - const uint8_t *src_horiz = src - fo_vert * src_stride; - for (int y = 0; y < im_h; ++y) { - int x_qn = subpel_x_qn; - for (int x = 0; x < w; ++x, x_qn += x_step_qn) { - const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)]; - const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; - assert(x_filter_idx < SUBPEL_SHIFTS); - const int16_t *x_filter = - av1_get_interp_filter_subpel_kernel(filter_params_x, x_filter_idx); - int32_t sum = (1 << (bd + FILTER_BITS - 1)); - for (int k = 0; k < filter_params_x->taps; ++k) { - sum += x_filter[k] * src_x[k - fo_horiz]; - } - assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); - im_block[y * im_stride + x] = - (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); - } - src_horiz += src_stride; - } - - // vertical filter - int16_t *src_vert = im_block + fo_vert * im_stride; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - for (int x = 0; x < w; ++x) { - int y_qn = subpel_y_qn; - for (int y = 0; y < h; ++y, y_qn += y_step_qn) { - const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride]; - const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; - assert(y_filter_idx < SUBPEL_SHIFTS); - const int16_t *y_filter = - 
av1_get_interp_filter_subpel_kernel(filter_params_y, y_filter_idx); - int32_t sum = 1 << offset_bits; - for (int k = 0; k < filter_params_y->taps; ++k) { - sum += y_filter[k] * src_y[(k - fo_vert) * im_stride]; - } - assert(0 <= sum && sum < (1 << (offset_bits + 2))); - CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); - if (conv_params->is_compound) { - if (conv_params->do_average) { - int32_t tmp = dst16[y * dst16_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - /* Subtract round offset and convolve round */ - tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits)); - } else { - dst16[y * dst16_stride + x] = res; - } - } else { - /* Subtract round offset and convolve round */ - int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits)); - } - } - src_vert++; - } -} - -static void convolve_2d_scale_wrapper( - const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_qn, - const int x_step_qn, const int subpel_y_qn, const int y_step_qn, - ConvolveParams *conv_params) { - if (conv_params->is_compound) { - assert(conv_params->dst != NULL); - } - av1_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h, filter_params_x, - filter_params_y, subpel_x_qn, x_step_qn, subpel_y_qn, - y_step_qn, conv_params); -} - -// TODO(huisu@google.com): bilinear filtering only needs 2 taps in general. So -// we may create optimized code to do 2-tap filtering for all bilinear filtering -// usages, not just IntraBC. 
-static void convolve_2d_for_intrabc(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int w, int h, - int subpel_x_q4, int subpel_y_q4, - ConvolveParams *conv_params) { - const InterpFilterParams *filter_params_x = - subpel_x_q4 ? &av1_intrabc_filter_params : NULL; - const InterpFilterParams *filter_params_y = - subpel_y_q4 ? &av1_intrabc_filter_params : NULL; - if (subpel_x_q4 != 0 && subpel_y_q4 != 0) { - av1_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h, - filter_params_x, filter_params_y, 0, 0, conv_params); - } else if (subpel_x_q4 != 0) { - av1_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h, filter_params_x, - filter_params_y, 0, 0, conv_params); - } else { - av1_convolve_y_sr_c(src, src_stride, dst, dst_stride, w, h, filter_params_x, - filter_params_y, 0, 0, conv_params); - } -} - -void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - InterpFilters interp_filters, const int subpel_x_q4, - int x_step_q4, const int subpel_y_q4, int y_step_q4, - int scaled, ConvolveParams *conv_params, - const struct scale_factors *sf, int is_intrabc) { - assert(IMPLIES(is_intrabc, !scaled)); - (void)x_step_q4; - (void)y_step_q4; - (void)dst; - (void)dst_stride; - - if (is_intrabc && (subpel_x_q4 != 0 || subpel_y_q4 != 0)) { - convolve_2d_for_intrabc(src, src_stride, dst, dst_stride, w, h, subpel_x_q4, - subpel_y_q4, conv_params); - return; - } - - InterpFilter filter_x = 0; - InterpFilter filter_y = 0; - const int need_filter_params_x = (subpel_x_q4 != 0) | scaled; - const int need_filter_params_y = (subpel_y_q4 != 0) | scaled; - if (need_filter_params_x) - filter_x = av1_extract_interp_filter(interp_filters, 1); - if (need_filter_params_y) - filter_y = av1_extract_interp_filter(interp_filters, 0); - const InterpFilterParams *filter_params_x = - need_filter_params_x - ? 
av1_get_interp_filter_params_with_block_size(filter_x, w) - : NULL; - const InterpFilterParams *filter_params_y = - need_filter_params_y - ? av1_get_interp_filter_params_with_block_size(filter_y, h) - : NULL; - - if (scaled) { - convolve_2d_scale_wrapper(src, src_stride, dst, dst_stride, w, h, - filter_params_x, filter_params_y, subpel_x_q4, - x_step_q4, subpel_y_q4, y_step_q4, conv_params); - } else { - sf->convolve[subpel_x_q4 != 0][subpel_y_q4 != 0][conv_params->is_compound]( - src, src_stride, dst, dst_stride, w, h, filter_params_x, - filter_params_y, subpel_x_q4, subpel_y_q4, conv_params); - } -} - -void av1_highbd_convolve_2d_copy_sr_c( - const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - (void)conv_params; - (void)bd; - - for (int y = 0; y < h; ++y) { - memcpy(dst + y * dst_stride, src + y * src_stride, w * sizeof(src[0])); - } -} - -void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params, int bd) { - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int bits = FILTER_BITS - conv_params->round_0; - (void)filter_params_y; - (void)subpel_y_q4; - - assert(bits >= 0); - assert((FILTER_BITS - conv_params->round_1) >= 0 || - ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS)); - - // horizontal filter - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t res = 0; - for (int k = 0; k < 
filter_params_x->taps; ++k) { - res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; - } - res = ROUND_POWER_OF_TWO(res, conv_params->round_0); - dst[y * dst_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd); - } - } -} - -void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params, int bd) { - const int fo_vert = filter_params_y->taps / 2 - 1; - (void)filter_params_x; - (void)subpel_x_q4; - (void)conv_params; - - assert(conv_params->round_0 <= FILTER_BITS); - assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) || - ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS))); - // vertical filter - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t res = 0; - for (int k = 0; k < filter_params_y->taps; ++k) { - res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; - } - dst[y * dst_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(res, FILTER_BITS), bd); - } - } -} - -void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params, int bd) { - int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; - int im_h = h + filter_params_y->taps - 1; - int im_stride = w; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - assert(bits >= 0); - - // horizontal filter - const uint16_t 
*src_horiz = src - fo_vert * src_stride; - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - for (int y = 0; y < im_h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t sum = (1 << (bd + FILTER_BITS - 1)); - for (int k = 0; k < filter_params_x->taps; ++k) { - sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; - } - assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); - im_block[y * im_stride + x] = - ROUND_POWER_OF_TWO(sum, conv_params->round_0); - } - } - - // vertical filter - int16_t *src_vert = im_block + fo_vert * im_stride; - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t sum = 1 << offset_bits; - for (int k = 0; k < filter_params_y->taps; ++k) { - sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; - } - assert(0 <= sum && sum < (1 << (offset_bits + 2))); - int32_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - - ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - dst[y * dst_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd); - } - } -} - -void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, - uint16_t *dst16, int dst16_stride, int w, - int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params, int bd) { - int x, y, k; - int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - int im_h = h + filter_params_y->taps - 1; - int im_stride = w; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int 
round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - assert(round_bits >= 0); - - // horizontal filter - const uint16_t *src_horiz = src - fo_vert * src_stride; - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - for (y = 0; y < im_h; ++y) { - for (x = 0; x < w; ++x) { - int32_t sum = (1 << (bd + FILTER_BITS - 1)); - for (k = 0; k < filter_params_x->taps; ++k) { - sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; - } - assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); - (void)bd; - im_block[y * im_stride + x] = - (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); - } - } - - // vertical filter - int16_t *src_vert = im_block + fo_vert * im_stride; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - int32_t sum = 1 << offset_bits; - for (k = 0; k < filter_params_y->taps; ++k) { - sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; - } - assert(0 <= sum && sum < (1 << (offset_bits + 2))); - CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); - if (conv_params->do_average) { - int32_t tmp = dst[y * dst_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - tmp -= (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - dst16[y * dst16_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd); - } else { - dst[y * dst_stride + x] = res; - } - } - } -} - -void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, - uint16_t *dst16, int dst16_stride, int w, - int h, - const InterpFilterParams *filter_params_x, - const 
InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params, int bd) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int bits = FILTER_BITS - conv_params->round_1; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int round_offset = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - assert(round_bits >= 0); - (void)filter_params_y; - (void)subpel_y_q4; - assert(bits >= 0); - // horizontal filter - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t res = 0; - for (int k = 0; k < filter_params_x->taps; ++k) { - res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; - } - res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0); - res += round_offset; - - if (conv_params->do_average) { - int32_t tmp = dst[y * dst_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - tmp -= round_offset; - dst16[y * dst16_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd); - } else { - dst[y * dst_stride + x] = res; - } - } - } -} - -void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, - uint16_t *dst16, int dst16_stride, int w, - int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params, int bd) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int fo_vert = 
filter_params_y->taps / 2 - 1; - const int bits = FILTER_BITS - conv_params->round_0; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int round_offset = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1)); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - assert(round_bits >= 0); - (void)filter_params_x; - (void)subpel_x_q4; - assert(bits >= 0); - // vertical filter - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - int32_t res = 0; - for (int k = 0; k < filter_params_y->taps; ++k) { - res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; - } - res *= (1 << bits); - res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset; - - if (conv_params->do_average) { - int32_t tmp = dst[y * dst_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - tmp -= round_offset; - dst16[y * dst16_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd); - } else { - dst[y * dst_stride + x] = res; - } - } - } -} - -void av1_highbd_jnt_convolve_2d_copy_c( - const uint16_t *src, int src_stride, uint16_t *dst16, int dst16_stride, - int w, int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int bits = - FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int round_offset = (1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - 
conv_params->round_1 - 1)); - assert(bits >= 0); - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - CONV_BUF_TYPE res = src[y * src_stride + x] << bits; - res += round_offset; - if (conv_params->do_average) { - int32_t tmp = dst[y * dst_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - tmp -= round_offset; - dst16[y * dst16_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd); - } else { - dst[y * dst_stride + x] = res; - } - } - } -} - -void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_qn, const int x_step_qn, - const int subpel_y_qn, const int y_step_qn, - ConvolveParams *conv_params, int bd) { - int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]; - int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) + - filter_params_y->taps; - int im_stride = w; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - CONV_BUF_TYPE *dst16 = conv_params->dst; - const int dst16_stride = conv_params->dst_stride; - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - assert(bits >= 0); - // horizontal filter - const uint16_t *src_horiz = src - fo_vert * src_stride; - for (int y = 0; y < im_h; ++y) { - int x_qn = subpel_x_qn; - for (int x = 0; x < w; ++x, x_qn += x_step_qn) { - const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)]; - const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; - assert(x_filter_idx < SUBPEL_SHIFTS); - const int16_t *x_filter = - 
av1_get_interp_filter_subpel_kernel(filter_params_x, x_filter_idx); - int32_t sum = (1 << (bd + FILTER_BITS - 1)); - for (int k = 0; k < filter_params_x->taps; ++k) { - sum += x_filter[k] * src_x[k - fo_horiz]; - } - assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); - im_block[y * im_stride + x] = - (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0); - } - src_horiz += src_stride; - } - - // vertical filter - int16_t *src_vert = im_block + fo_vert * im_stride; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - for (int x = 0; x < w; ++x) { - int y_qn = subpel_y_qn; - for (int y = 0; y < h; ++y, y_qn += y_step_qn) { - const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride]; - const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; - assert(y_filter_idx < SUBPEL_SHIFTS); - const int16_t *y_filter = - av1_get_interp_filter_subpel_kernel(filter_params_y, y_filter_idx); - int32_t sum = 1 << offset_bits; - for (int k = 0; k < filter_params_y->taps; ++k) { - sum += y_filter[k] * src_y[(k - fo_vert) * im_stride]; - } - assert(0 <= sum && sum < (1 << (offset_bits + 2))); - CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); - if (conv_params->is_compound) { - if (conv_params->do_average) { - int32_t tmp = dst16[y * dst16_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - /* Subtract round offset and convolve round */ - tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - dst[y * dst_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd); - } else { - dst16[y * dst16_stride + x] = res; - } - } else { - /* Subtract round offset and convolve round */ - int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); 
- dst[y * dst_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd); - } - } - src_vert++; - } -} - -static void highbd_convolve_2d_for_intrabc(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, int w, - int h, int subpel_x_q4, - int subpel_y_q4, - ConvolveParams *conv_params, - int bd) { - const InterpFilterParams *filter_params_x = - subpel_x_q4 ? &av1_intrabc_filter_params : NULL; - const InterpFilterParams *filter_params_y = - subpel_y_q4 ? &av1_intrabc_filter_params : NULL; - if (subpel_x_q4 != 0 && subpel_y_q4 != 0) { - av1_highbd_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h, - filter_params_x, filter_params_y, 0, 0, - conv_params, bd); - } else if (subpel_x_q4 != 0) { - av1_highbd_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h, - filter_params_x, filter_params_y, 0, 0, - conv_params, bd); - } else { - av1_highbd_convolve_y_sr_c(src, src_stride, dst, dst_stride, w, h, - filter_params_x, filter_params_y, 0, 0, - conv_params, bd); - } -} - -void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride, - uint8_t *dst8, int dst_stride, int w, int h, - InterpFilters interp_filters, - const int subpel_x_q4, int x_step_q4, - const int subpel_y_q4, int y_step_q4, - int scaled, ConvolveParams *conv_params, - const struct scale_factors *sf, - int is_intrabc, int bd) { - assert(IMPLIES(is_intrabc, !scaled)); - (void)x_step_q4; - (void)y_step_q4; - (void)dst_stride; - const uint16_t *src = CONVERT_TO_SHORTPTR(src8); - - if (is_intrabc && (subpel_x_q4 != 0 || subpel_y_q4 != 0)) { - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - highbd_convolve_2d_for_intrabc(src, src_stride, dst, dst_stride, w, h, - subpel_x_q4, subpel_y_q4, conv_params, bd); - return; - } - - InterpFilter filter_x = 0; - InterpFilter filter_y = 0; - const int need_filter_params_x = (subpel_x_q4 != 0) | scaled; - const int need_filter_params_y = (subpel_y_q4 != 0) | scaled; - if (need_filter_params_x) - filter_x = 
av1_extract_interp_filter(interp_filters, 1); - if (need_filter_params_y) - filter_y = av1_extract_interp_filter(interp_filters, 0); - const InterpFilterParams *filter_params_x = - need_filter_params_x - ? av1_get_interp_filter_params_with_block_size(filter_x, w) - : NULL; - const InterpFilterParams *filter_params_y = - need_filter_params_y - ? av1_get_interp_filter_params_with_block_size(filter_y, h) - : NULL; - - if (scaled) { - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - if (conv_params->is_compound) { - assert(conv_params->dst != NULL); - } - av1_highbd_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h, - filter_params_x, filter_params_y, subpel_x_q4, - x_step_q4, subpel_y_q4, y_step_q4, conv_params, - bd); - } else { - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - - sf->highbd_convolve[subpel_x_q4 != 0][subpel_y_q4 != - 0][conv_params->is_compound]( - src, src_stride, dst, dst_stride, w, h, filter_params_x, - filter_params_y, subpel_x_q4, subpel_y_q4, conv_params, bd); - } -} - -// Note: Fixed size intermediate buffers, place limits on parameters -// of some functions. 2d filtering proceeds in 2 steps: -// (1) Interpolate horizontally into an intermediate buffer, temp. -// (2) Interpolate temp vertically to derive the sub-pixel result. -// Deriving the maximum number of rows in the temp buffer (135): -// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). -// --Largest block size is 128x128 pixels. -// --128 rows in the downscaled frame span a distance of (128 - 1) * 32 in the -// original frame (in 1/16th pixel units). -// --Must round-up because block may be located at sub-pixel position. -// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. -// --((128 - 1) * 32 + 15) >> 4 + 8 = 263. 
-#define WIENER_MAX_EXT_SIZE 263 - -static INLINE int horz_scalar_product(const uint8_t *a, const int16_t *b) { - int sum = 0; - for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k]; - return sum; -} - -static INLINE int highbd_horz_scalar_product(const uint16_t *a, - const int16_t *b) { - int sum = 0; - for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k]; - return sum; -} - -static INLINE int highbd_vert_scalar_product(const uint16_t *a, - ptrdiff_t a_stride, - const int16_t *b) { - int sum = 0; - for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k]; - return sum; -} - -static const InterpKernel *get_filter_base(const int16_t *filter) { - // NOTE: This assumes that the filter table is 256-byte aligned. - // TODO(agrange) Modify to make independent of table alignment. - return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); -} - -static int get_filter_offset(const int16_t *f, const InterpKernel *base) { - return (int)((const InterpKernel *)(intptr_t)f - base); -} - -static void convolve_add_src_horiz_hip(const uint8_t *src, ptrdiff_t src_stride, - uint16_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, int x0_q4, - int x_step_q4, int w, int h, - int round0_bits) { - const int bd = 8; - src -= SUBPEL_TAPS / 2 - 1; - for (int y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (int x = 0; x < w; ++x) { - const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) + - (1 << (bd + FILTER_BITS - 1)); - const int sum = horz_scalar_product(src_x, x_filter) + rounding; - dst[x] = (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, round0_bits), 0, - WIENER_CLAMP_LIMIT(round0_bits, bd) - 1); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void convolve_add_src_vert_hip(const uint16_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel 
*y_filters, int y0_q4, - int y_step_q4, int w, int h, - int round1_bits) { - const int bd = 8; - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - - for (int x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (int y = 0; y < h; ++y) { - const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - const int rounding = - ((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) - - (1 << (bd + round1_bits - 1)); - const int sum = - highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding; - dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, round1_bits)); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -void av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, - const ConvolveParams *conv_params) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - uint16_t temp[WIENER_MAX_EXT_SIZE * MAX_SB_SIZE]; - const int intermediate_height = - (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS - 1; - memset(temp + (intermediate_height * MAX_SB_SIZE), 0, MAX_SB_SIZE); - - assert(w <= MAX_SB_SIZE); - assert(h <= MAX_SB_SIZE); - assert(y_step_q4 <= 32); - assert(x_step_q4 <= 32); - - convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1), - src_stride, temp, MAX_SB_SIZE, filters_x, x0_q4, - x_step_q4, w, intermediate_height, - conv_params->round_0); - convolve_add_src_vert_hip(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), - MAX_SB_SIZE, dst, dst_stride, filters_y, y0_q4, - y_step_q4, w, h, conv_params->round_1); -} - -static void highbd_convolve_add_src_horiz_hip( - const uint8_t *src8, ptrdiff_t src_stride, 
uint16_t *dst, - ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, - int x_step_q4, int w, int h, int round0_bits, int bd) { - const int extraprec_clamp_limit = WIENER_CLAMP_LIMIT(round0_bits, bd); - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - src -= SUBPEL_TAPS / 2 - 1; - for (int y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (int x = 0; x < w; ++x) { - const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) + - (1 << (bd + FILTER_BITS - 1)); - const int sum = highbd_horz_scalar_product(src_x, x_filter) + rounding; - dst[x] = (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, round0_bits), 0, - extraprec_clamp_limit - 1); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void highbd_convolve_add_src_vert_hip( - const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst8, - ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, - int y_step_q4, int w, int h, int round1_bits, int bd) { - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (int x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (int y = 0; y < h; ++y) { - const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - const int rounding = - ((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) - - (1 << (bd + round1_bits - 1)); - const int sum = - highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding; - dst[y * dst_stride] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, round1_bits), bd); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -void av1_highbd_wiener_convolve_add_src_c( - const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, - ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, int w, int h, - const ConvolveParams *conv_params, 
int bd) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - uint16_t temp[WIENER_MAX_EXT_SIZE * MAX_SB_SIZE]; - const int intermediate_height = - (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - - assert(w <= MAX_SB_SIZE); - assert(h <= MAX_SB_SIZE); - assert(y_step_q4 <= 32); - assert(x_step_q4 <= 32); - assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16); - - highbd_convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1), - src_stride, temp, MAX_SB_SIZE, filters_x, - x0_q4, x_step_q4, w, intermediate_height, - conv_params->round_0, bd); - highbd_convolve_add_src_vert_hip( - temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, dst, dst_stride, - filters_y, y0_q4, y_step_q4, w, h, conv_params->round_1, bd); -} diff --git a/third_party/aom/av1/common/convolve.h b/third_party/aom/av1/common/convolve.h deleted file mode 100644 index 4109dd843..000000000 --- a/third_party/aom/av1/common/convolve.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_CONVOLVE_H_ -#define AOM_AV1_COMMON_CONVOLVE_H_ -#include "av1/common/filter.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef uint16_t CONV_BUF_TYPE; -typedef struct ConvolveParams { - int do_average; - CONV_BUF_TYPE *dst; - int dst_stride; - int round_0; - int round_1; - int plane; - int is_compound; - int use_jnt_comp_avg; - int fwd_offset; - int bck_offset; -} ConvolveParams; - -#define ROUND0_BITS 3 -#define COMPOUND_ROUND1_BITS 7 -#define WIENER_ROUND0_BITS 3 - -#define WIENER_CLAMP_LIMIT(r0, bd) (1 << ((bd) + 1 + FILTER_BITS - r0)) - -typedef void (*aom_convolve_fn_t)(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params); - -typedef void (*aom_highbd_convolve_fn_t)( - const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd); - -struct AV1Common; -struct scale_factors; - -void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - InterpFilters interp_filters, const int subpel_x_q4, - int x_step_q4, const int subpel_y_q4, int y_step_q4, - int scaled, ConvolveParams *conv_params, - const struct scale_factors *sf, int is_intrabc); - -static INLINE ConvolveParams get_conv_params_no_round(int do_average, int plane, - CONV_BUF_TYPE *dst, - int dst_stride, - int is_compound, int bd) { - ConvolveParams conv_params; - conv_params.do_average = do_average; - assert(IMPLIES(do_average, is_compound)); - conv_params.is_compound = is_compound; - conv_params.round_0 = ROUND0_BITS; - conv_params.round_1 = is_compound ? 
COMPOUND_ROUND1_BITS - : 2 * FILTER_BITS - conv_params.round_0; - const int intbufrange = bd + FILTER_BITS - conv_params.round_0 + 2; - assert(IMPLIES(bd < 12, intbufrange <= 16)); - if (intbufrange > 16) { - conv_params.round_0 += intbufrange - 16; - if (!is_compound) conv_params.round_1 -= intbufrange - 16; - } - // TODO(yunqing): The following dst should only be valid while - // is_compound = 1; - conv_params.dst = dst; - conv_params.dst_stride = dst_stride; - conv_params.plane = plane; - return conv_params; -} - -static INLINE ConvolveParams get_conv_params(int do_average, int plane, - int bd) { - return get_conv_params_no_round(do_average, plane, NULL, 0, 0, bd); -} - -static INLINE ConvolveParams get_conv_params_wiener(int bd) { - ConvolveParams conv_params; - (void)bd; - conv_params.do_average = 0; - conv_params.is_compound = 0; - conv_params.round_0 = WIENER_ROUND0_BITS; - conv_params.round_1 = 2 * FILTER_BITS - conv_params.round_0; - const int intbufrange = bd + FILTER_BITS - conv_params.round_0 + 2; - assert(IMPLIES(bd < 12, intbufrange <= 16)); - if (intbufrange > 16) { - conv_params.round_0 += intbufrange - 16; - conv_params.round_1 -= intbufrange - 16; - } - conv_params.dst = NULL; - conv_params.dst_stride = 0; - conv_params.plane = 0; - return conv_params; -} - -void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride, - uint8_t *dst, int dst_stride, int w, int h, - InterpFilters interp_filters, - const int subpel_x_q4, int x_step_q4, - const int subpel_y_q4, int y_step_q4, - int scaled, ConvolveParams *conv_params, - const struct scale_factors *sf, - int is_intrabc, int bd); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_CONVOLVE_H_ diff --git a/third_party/aom/av1/common/debugmodes.c b/third_party/aom/av1/common/debugmodes.c deleted file mode 100644 index 868f341b5..000000000 --- a/third_party/aom/av1/common/debugmodes.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <stdio.h> - -#include "av1/common/blockd.h" -#include "av1/common/enums.h" -#include "av1/common/onyxc_int.h" - -static void log_frame_info(AV1_COMMON *cm, const char *str, FILE *f) { - fprintf(f, "%s", str); - fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame, - cm->show_frame, cm->base_qindex); -} -/* This function dereferences a pointer to the mbmi structure - * and uses the passed in member offset to print out the value of an integer - * for each mbmi member value in the mi structure. 
- */ -static void print_mi_data(AV1_COMMON *cm, FILE *file, const char *descriptor, - size_t member_offset) { - int mi_row, mi_col; - MB_MODE_INFO **mi = cm->mi_grid_visible; - int rows = cm->mi_rows; - int cols = cm->mi_cols; - char prefix = descriptor[0]; - - log_frame_info(cm, descriptor, file); - for (mi_row = 0; mi_row < rows; mi_row++) { - fprintf(file, "%c ", prefix); - for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(file, "%2d ", *((char *)((char *)(mi[0]) + member_offset))); - mi++; - } - fprintf(file, "\n"); - mi += MAX_MIB_SIZE; - } - fprintf(file, "\n"); -} - -void av1_print_modes_and_motion_vectors(AV1_COMMON *cm, const char *file) { - int mi_row; - int mi_col; - FILE *mvs = fopen(file, "a"); - MB_MODE_INFO **mi = cm->mi_grid_visible; - int rows = cm->mi_rows; - int cols = cm->mi_cols; - - print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type)); - print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode)); - print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0])); - print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size)); - print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode)); - - // output skip infomation. - log_frame_info(cm, "Skips:", mvs); - for (mi_row = 0; mi_row < rows; mi_row++) { - fprintf(mvs, "S "); - for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%2d ", mi[0]->skip); - mi++; - } - fprintf(mvs, "\n"); - mi += MAX_MIB_SIZE; - } - fprintf(mvs, "\n"); - - // output motion vectors. 
- log_frame_info(cm, "Vectors ", mvs); - mi = cm->mi_grid_visible; - for (mi_row = 0; mi_row < rows; mi_row++) { - fprintf(mvs, "V "); - for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, mi[0]->mv[0].as_mv.col); - mi++; - } - fprintf(mvs, "\n"); - mi += MAX_MIB_SIZE; - } - fprintf(mvs, "\n"); - - fclose(mvs); -} - -void av1_print_uncompressed_frame_header(const uint8_t *data, int size, - const char *filename) { - FILE *hdrFile = fopen(filename, "w"); - fwrite(data, size, sizeof(uint8_t), hdrFile); - fclose(hdrFile); -} - -void av1_print_frame_contexts(const FRAME_CONTEXT *fc, const char *filename) { - FILE *fcFile = fopen(filename, "w"); - const uint16_t *fcp = (uint16_t *)fc; - const unsigned int n_contexts = sizeof(FRAME_CONTEXT) / sizeof(uint16_t); - unsigned int i; - - for (i = 0; i < n_contexts; ++i) fprintf(fcFile, "%d ", *fcp++); - fclose(fcFile); -} diff --git a/third_party/aom/av1/common/entropy.c b/third_party/aom/av1/common/entropy.c deleted file mode 100644 index 4f95ef69b..000000000 --- a/third_party/aom/av1/common/entropy.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "config/aom_config.h" - -#include "aom/aom_integer.h" -#include "aom_mem/aom_mem.h" -#include "av1/common/blockd.h" -#include "av1/common/entropy.h" -#include "av1/common/entropymode.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/scan.h" -#include "av1/common/token_cdfs.h" -#include "av1/common/txb_common.h" - -static int get_q_ctx(int q) { - if (q <= 20) return 0; - if (q <= 60) return 1; - if (q <= 120) return 2; - return 3; -} - -void av1_default_coef_probs(AV1_COMMON *cm) { - const int index = get_q_ctx(cm->base_qindex); -#if CONFIG_ENTROPY_STATS - cm->coef_cdf_category = index; -#endif - - av1_copy(cm->fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index]); - av1_copy(cm->fc->eob_extra_cdf, av1_default_eob_extra_cdfs[index]); - av1_copy(cm->fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index]); - av1_copy(cm->fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index]); - av1_copy(cm->fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index]); - av1_copy(cm->fc->coeff_base_eob_cdf, - av1_default_coeff_base_eob_multi_cdfs[index]); - av1_copy(cm->fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index]); - av1_copy(cm->fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index]); - av1_copy(cm->fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index]); - av1_copy(cm->fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index]); - av1_copy(cm->fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index]); - av1_copy(cm->fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index]); - av1_copy(cm->fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index]); -} - -static void reset_cdf_symbol_counter(aom_cdf_prob *cdf_ptr, int num_cdfs, - int cdf_stride, int nsymbs) { - for (int i = 0; i < num_cdfs; i++) { - cdf_ptr[i * cdf_stride + nsymbs] = 0; - } -} - -#define RESET_CDF_COUNTER(cname, nsymbs) \ - RESET_CDF_COUNTER_STRIDE(cname, nsymbs, CDF_SIZE(nsymbs)) - -#define RESET_CDF_COUNTER_STRIDE(cname, nsymbs, cdf_stride) \ - do { \ - aom_cdf_prob *cdf_ptr = 
(aom_cdf_prob *)cname; \ - int array_size = (int)sizeof(cname) / sizeof(aom_cdf_prob); \ - int num_cdfs = array_size / cdf_stride; \ - reset_cdf_symbol_counter(cdf_ptr, num_cdfs, cdf_stride, nsymbs); \ - } while (0) - -static void reset_nmv_counter(nmv_context *nmv) { - RESET_CDF_COUNTER(nmv->joints_cdf, 4); - for (int i = 0; i < 2; i++) { - RESET_CDF_COUNTER(nmv->comps[i].classes_cdf, MV_CLASSES); - RESET_CDF_COUNTER(nmv->comps[i].class0_fp_cdf, MV_FP_SIZE); - RESET_CDF_COUNTER(nmv->comps[i].fp_cdf, MV_FP_SIZE); - RESET_CDF_COUNTER(nmv->comps[i].sign_cdf, 2); - RESET_CDF_COUNTER(nmv->comps[i].class0_hp_cdf, 2); - RESET_CDF_COUNTER(nmv->comps[i].hp_cdf, 2); - RESET_CDF_COUNTER(nmv->comps[i].class0_cdf, CLASS0_SIZE); - RESET_CDF_COUNTER(nmv->comps[i].bits_cdf, 2); - } -} - -void av1_reset_cdf_symbol_counters(FRAME_CONTEXT *fc) { - RESET_CDF_COUNTER(fc->txb_skip_cdf, 2); - RESET_CDF_COUNTER(fc->eob_extra_cdf, 2); - RESET_CDF_COUNTER(fc->dc_sign_cdf, 2); - RESET_CDF_COUNTER(fc->eob_flag_cdf16, 5); - RESET_CDF_COUNTER(fc->eob_flag_cdf32, 6); - RESET_CDF_COUNTER(fc->eob_flag_cdf64, 7); - RESET_CDF_COUNTER(fc->eob_flag_cdf128, 8); - RESET_CDF_COUNTER(fc->eob_flag_cdf256, 9); - RESET_CDF_COUNTER(fc->eob_flag_cdf512, 10); - RESET_CDF_COUNTER(fc->eob_flag_cdf1024, 11); - RESET_CDF_COUNTER(fc->coeff_base_eob_cdf, 3); - RESET_CDF_COUNTER(fc->coeff_base_cdf, 4); - RESET_CDF_COUNTER(fc->coeff_br_cdf, BR_CDF_SIZE); - RESET_CDF_COUNTER(fc->newmv_cdf, 2); - RESET_CDF_COUNTER(fc->zeromv_cdf, 2); - RESET_CDF_COUNTER(fc->refmv_cdf, 2); - RESET_CDF_COUNTER(fc->drl_cdf, 2); - RESET_CDF_COUNTER(fc->inter_compound_mode_cdf, INTER_COMPOUND_MODES); - RESET_CDF_COUNTER(fc->compound_type_cdf, COMPOUND_TYPES - 1); - RESET_CDF_COUNTER(fc->wedge_idx_cdf, 16); - RESET_CDF_COUNTER(fc->interintra_cdf, 2); - RESET_CDF_COUNTER(fc->wedge_interintra_cdf, 2); - RESET_CDF_COUNTER(fc->interintra_mode_cdf, INTERINTRA_MODES); - RESET_CDF_COUNTER(fc->motion_mode_cdf, MOTION_MODES); - 
RESET_CDF_COUNTER(fc->obmc_cdf, 2); - RESET_CDF_COUNTER(fc->palette_y_size_cdf, PALETTE_SIZES); - RESET_CDF_COUNTER(fc->palette_uv_size_cdf, PALETTE_SIZES); - for (int j = 0; j < PALETTE_SIZES; j++) { - int nsymbs = j + PALETTE_MIN_SIZE; - RESET_CDF_COUNTER_STRIDE(fc->palette_y_color_index_cdf[j], nsymbs, - CDF_SIZE(PALETTE_COLORS)); - RESET_CDF_COUNTER_STRIDE(fc->palette_uv_color_index_cdf[j], nsymbs, - CDF_SIZE(PALETTE_COLORS)); - } - RESET_CDF_COUNTER(fc->palette_y_mode_cdf, 2); - RESET_CDF_COUNTER(fc->palette_uv_mode_cdf, 2); - RESET_CDF_COUNTER(fc->comp_inter_cdf, 2); - RESET_CDF_COUNTER(fc->single_ref_cdf, 2); - RESET_CDF_COUNTER(fc->comp_ref_type_cdf, 2); - RESET_CDF_COUNTER(fc->uni_comp_ref_cdf, 2); - RESET_CDF_COUNTER(fc->comp_ref_cdf, 2); - RESET_CDF_COUNTER(fc->comp_bwdref_cdf, 2); - RESET_CDF_COUNTER(fc->txfm_partition_cdf, 2); - RESET_CDF_COUNTER(fc->compound_index_cdf, 2); - RESET_CDF_COUNTER(fc->comp_group_idx_cdf, 2); - RESET_CDF_COUNTER(fc->skip_mode_cdfs, 2); - RESET_CDF_COUNTER(fc->skip_cdfs, 2); - RESET_CDF_COUNTER(fc->intra_inter_cdf, 2); - reset_nmv_counter(&fc->nmvc); - reset_nmv_counter(&fc->ndvc); - RESET_CDF_COUNTER(fc->intrabc_cdf, 2); - RESET_CDF_COUNTER(fc->seg.tree_cdf, MAX_SEGMENTS); - RESET_CDF_COUNTER(fc->seg.pred_cdf, 2); - RESET_CDF_COUNTER(fc->seg.spatial_pred_seg_cdf, MAX_SEGMENTS); - RESET_CDF_COUNTER(fc->filter_intra_cdfs, 2); - RESET_CDF_COUNTER(fc->filter_intra_mode_cdf, FILTER_INTRA_MODES); - RESET_CDF_COUNTER(fc->switchable_restore_cdf, RESTORE_SWITCHABLE_TYPES); - RESET_CDF_COUNTER(fc->wiener_restore_cdf, 2); - RESET_CDF_COUNTER(fc->sgrproj_restore_cdf, 2); - RESET_CDF_COUNTER(fc->y_mode_cdf, INTRA_MODES); - RESET_CDF_COUNTER_STRIDE(fc->uv_mode_cdf[0], UV_INTRA_MODES - 1, - CDF_SIZE(UV_INTRA_MODES)); - RESET_CDF_COUNTER(fc->uv_mode_cdf[1], UV_INTRA_MODES); - for (int i = 0; i < PARTITION_CONTEXTS; i++) { - if (i < 4) { - RESET_CDF_COUNTER_STRIDE(fc->partition_cdf[i], 4, CDF_SIZE(10)); - } else if (i < 16) { - 
RESET_CDF_COUNTER(fc->partition_cdf[i], 10); - } else { - RESET_CDF_COUNTER_STRIDE(fc->partition_cdf[i], 8, CDF_SIZE(10)); - } - } - RESET_CDF_COUNTER(fc->switchable_interp_cdf, SWITCHABLE_FILTERS); - RESET_CDF_COUNTER(fc->kf_y_cdf, INTRA_MODES); - RESET_CDF_COUNTER(fc->angle_delta_cdf, 2 * MAX_ANGLE_DELTA + 1); - RESET_CDF_COUNTER_STRIDE(fc->tx_size_cdf[0], MAX_TX_DEPTH, - CDF_SIZE(MAX_TX_DEPTH + 1)); - RESET_CDF_COUNTER(fc->tx_size_cdf[1], MAX_TX_DEPTH + 1); - RESET_CDF_COUNTER(fc->tx_size_cdf[2], MAX_TX_DEPTH + 1); - RESET_CDF_COUNTER(fc->tx_size_cdf[3], MAX_TX_DEPTH + 1); - RESET_CDF_COUNTER(fc->delta_q_cdf, DELTA_Q_PROBS + 1); - RESET_CDF_COUNTER(fc->delta_lf_cdf, DELTA_LF_PROBS + 1); - for (int i = 0; i < FRAME_LF_COUNT; i++) { - RESET_CDF_COUNTER(fc->delta_lf_multi_cdf[i], DELTA_LF_PROBS + 1); - } - RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[1], 7, CDF_SIZE(TX_TYPES)); - RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[2], 5, CDF_SIZE(TX_TYPES)); - RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[1], 16, CDF_SIZE(TX_TYPES)); - RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[2], 12, CDF_SIZE(TX_TYPES)); - RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[3], 2, CDF_SIZE(TX_TYPES)); - RESET_CDF_COUNTER(fc->cfl_sign_cdf, CFL_JOINT_SIGNS); - RESET_CDF_COUNTER(fc->cfl_alpha_cdf, CFL_ALPHABET_SIZE); -} diff --git a/third_party/aom/av1/common/entropy.h b/third_party/aom/av1/common/entropy.h deleted file mode 100644 index 991692c2f..000000000 --- a/third_party/aom/av1/common/entropy.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_ENTROPY_H_ -#define AOM_AV1_COMMON_ENTROPY_H_ - -#include "config/aom_config.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/prob.h" - -#include "av1/common/common.h" -#include "av1/common/common_data.h" -#include "av1/common/enums.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define TOKEN_CDF_Q_CTXS 4 - -#define TXB_SKIP_CONTEXTS 13 - -#define EOB_COEF_CONTEXTS 9 - -#define SIG_COEF_CONTEXTS_2D 26 -#define SIG_COEF_CONTEXTS_1D 16 -#define SIG_COEF_CONTEXTS_EOB 4 -#define SIG_COEF_CONTEXTS (SIG_COEF_CONTEXTS_2D + SIG_COEF_CONTEXTS_1D) - -#define COEFF_BASE_CONTEXTS (SIG_COEF_CONTEXTS) -#define DC_SIGN_CONTEXTS 3 - -#define BR_TMP_OFFSET 12 -#define BR_REF_CAT 4 -#define LEVEL_CONTEXTS 21 - -#define NUM_BASE_LEVELS 2 - -#define BR_CDF_SIZE (4) -#define COEFF_BASE_RANGE (4 * (BR_CDF_SIZE - 1)) - -#define COEFF_CONTEXT_BITS 6 -#define COEFF_CONTEXT_MASK ((1 << COEFF_CONTEXT_BITS) - 1) -#define MAX_BASE_BR_RANGE (COEFF_BASE_RANGE + NUM_BASE_LEVELS + 1) - -#define BASE_CONTEXT_POSITION_NUM 12 - -typedef enum TX_CLASS { - TX_CLASS_2D = 0, - TX_CLASS_HORIZ = 1, - TX_CLASS_VERT = 2, - TX_CLASSES = 3, -} TX_CLASS; - -#define DCT_MAX_VALUE 16384 -#define DCT_MAX_VALUE_HIGH10 65536 -#define DCT_MAX_VALUE_HIGH12 262144 - -/* Coefficients are predicted via a 3-dimensional probability table indexed on - * REF_TYPES, COEF_BANDS and COEF_CONTEXTS. 
*/ -#define REF_TYPES 2 // intra=0, inter=1 - -struct AV1Common; -struct frame_contexts; -void av1_reset_cdf_symbol_counters(struct frame_contexts *fc); -void av1_default_coef_probs(struct AV1Common *cm); - -struct frame_contexts; - -typedef char ENTROPY_CONTEXT; - -static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, - ENTROPY_CONTEXT b) { - return (a != 0) + (b != 0); -} - -static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, - const ENTROPY_CONTEXT *l) { - ENTROPY_CONTEXT above_ec = 0, left_ec = 0; - - switch (tx_size) { - case TX_4X4: - above_ec = a[0] != 0; - left_ec = l[0] != 0; - break; - case TX_4X8: - above_ec = a[0] != 0; - left_ec = !!*(const uint16_t *)l; - break; - case TX_8X4: - above_ec = !!*(const uint16_t *)a; - left_ec = l[0] != 0; - break; - case TX_8X16: - above_ec = !!*(const uint16_t *)a; - left_ec = !!*(const uint32_t *)l; - break; - case TX_16X8: - above_ec = !!*(const uint32_t *)a; - left_ec = !!*(const uint16_t *)l; - break; - case TX_16X32: - above_ec = !!*(const uint32_t *)a; - left_ec = !!*(const uint64_t *)l; - break; - case TX_32X16: - above_ec = !!*(const uint64_t *)a; - left_ec = !!*(const uint32_t *)l; - break; - case TX_8X8: - above_ec = !!*(const uint16_t *)a; - left_ec = !!*(const uint16_t *)l; - break; - case TX_16X16: - above_ec = !!*(const uint32_t *)a; - left_ec = !!*(const uint32_t *)l; - break; - case TX_32X32: - above_ec = !!*(const uint64_t *)a; - left_ec = !!*(const uint64_t *)l; - break; - case TX_64X64: - above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8)); - left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8)); - break; - case TX_32X64: - above_ec = !!*(const uint64_t *)a; - left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8)); - break; - case TX_64X32: - above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8)); - left_ec = !!*(const uint64_t *)l; - break; - case TX_4X16: - above_ec = a[0] != 0; - left_ec = !!*(const uint32_t *)l; - 
break; - case TX_16X4: - above_ec = !!*(const uint32_t *)a; - left_ec = l[0] != 0; - break; - case TX_8X32: - above_ec = !!*(const uint16_t *)a; - left_ec = !!*(const uint64_t *)l; - break; - case TX_32X8: - above_ec = !!*(const uint64_t *)a; - left_ec = !!*(const uint16_t *)l; - break; - case TX_16X64: - above_ec = !!*(const uint32_t *)a; - left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8)); - break; - case TX_64X16: - above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8)); - left_ec = !!*(const uint32_t *)l; - break; - default: assert(0 && "Invalid transform size."); break; - } - return combine_entropy_contexts(above_ec, left_ec); -} - -static INLINE TX_SIZE get_txsize_entropy_ctx(TX_SIZE txsize) { - return (TX_SIZE)((txsize_sqr_map[txsize] + txsize_sqr_up_map[txsize] + 1) >> - 1); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_ENTROPY_H_ diff --git a/third_party/aom/av1/common/entropymode.c b/third_party/aom/av1/common/entropymode.c deleted file mode 100644 index 41dc30ddb..000000000 --- a/third_party/aom/av1/common/entropymode.c +++ /dev/null @@ -1,1103 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "aom_mem/aom_mem.h" - -#include "av1/common/reconinter.h" -#include "av1/common/scan.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/seg_common.h" -#include "av1/common/txb_common.h" - -static const aom_cdf_prob - default_kf_y_mode_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS][CDF_SIZE( - INTRA_MODES)] = { - { { AOM_CDF13(15588, 17027, 19338, 20218, 20682, 21110, 21825, 23244, - 24189, 28165, 29093, 30466) }, - { AOM_CDF13(12016, 18066, 19516, 20303, 20719, 21444, 21888, 23032, - 24434, 28658, 30172, 31409) }, - { AOM_CDF13(10052, 10771, 22296, 22788, 23055, 23239, 24133, 25620, - 26160, 29336, 29929, 31567) }, - { AOM_CDF13(14091, 15406, 16442, 18808, 19136, 19546, 19998, 22096, - 24746, 29585, 30958, 32462) }, - { AOM_CDF13(12122, 13265, 15603, 16501, 18609, 20033, 22391, 25583, - 26437, 30261, 31073, 32475) } }, - { { AOM_CDF13(10023, 19585, 20848, 21440, 21832, 22760, 23089, 24023, - 25381, 29014, 30482, 31436) }, - { AOM_CDF13(5983, 24099, 24560, 24886, 25066, 25795, 25913, 26423, - 27610, 29905, 31276, 31794) }, - { AOM_CDF13(7444, 12781, 20177, 20728, 21077, 21607, 22170, 23405, - 24469, 27915, 29090, 30492) }, - { AOM_CDF13(8537, 14689, 15432, 17087, 17408, 18172, 18408, 19825, - 24649, 29153, 31096, 32210) }, - { AOM_CDF13(7543, 14231, 15496, 16195, 17905, 20717, 21984, 24516, - 26001, 29675, 30981, 31994) } }, - { { AOM_CDF13(12613, 13591, 21383, 22004, 22312, 22577, 23401, 25055, - 25729, 29538, 30305, 32077) }, - { AOM_CDF13(9687, 13470, 18506, 19230, 19604, 20147, 20695, 22062, - 23219, 27743, 29211, 30907) }, - { AOM_CDF13(6183, 6505, 26024, 26252, 26366, 26434, 27082, 28354, 28555, - 30467, 30794, 32086) }, - { AOM_CDF13(10718, 11734, 14954, 17224, 17565, 17924, 18561, 21523, - 23878, 28975, 30287, 32252) }, - { AOM_CDF13(9194, 9858, 16501, 17263, 18424, 19171, 21563, 25961, 26561, - 30072, 30737, 32463) } }, - { { AOM_CDF13(12602, 14399, 15488, 18381, 18778, 19315, 19724, 21419, - 25060, 29696, 30917, 32409) }, - { 
AOM_CDF13(8203, 13821, 14524, 17105, 17439, 18131, 18404, 19468, - 25225, 29485, 31158, 32342) }, - { AOM_CDF13(8451, 9731, 15004, 17643, 18012, 18425, 19070, 21538, 24605, - 29118, 30078, 32018) }, - { AOM_CDF13(7714, 9048, 9516, 16667, 16817, 16994, 17153, 18767, 26743, - 30389, 31536, 32528) }, - { AOM_CDF13(8843, 10280, 11496, 15317, 16652, 17943, 19108, 22718, - 25769, 29953, 30983, 32485) } }, - { { AOM_CDF13(12578, 13671, 15979, 16834, 19075, 20913, 22989, 25449, - 26219, 30214, 31150, 32477) }, - { AOM_CDF13(9563, 13626, 15080, 15892, 17756, 20863, 22207, 24236, - 25380, 29653, 31143, 32277) }, - { AOM_CDF13(8356, 8901, 17616, 18256, 19350, 20106, 22598, 25947, 26466, - 29900, 30523, 32261) }, - { AOM_CDF13(10835, 11815, 13124, 16042, 17018, 18039, 18947, 22753, - 24615, 29489, 30883, 32482) }, - { AOM_CDF13(7618, 8288, 9859, 10509, 15386, 18657, 22903, 28776, 29180, - 31355, 31802, 32593) } } - }; - -static const aom_cdf_prob default_angle_delta_cdf[DIRECTIONAL_MODES][CDF_SIZE( - 2 * MAX_ANGLE_DELTA + 1)] = { - { AOM_CDF7(2180, 5032, 7567, 22776, 26989, 30217) }, - { AOM_CDF7(2301, 5608, 8801, 23487, 26974, 30330) }, - { AOM_CDF7(3780, 11018, 13699, 19354, 23083, 31286) }, - { AOM_CDF7(4581, 11226, 15147, 17138, 21834, 28397) }, - { AOM_CDF7(1737, 10927, 14509, 19588, 22745, 28823) }, - { AOM_CDF7(2664, 10176, 12485, 17650, 21600, 30495) }, - { AOM_CDF7(2240, 11096, 15453, 20341, 22561, 28917) }, - { AOM_CDF7(3605, 10428, 12459, 17676, 21244, 30655) } -}; - -static const aom_cdf_prob default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE( - INTRA_MODES)] = { { AOM_CDF13(22801, 23489, 24293, 24756, 25601, 26123, - 26606, 27418, 27945, 29228, 29685, 30349) }, - { AOM_CDF13(18673, 19845, 22631, 23318, 23950, 24649, - 25527, 27364, 28152, 29701, 29984, 30852) }, - { AOM_CDF13(19770, 20979, 23396, 23939, 24241, 24654, - 25136, 27073, 27830, 29360, 29730, 30659) }, - { AOM_CDF13(20155, 21301, 22838, 23178, 23261, 23533, - 23703, 24804, 25352, 26575, 27016, 28049) } 
}; - -static const aom_cdf_prob - default_uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES][CDF_SIZE( - UV_INTRA_MODES)] = { - { { AOM_CDF13(22631, 24152, 25378, 25661, 25986, 26520, 27055, 27923, - 28244, 30059, 30941, 31961) }, - { AOM_CDF13(9513, 26881, 26973, 27046, 27118, 27664, 27739, 27824, - 28359, 29505, 29800, 31796) }, - { AOM_CDF13(9845, 9915, 28663, 28704, 28757, 28780, 29198, 29822, 29854, - 30764, 31777, 32029) }, - { AOM_CDF13(13639, 13897, 14171, 25331, 25606, 25727, 25953, 27148, - 28577, 30612, 31355, 32493) }, - { AOM_CDF13(9764, 9835, 9930, 9954, 25386, 27053, 27958, 28148, 28243, - 31101, 31744, 32363) }, - { AOM_CDF13(11825, 13589, 13677, 13720, 15048, 29213, 29301, 29458, - 29711, 31161, 31441, 32550) }, - { AOM_CDF13(14175, 14399, 16608, 16821, 17718, 17775, 28551, 30200, - 30245, 31837, 32342, 32667) }, - { AOM_CDF13(12885, 13038, 14978, 15590, 15673, 15748, 16176, 29128, - 29267, 30643, 31961, 32461) }, - { AOM_CDF13(12026, 13661, 13874, 15305, 15490, 15726, 15995, 16273, - 28443, 30388, 30767, 32416) }, - { AOM_CDF13(19052, 19840, 20579, 20916, 21150, 21467, 21885, 22719, - 23174, 28861, 30379, 32175) }, - { AOM_CDF13(18627, 19649, 20974, 21219, 21492, 21816, 22199, 23119, - 23527, 27053, 31397, 32148) }, - { AOM_CDF13(17026, 19004, 19997, 20339, 20586, 21103, 21349, 21907, - 22482, 25896, 26541, 31819) }, - { AOM_CDF13(12124, 13759, 14959, 14992, 15007, 15051, 15078, 15166, - 15255, 15753, 16039, 16606) } }, - { { AOM_CDF14(10407, 11208, 12900, 13181, 13823, 14175, 14899, 15656, - 15986, 20086, 20995, 22455, 24212) }, - { AOM_CDF14(4532, 19780, 20057, 20215, 20428, 21071, 21199, 21451, - 22099, 24228, 24693, 27032, 29472) }, - { AOM_CDF14(5273, 5379, 20177, 20270, 20385, 20439, 20949, 21695, 21774, - 23138, 24256, 24703, 26679) }, - { AOM_CDF14(6740, 7167, 7662, 14152, 14536, 14785, 15034, 16741, 18371, - 21520, 22206, 23389, 24182) }, - { AOM_CDF14(4987, 5368, 5928, 6068, 19114, 20315, 21857, 22253, 22411, - 24911, 25380, 26027, 26376) }, - 
{ AOM_CDF14(5370, 6889, 7247, 7393, 9498, 21114, 21402, 21753, 21981, - 24780, 25386, 26517, 27176) }, - { AOM_CDF14(4816, 4961, 7204, 7326, 8765, 8930, 20169, 20682, 20803, - 23188, 23763, 24455, 24940) }, - { AOM_CDF14(6608, 6740, 8529, 9049, 9257, 9356, 9735, 18827, 19059, - 22336, 23204, 23964, 24793) }, - { AOM_CDF14(5998, 7419, 7781, 8933, 9255, 9549, 9753, 10417, 18898, - 22494, 23139, 24764, 25989) }, - { AOM_CDF14(10660, 11298, 12550, 12957, 13322, 13624, 14040, 15004, - 15534, 20714, 21789, 23443, 24861) }, - { AOM_CDF14(10522, 11530, 12552, 12963, 13378, 13779, 14245, 15235, - 15902, 20102, 22696, 23774, 25838) }, - { AOM_CDF14(10099, 10691, 12639, 13049, 13386, 13665, 14125, 15163, - 15636, 19676, 20474, 23519, 25208) }, - { AOM_CDF14(3144, 5087, 7382, 7504, 7593, 7690, 7801, 8064, 8232, 9248, - 9875, 10521, 29048) } } - }; - -static const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE( - EXT_PARTITION_TYPES)] = { - { AOM_CDF4(19132, 25510, 30392) }, - { AOM_CDF4(13928, 19855, 28540) }, - { AOM_CDF4(12522, 23679, 28629) }, - { AOM_CDF4(9896, 18783, 25853) }, - { AOM_CDF10(15597, 20929, 24571, 26706, 27664, 28821, 29601, 30571, 31902) }, - { AOM_CDF10(7925, 11043, 16785, 22470, 23971, 25043, 26651, 28701, 29834) }, - { AOM_CDF10(5414, 13269, 15111, 20488, 22360, 24500, 25537, 26336, 32117) }, - { AOM_CDF10(2662, 6362, 8614, 20860, 23053, 24778, 26436, 27829, 31171) }, - { AOM_CDF10(18462, 20920, 23124, 27647, 28227, 29049, 29519, 30178, 31544) }, - { AOM_CDF10(7689, 9060, 12056, 24992, 25660, 26182, 26951, 28041, 29052) }, - { AOM_CDF10(6015, 9009, 10062, 24544, 25409, 26545, 27071, 27526, 32047) }, - { AOM_CDF10(1394, 2208, 2796, 28614, 29061, 29466, 29840, 30185, 31899) }, - { AOM_CDF10(20137, 21547, 23078, 29566, 29837, 30261, 30524, 30892, 31724) }, - { AOM_CDF10(6732, 7490, 9497, 27944, 28250, 28515, 28969, 29630, 30104) }, - { AOM_CDF10(5945, 7663, 8348, 28683, 29117, 29749, 30064, 30298, 32238) }, - { AOM_CDF10(870, 1212, 1487, 
31198, 31394, 31574, 31743, 31881, 32332) }, - { AOM_CDF8(27899, 28219, 28529, 32484, 32539, 32619, 32639) }, - { AOM_CDF8(6607, 6990, 8268, 32060, 32219, 32338, 32371) }, - { AOM_CDF8(5429, 6676, 7122, 32027, 32227, 32531, 32582) }, - { AOM_CDF8(711, 966, 1172, 32448, 32538, 32617, 32664) }, -}; - -static const aom_cdf_prob default_intra_ext_tx_cdf - [EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)] = { - { - { - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - }, - { - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - }, - { - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - }, - { - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - { 0 }, - }, - }, - { - { - { AOM_CDF7(1535, 8035, 9461, 12751, 23467, 27825) }, - { AOM_CDF7(564, 3335, 9709, 10870, 18143, 28094) }, - { AOM_CDF7(672, 3247, 3676, 11982, 19415, 23127) }, - { AOM_CDF7(5279, 13885, 15487, 18044, 23527, 30252) }, - { AOM_CDF7(4423, 6074, 7985, 10416, 25693, 29298) }, - { AOM_CDF7(1486, 4241, 9460, 10662, 16456, 27694) }, - { AOM_CDF7(439, 2838, 3522, 6737, 18058, 23754) }, - { AOM_CDF7(1190, 4233, 4855, 11670, 20281, 24377) }, - { AOM_CDF7(1045, 4312, 8647, 10159, 18644, 29335) }, - { AOM_CDF7(202, 3734, 4747, 7298, 17127, 24016) }, - { AOM_CDF7(447, 4312, 6819, 8884, 16010, 23858) }, - { AOM_CDF7(277, 4369, 5255, 8905, 16465, 22271) }, - { AOM_CDF7(3409, 5436, 10599, 15599, 19687, 24040) }, - }, - { - { AOM_CDF7(1870, 13742, 14530, 16498, 23770, 27698) }, - { AOM_CDF7(326, 8796, 14632, 15079, 19272, 27486) }, - { AOM_CDF7(484, 7576, 7712, 14443, 19159, 22591) }, - { AOM_CDF7(1126, 15340, 15895, 17023, 20896, 30279) }, - { AOM_CDF7(655, 4854, 5249, 5913, 22099, 27138) }, - { AOM_CDF7(1299, 6458, 8885, 9290, 14851, 25497) }, - { 
AOM_CDF7(311, 5295, 5552, 6885, 16107, 22672) }, - { AOM_CDF7(883, 8059, 8270, 11258, 17289, 21549) }, - { AOM_CDF7(741, 7580, 9318, 10345, 16688, 29046) }, - { AOM_CDF7(110, 7406, 7915, 9195, 16041, 23329) }, - { AOM_CDF7(363, 7974, 9357, 10673, 15629, 24474) }, - { AOM_CDF7(153, 7647, 8112, 9936, 15307, 19996) }, - { AOM_CDF7(3511, 6332, 11165, 15335, 19323, 23594) }, - }, - { - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - }, - { - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, - }, - }, - { - { - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { 
AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - }, - { - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - }, - { - { AOM_CDF5(1127, 12814, 22772, 27483) }, - { AOM_CDF5(145, 6761, 11980, 26667) }, - { AOM_CDF5(362, 5887, 11678, 16725) }, - { AOM_CDF5(385, 15213, 18587, 30693) }, - { AOM_CDF5(25, 2914, 23134, 27903) }, - { AOM_CDF5(60, 4470, 11749, 23991) }, - { AOM_CDF5(37, 3332, 14511, 21448) }, - { AOM_CDF5(157, 6320, 13036, 17439) }, - { AOM_CDF5(119, 6719, 12906, 29396) }, - { AOM_CDF5(47, 5537, 12576, 21499) }, - { AOM_CDF5(269, 6076, 11258, 23115) }, - { AOM_CDF5(83, 5615, 12001, 17228) }, - { AOM_CDF5(1968, 5556, 12023, 18547) }, - }, - { - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 
19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - { AOM_CDF5(6554, 13107, 19661, 26214) }, - }, - }, - }; - -static const aom_cdf_prob - default_inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES][CDF_SIZE( - TX_TYPES)] = { - { - { 0 }, - { 0 }, - { 0 }, - { 0 }, - }, - { - { AOM_CDF16(4458, 5560, 7695, 9709, 13330, 14789, 17537, 20266, 21504, - 22848, 23934, 25474, 27727, 28915, 30631) }, - { AOM_CDF16(1645, 2573, 4778, 5711, 7807, 8622, 10522, 15357, 17674, - 20408, 22517, 25010, 27116, 28856, 30749) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - }, - { - { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, - 24576, 27307, 30037) }, - { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, - 24576, 27307, 30037) }, - { AOM_CDF12(770, 2421, 5225, 12907, 15819, 18927, 21561, 24089, 26595, - 28526, 30529) }, - { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, - 24576, 27307, 30037) }, - }, - { - { AOM_CDF2(16384) }, - { AOM_CDF2(4167) }, - { AOM_CDF2(1998) }, - { AOM_CDF2(748) }, - }, - }; - -static const aom_cdf_prob default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)] = { - AOM_CDF8(1418, 2123, 13340, 18405, 26972, 28343, 32294) -}; - -static const aom_cdf_prob - default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = { - { AOM_CDF16(7637, 20719, 31401, 32481, 32657, 32688, 32692, 32696, 32700, - 32704, 32708, 32712, 32716, 32720, 32724) }, - { AOM_CDF16(14365, 23603, 28135, 31168, 32167, 32395, 32487, 32573, 32620, - 32647, 32668, 32672, 32676, 32680, 32684) }, - { AOM_CDF16(11532, 22380, 28445, 31360, 32349, 32523, 32584, 32649, 32673, - 32677, 32681, 32685, 32689, 32693, 32697) }, - { AOM_CDF16(26990, 31402, 32282, 32571, 32692, 32696, 32700, 32704, 32708, - 32712, 32716, 32720, 32724, 32728, 32732) }, - { 
AOM_CDF16(17248, 26058, 28904, 30608, 31305, 31877, 32126, 32321, 32394, - 32464, 32516, 32560, 32576, 32593, 32622) }, - { AOM_CDF16(14738, 21678, 25779, 27901, 29024, 30302, 30980, 31843, 32144, - 32413, 32520, 32594, 32622, 32656, 32660) } - }; - -static const aom_cdf_prob - default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE( - SWITCHABLE_FILTERS)] = { - { AOM_CDF3(31935, 32720) }, { AOM_CDF3(5568, 32719) }, - { AOM_CDF3(422, 2938) }, { AOM_CDF3(28244, 32608) }, - { AOM_CDF3(31206, 31953) }, { AOM_CDF3(4862, 32121) }, - { AOM_CDF3(770, 1152) }, { AOM_CDF3(20889, 25637) }, - { AOM_CDF3(31910, 32724) }, { AOM_CDF3(4120, 32712) }, - { AOM_CDF3(305, 2247) }, { AOM_CDF3(27403, 32636) }, - { AOM_CDF3(31022, 32009) }, { AOM_CDF3(2963, 32093) }, - { AOM_CDF3(601, 943) }, { AOM_CDF3(14969, 21398) } - }; - -static const aom_cdf_prob default_newmv_cdf[NEWMV_MODE_CONTEXTS][CDF_SIZE(2)] = - { { AOM_CDF2(24035) }, { AOM_CDF2(16630) }, { AOM_CDF2(15339) }, - { AOM_CDF2(8386) }, { AOM_CDF2(12222) }, { AOM_CDF2(4676) } }; - -static const aom_cdf_prob default_zeromv_cdf[GLOBALMV_MODE_CONTEXTS][CDF_SIZE( - 2)] = { { AOM_CDF2(2175) }, { AOM_CDF2(1054) } }; - -static const aom_cdf_prob default_refmv_cdf[REFMV_MODE_CONTEXTS][CDF_SIZE(2)] = - { { AOM_CDF2(23974) }, { AOM_CDF2(24188) }, { AOM_CDF2(17848) }, - { AOM_CDF2(28622) }, { AOM_CDF2(24312) }, { AOM_CDF2(19923) } }; - -static const aom_cdf_prob default_drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)] = { - { AOM_CDF2(13104) }, { AOM_CDF2(24560) }, { AOM_CDF2(18945) } -}; - -static const aom_cdf_prob - default_inter_compound_mode_cdf[INTER_MODE_CONTEXTS][CDF_SIZE( - INTER_COMPOUND_MODES)] = { - { AOM_CDF8(7760, 13823, 15808, 17641, 19156, 20666, 26891) }, - { AOM_CDF8(10730, 19452, 21145, 22749, 24039, 25131, 28724) }, - { AOM_CDF8(10664, 20221, 21588, 22906, 24295, 25387, 28436) }, - { AOM_CDF8(13298, 16984, 20471, 24182, 25067, 25736, 26422) }, - { AOM_CDF8(18904, 23325, 25242, 27432, 27898, 28258, 30758) }, - { 
AOM_CDF8(10725, 17454, 20124, 22820, 24195, 25168, 26046) }, - { AOM_CDF8(17125, 24273, 25814, 27492, 28214, 28704, 30592) }, - { AOM_CDF8(13046, 23214, 24505, 25942, 27435, 28442, 29330) } - }; - -static const aom_cdf_prob default_interintra_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE( - 2)] = { { AOM_CDF2(16384) }, - { AOM_CDF2(26887) }, - { AOM_CDF2(27597) }, - { AOM_CDF2(30237) } }; - -static const aom_cdf_prob - default_interintra_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTERINTRA_MODES)] = - { { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(1875, 11082, 27332) }, - { AOM_CDF4(2473, 9996, 26388) }, - { AOM_CDF4(4238, 11537, 25926) } }; - -static const aom_cdf_prob - default_wedge_interintra_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = { - { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(20036) }, { AOM_CDF2(24957) }, { AOM_CDF2(26704) }, - { AOM_CDF2(27530) }, { AOM_CDF2(29564) }, { AOM_CDF2(29444) }, - { AOM_CDF2(26872) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } - }; - -static const aom_cdf_prob - default_compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES - 1)] = { - { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(23431) }, { AOM_CDF2(13171) }, { AOM_CDF2(11470) }, - { AOM_CDF2(9770) }, { AOM_CDF2(9100) }, { AOM_CDF2(8233) }, - { AOM_CDF2(6172) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(11820) }, { AOM_CDF2(7701) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } - }; - -static const aom_cdf_prob default_wedge_idx_cdf[BLOCK_SIZES_ALL][CDF_SIZE(16)] = - { { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, 
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2438, 4440, 6599, 8663, 11005, 12874, 15751, 18094, 20359, - 22362, 24127, 25702, 27752, 29450, 31171) }, - { AOM_CDF16(806, 3266, 6005, 6738, 7218, 7367, 7771, 14588, 16323, 17367, - 18452, 19422, 22839, 26127, 29629) }, - { AOM_CDF16(2779, 3738, 4683, 7213, 7775, 8017, 8655, 14357, 17939, 21332, - 24520, 27470, 29456, 30529, 31656) }, - { AOM_CDF16(1684, 3625, 5675, 7108, 9302, 11274, 14429, 17144, 19163, - 20961, 22884, 24471, 26719, 28714, 30877) }, - { AOM_CDF16(1142, 3491, 6277, 7314, 8089, 8355, 9023, 13624, 15369, 16730, - 18114, 19313, 22521, 26012, 29550) }, - { AOM_CDF16(2742, 4195, 5727, 8035, 8980, 9336, 10146, 14124, 17270, - 20533, 23434, 25972, 27944, 29570, 31416) }, - { AOM_CDF16(1727, 3948, 6101, 7796, 9841, 12344, 15766, 18944, 20638, - 22038, 23963, 25311, 26988, 28766, 31012) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 
24576, 26624, 28672, 30720) }, - { AOM_CDF16(154, 987, 1925, 2051, 2088, 2111, 2151, 23033, 23703, 24284, - 24985, 25684, 27259, 28883, 30911) }, - { AOM_CDF16(1135, 1322, 1493, 2635, 2696, 2737, 2770, 21016, 22935, 25057, - 27251, 29173, 30089, 30960, 31933) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) }, - { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, - 20480, 22528, 24576, 26624, 28672, 30720) } }; - -static const aom_cdf_prob default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE( - MOTION_MODES)] = { { AOM_CDF3(10923, 21845) }, { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) }, { AOM_CDF3(7651, 24760) }, - { AOM_CDF3(4738, 24765) }, { AOM_CDF3(5391, 25528) }, - { AOM_CDF3(19419, 26810) }, { AOM_CDF3(5123, 23606) }, - { AOM_CDF3(11606, 24308) }, { AOM_CDF3(26260, 29116) }, - { AOM_CDF3(20360, 28062) }, { AOM_CDF3(21679, 26830) }, - { AOM_CDF3(29516, 30701) }, { AOM_CDF3(28898, 30397) }, - { AOM_CDF3(30878, 31335) }, { AOM_CDF3(32507, 32558) }, - { AOM_CDF3(10923, 21845) }, { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(28799, 31390) }, { AOM_CDF3(26431, 30774) }, - { AOM_CDF3(28973, 31594) }, { AOM_CDF3(29742, 31203) } }; - -static const aom_cdf_prob default_obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = { - { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(10437) }, { AOM_CDF2(9371) }, { AOM_CDF2(9301) }, - { AOM_CDF2(17432) }, { AOM_CDF2(14423) }, { AOM_CDF2(15142) }, - { AOM_CDF2(25817) }, { AOM_CDF2(22823) }, { AOM_CDF2(22083) }, - { AOM_CDF2(30128) }, { AOM_CDF2(31014) }, { AOM_CDF2(31560) }, - { AOM_CDF2(32638) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(23664) }, { AOM_CDF2(20901) }, { AOM_CDF2(24008) }, - { AOM_CDF2(26879) } -}; - -static const aom_cdf_prob default_intra_inter_cdf[INTRA_INTER_CONTEXTS] - [CDF_SIZE(2)] = { - { AOM_CDF2(806) }, - { AOM_CDF2(16662) }, - { AOM_CDF2(20186) }, - { AOM_CDF2(26538) } - }; - -static 
const aom_cdf_prob default_comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE( - 2)] = { { AOM_CDF2(26828) }, - { AOM_CDF2(24035) }, - { AOM_CDF2(12031) }, - { AOM_CDF2(10640) }, - { AOM_CDF2(2901) } }; - -static const aom_cdf_prob default_comp_ref_type_cdf[COMP_REF_TYPE_CONTEXTS] - [CDF_SIZE(2)] = { - { AOM_CDF2(1198) }, - { AOM_CDF2(2070) }, - { AOM_CDF2(9166) }, - { AOM_CDF2(7499) }, - { AOM_CDF2(22475) } - }; - -static const aom_cdf_prob - default_uni_comp_ref_cdf[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - - 1][CDF_SIZE(2)] = { - { { AOM_CDF2(5284) }, { AOM_CDF2(3865) }, { AOM_CDF2(3128) } }, - { { AOM_CDF2(23152) }, { AOM_CDF2(14173) }, { AOM_CDF2(15270) } }, - { { AOM_CDF2(31774) }, { AOM_CDF2(25120) }, { AOM_CDF2(26710) } } - }; - -static const aom_cdf_prob default_single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1] - [CDF_SIZE(2)] = { - { { AOM_CDF2(4897) }, - { AOM_CDF2(1555) }, - { AOM_CDF2(4236) }, - { AOM_CDF2(8650) }, - { AOM_CDF2(904) }, - { AOM_CDF2(1444) } }, - { { AOM_CDF2(16973) }, - { AOM_CDF2(16751) }, - { AOM_CDF2(19647) }, - { AOM_CDF2(24773) }, - { AOM_CDF2(11014) }, - { AOM_CDF2(15087) } }, - { { AOM_CDF2(29744) }, - { AOM_CDF2(30279) }, - { AOM_CDF2(31194) }, - { AOM_CDF2(31895) }, - { AOM_CDF2(26875) }, - { AOM_CDF2(30304) } } - }; - -static const aom_cdf_prob - default_comp_ref_cdf[REF_CONTEXTS][FWD_REFS - 1][CDF_SIZE(2)] = { - { { AOM_CDF2(4946) }, { AOM_CDF2(9468) }, { AOM_CDF2(1503) } }, - { { AOM_CDF2(19891) }, { AOM_CDF2(22441) }, { AOM_CDF2(15160) } }, - { { AOM_CDF2(30731) }, { AOM_CDF2(31059) }, { AOM_CDF2(27544) } } - }; - -static const aom_cdf_prob - default_comp_bwdref_cdf[REF_CONTEXTS][BWD_REFS - 1][CDF_SIZE(2)] = { - { { AOM_CDF2(2235) }, { AOM_CDF2(1423) } }, - { { AOM_CDF2(17182) }, { AOM_CDF2(15175) } }, - { { AOM_CDF2(30606) }, { AOM_CDF2(30489) } } - }; - -static const aom_cdf_prob - default_palette_y_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)] = { - { AOM_CDF7(7952, 13000, 18149, 21478, 25527, 29241) }, - { AOM_CDF7(7139, 
11421, 16195, 19544, 23666, 28073) }, - { AOM_CDF7(7788, 12741, 17325, 20500, 24315, 28530) }, - { AOM_CDF7(8271, 14064, 18246, 21564, 25071, 28533) }, - { AOM_CDF7(12725, 19180, 21863, 24839, 27535, 30120) }, - { AOM_CDF7(9711, 14888, 16923, 21052, 25661, 27875) }, - { AOM_CDF7(14940, 20797, 21678, 24186, 27033, 28999) } - }; - -static const aom_cdf_prob - default_palette_uv_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)] = { - { AOM_CDF7(8713, 19979, 27128, 29609, 31331, 32272) }, - { AOM_CDF7(5839, 15573, 23581, 26947, 29848, 31700) }, - { AOM_CDF7(4426, 11260, 17999, 21483, 25863, 29430) }, - { AOM_CDF7(3228, 9464, 14993, 18089, 22523, 27420) }, - { AOM_CDF7(3768, 8886, 13091, 17852, 22495, 27207) }, - { AOM_CDF7(2464, 8451, 12861, 21632, 25525, 28555) }, - { AOM_CDF7(1269, 5435, 10433, 18963, 21700, 25865) } - }; - -static const aom_cdf_prob default_palette_y_mode_cdf - [PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][CDF_SIZE(2)] = { - { { AOM_CDF2(31676) }, { AOM_CDF2(3419) }, { AOM_CDF2(1261) } }, - { { AOM_CDF2(31912) }, { AOM_CDF2(2859) }, { AOM_CDF2(980) } }, - { { AOM_CDF2(31823) }, { AOM_CDF2(3400) }, { AOM_CDF2(781) } }, - { { AOM_CDF2(32030) }, { AOM_CDF2(3561) }, { AOM_CDF2(904) } }, - { { AOM_CDF2(32309) }, { AOM_CDF2(7337) }, { AOM_CDF2(1462) } }, - { { AOM_CDF2(32265) }, { AOM_CDF2(4015) }, { AOM_CDF2(1521) } }, - { { AOM_CDF2(32450) }, { AOM_CDF2(7946) }, { AOM_CDF2(129) } } - }; - -static const aom_cdf_prob - default_palette_uv_mode_cdf[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)] = { - { AOM_CDF2(32461) }, { AOM_CDF2(21488) } - }; - -static const aom_cdf_prob default_palette_y_color_index_cdf - [PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = { - { - { AOM_CDF2(28710) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(10553) }, - { AOM_CDF2(27036) }, - { AOM_CDF2(31603) }, - }, - { - { AOM_CDF3(27877, 30490) }, - { AOM_CDF3(11532, 25697) }, - { AOM_CDF3(6544, 30234) }, - { AOM_CDF3(23018, 28072) }, - { AOM_CDF3(31915, 32385) }, - }, 
- { - { AOM_CDF4(25572, 28046, 30045) }, - { AOM_CDF4(9478, 21590, 27256) }, - { AOM_CDF4(7248, 26837, 29824) }, - { AOM_CDF4(19167, 24486, 28349) }, - { AOM_CDF4(31400, 31825, 32250) }, - }, - { - { AOM_CDF5(24779, 26955, 28576, 30282) }, - { AOM_CDF5(8669, 20364, 24073, 28093) }, - { AOM_CDF5(4255, 27565, 29377, 31067) }, - { AOM_CDF5(19864, 23674, 26716, 29530) }, - { AOM_CDF5(31646, 31893, 32147, 32426) }, - }, - { - { AOM_CDF6(23132, 25407, 26970, 28435, 30073) }, - { AOM_CDF6(7443, 17242, 20717, 24762, 27982) }, - { AOM_CDF6(6300, 24862, 26944, 28784, 30671) }, - { AOM_CDF6(18916, 22895, 25267, 27435, 29652) }, - { AOM_CDF6(31270, 31550, 31808, 32059, 32353) }, - }, - { - { AOM_CDF7(23105, 25199, 26464, 27684, 28931, 30318) }, - { AOM_CDF7(6950, 15447, 18952, 22681, 25567, 28563) }, - { AOM_CDF7(7560, 23474, 25490, 27203, 28921, 30708) }, - { AOM_CDF7(18544, 22373, 24457, 26195, 28119, 30045) }, - { AOM_CDF7(31198, 31451, 31670, 31882, 32123, 32391) }, - }, - { - { AOM_CDF8(21689, 23883, 25163, 26352, 27506, 28827, 30195) }, - { AOM_CDF8(6892, 15385, 17840, 21606, 24287, 26753, 29204) }, - { AOM_CDF8(5651, 23182, 25042, 26518, 27982, 29392, 30900) }, - { AOM_CDF8(19349, 22578, 24418, 25994, 27524, 29031, 30448) }, - { AOM_CDF8(31028, 31270, 31504, 31705, 31927, 32153, 32392) }, - }, - }; - -static const aom_cdf_prob default_palette_uv_color_index_cdf - [PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = { - { - { AOM_CDF2(29089) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(8713) }, - { AOM_CDF2(29257) }, - { AOM_CDF2(31610) }, - }, - { - { AOM_CDF3(25257, 29145) }, - { AOM_CDF3(12287, 27293) }, - { AOM_CDF3(7033, 27960) }, - { AOM_CDF3(20145, 25405) }, - { AOM_CDF3(30608, 31639) }, - }, - { - { AOM_CDF4(24210, 27175, 29903) }, - { AOM_CDF4(9888, 22386, 27214) }, - { AOM_CDF4(5901, 26053, 29293) }, - { AOM_CDF4(18318, 22152, 28333) }, - { AOM_CDF4(30459, 31136, 31926) }, - }, - { - { AOM_CDF5(22980, 25479, 27781, 29986) }, - { AOM_CDF5(8413, 
21408, 24859, 28874) }, - { AOM_CDF5(2257, 29449, 30594, 31598) }, - { AOM_CDF5(19189, 21202, 25915, 28620) }, - { AOM_CDF5(31844, 32044, 32281, 32518) }, - }, - { - { AOM_CDF6(22217, 24567, 26637, 28683, 30548) }, - { AOM_CDF6(7307, 16406, 19636, 24632, 28424) }, - { AOM_CDF6(4441, 25064, 26879, 28942, 30919) }, - { AOM_CDF6(17210, 20528, 23319, 26750, 29582) }, - { AOM_CDF6(30674, 30953, 31396, 31735, 32207) }, - }, - { - { AOM_CDF7(21239, 23168, 25044, 26962, 28705, 30506) }, - { AOM_CDF7(6545, 15012, 18004, 21817, 25503, 28701) }, - { AOM_CDF7(3448, 26295, 27437, 28704, 30126, 31442) }, - { AOM_CDF7(15889, 18323, 21704, 24698, 26976, 29690) }, - { AOM_CDF7(30988, 31204, 31479, 31734, 31983, 32325) }, - }, - { - { AOM_CDF8(21442, 23288, 24758, 26246, 27649, 28980, 30563) }, - { AOM_CDF8(5863, 14933, 17552, 20668, 23683, 26411, 29273) }, - { AOM_CDF8(3415, 25810, 26877, 27990, 29223, 30394, 31618) }, - { AOM_CDF8(17965, 20084, 22232, 23974, 26274, 28402, 30390) }, - { AOM_CDF8(31190, 31329, 31516, 31679, 31825, 32026, 32322) }, - }, - }; - -static const aom_cdf_prob - default_txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)] = { - { AOM_CDF2(28581) }, { AOM_CDF2(23846) }, { AOM_CDF2(20847) }, - { AOM_CDF2(24315) }, { AOM_CDF2(18196) }, { AOM_CDF2(12133) }, - { AOM_CDF2(18791) }, { AOM_CDF2(10887) }, { AOM_CDF2(11005) }, - { AOM_CDF2(27179) }, { AOM_CDF2(20004) }, { AOM_CDF2(11281) }, - { AOM_CDF2(26549) }, { AOM_CDF2(19308) }, { AOM_CDF2(14224) }, - { AOM_CDF2(28015) }, { AOM_CDF2(21546) }, { AOM_CDF2(14400) }, - { AOM_CDF2(28165) }, { AOM_CDF2(22401) }, { AOM_CDF2(16088) } - }; - -static const aom_cdf_prob default_skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)] = { - { AOM_CDF2(31671) }, { AOM_CDF2(16515) }, { AOM_CDF2(4576) } -}; - -static const aom_cdf_prob default_skip_mode_cdfs[SKIP_MODE_CONTEXTS][CDF_SIZE( - 2)] = { { AOM_CDF2(32621) }, { AOM_CDF2(20708) }, { AOM_CDF2(8127) } }; - -static const aom_cdf_prob - 
default_compound_idx_cdfs[COMP_INDEX_CONTEXTS][CDF_SIZE(2)] = { - { AOM_CDF2(18244) }, { AOM_CDF2(12865) }, { AOM_CDF2(7053) }, - { AOM_CDF2(13259) }, { AOM_CDF2(9334) }, { AOM_CDF2(4644) } - }; - -static const aom_cdf_prob - default_comp_group_idx_cdfs[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)] = { - { AOM_CDF2(26607) }, { AOM_CDF2(22891) }, { AOM_CDF2(18840) }, - { AOM_CDF2(24594) }, { AOM_CDF2(19934) }, { AOM_CDF2(22674) } - }; - -static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)] = { AOM_CDF2( - 30531) }; - -static const aom_cdf_prob default_filter_intra_mode_cdf[CDF_SIZE( - FILTER_INTRA_MODES)] = { AOM_CDF5(8949, 12776, 17211, 29558) }; - -static const aom_cdf_prob default_filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE( - 2)] = { { AOM_CDF2(4621) }, { AOM_CDF2(6743) }, { AOM_CDF2(5893) }, - { AOM_CDF2(7866) }, { AOM_CDF2(12551) }, { AOM_CDF2(9394) }, - { AOM_CDF2(12408) }, { AOM_CDF2(14301) }, { AOM_CDF2(12756) }, - { AOM_CDF2(22343) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, { AOM_CDF2(12770) }, { AOM_CDF2(10368) }, - { AOM_CDF2(20229) }, { AOM_CDF2(18101) }, { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }; - -static const aom_cdf_prob default_switchable_restore_cdf[CDF_SIZE( - RESTORE_SWITCHABLE_TYPES)] = { AOM_CDF3(9413, 22581) }; - -static const aom_cdf_prob default_wiener_restore_cdf[CDF_SIZE(2)] = { AOM_CDF2( - 11570) }; - -static const aom_cdf_prob default_sgrproj_restore_cdf[CDF_SIZE(2)] = { AOM_CDF2( - 16855) }; - -static const aom_cdf_prob default_delta_q_cdf[CDF_SIZE(DELTA_Q_PROBS + 1)] = { - AOM_CDF4(28160, 32120, 32677) -}; - -static const aom_cdf_prob default_delta_lf_multi_cdf[FRAME_LF_COUNT][CDF_SIZE( - DELTA_LF_PROBS + 1)] = { { AOM_CDF4(28160, 32120, 32677) }, - { AOM_CDF4(28160, 32120, 32677) }, - { AOM_CDF4(28160, 32120, 32677) }, - { AOM_CDF4(28160, 32120, 32677) } }; -static const aom_cdf_prob default_delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)] = { - 
AOM_CDF4(28160, 32120, 32677) -}; - -// FIXME(someone) need real defaults here -static const aom_cdf_prob default_seg_tree_cdf[CDF_SIZE(MAX_SEGMENTS)] = { - AOM_CDF8(4096, 8192, 12288, 16384, 20480, 24576, 28672) -}; - -static const aom_cdf_prob - default_segment_pred_cdf[SEG_TEMPORAL_PRED_CTXS][CDF_SIZE(2)] = { - { AOM_CDF2(128 * 128) }, { AOM_CDF2(128 * 128) }, { AOM_CDF2(128 * 128) } - }; - -static const aom_cdf_prob - default_spatial_pred_seg_tree_cdf[SPATIAL_PREDICTION_PROBS][CDF_SIZE( - MAX_SEGMENTS)] = { - { - AOM_CDF8(5622, 7893, 16093, 18233, 27809, 28373, 32533), - }, - { - AOM_CDF8(14274, 18230, 22557, 24935, 29980, 30851, 32344), - }, - { - AOM_CDF8(27527, 28487, 28723, 28890, 32397, 32647, 32679), - }, - }; - -static const aom_cdf_prob default_tx_size_cdf[MAX_TX_CATS][TX_SIZE_CONTEXTS] - [CDF_SIZE(MAX_TX_DEPTH + 1)] = { - { { AOM_CDF2(19968) }, - { AOM_CDF2(19968) }, - { AOM_CDF2(24320) } }, - { { AOM_CDF3(12272, 30172) }, - { AOM_CDF3(12272, 30172) }, - { AOM_CDF3(18677, 30848) } }, - { { AOM_CDF3(12986, 15180) }, - { AOM_CDF3(12986, 15180) }, - { AOM_CDF3(24302, 25602) } }, - { { AOM_CDF3(5782, 11475) }, - { AOM_CDF3(5782, 11475) }, - { AOM_CDF3(16803, 22759) } }, - }; - -#define MAX_COLOR_CONTEXT_HASH 8 -// Negative values are invalid -static const int palette_color_index_context_lookup[MAX_COLOR_CONTEXT_HASH + - 1] = { -1, -1, 0, -1, -1, - 4, 3, 2, 1 }; - -#define NUM_PALETTE_NEIGHBORS 3 // left, top-left and top. -int av1_get_palette_color_index_context(const uint8_t *color_map, int stride, - int r, int c, int palette_size, - uint8_t *color_order, int *color_idx) { - assert(palette_size <= PALETTE_MAX_SIZE); - assert(r > 0 || c > 0); - - // Get color indices of neighbors. - int color_neighbors[NUM_PALETTE_NEIGHBORS]; - color_neighbors[0] = (c - 1 >= 0) ? color_map[r * stride + c - 1] : -1; - color_neighbors[1] = - (c - 1 >= 0 && r - 1 >= 0) ? color_map[(r - 1) * stride + c - 1] : -1; - color_neighbors[2] = (r - 1 >= 0) ? 
color_map[(r - 1) * stride + c] : -1; - - // The +10 below should not be needed. But we get a warning "array subscript - // is above array bounds [-Werror=array-bounds]" without it, possibly due to - // this (or similar) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124 - int scores[PALETTE_MAX_SIZE + 10] = { 0 }; - int i; - static const int weights[NUM_PALETTE_NEIGHBORS] = { 2, 1, 2 }; - for (i = 0; i < NUM_PALETTE_NEIGHBORS; ++i) { - if (color_neighbors[i] >= 0) { - scores[color_neighbors[i]] += weights[i]; - } - } - - int inverse_color_order[PALETTE_MAX_SIZE]; - for (i = 0; i < PALETTE_MAX_SIZE; ++i) { - color_order[i] = i; - inverse_color_order[i] = i; - } - - // Get the top NUM_PALETTE_NEIGHBORS scores (sorted from large to small). - for (i = 0; i < NUM_PALETTE_NEIGHBORS; ++i) { - int max = scores[i]; - int max_idx = i; - for (int j = i + 1; j < palette_size; ++j) { - if (scores[j] > max) { - max = scores[j]; - max_idx = j; - } - } - if (max_idx != i) { - // Move the score at index 'max_idx' to index 'i', and shift the scores - // from 'i' to 'max_idx - 1' by 1. - const int max_score = scores[max_idx]; - const uint8_t max_color_order = color_order[max_idx]; - for (int k = max_idx; k > i; --k) { - scores[k] = scores[k - 1]; - color_order[k] = color_order[k - 1]; - inverse_color_order[color_order[k]] = k; - } - scores[i] = max_score; - color_order[i] = max_color_order; - inverse_color_order[color_order[i]] = i; - } - } - - if (color_idx != NULL) - *color_idx = inverse_color_order[color_map[r * stride + c]]; - - // Get hash value of context. - int color_index_ctx_hash = 0; - static const int hash_multipliers[NUM_PALETTE_NEIGHBORS] = { 1, 2, 2 }; - for (i = 0; i < NUM_PALETTE_NEIGHBORS; ++i) { - color_index_ctx_hash += scores[i] * hash_multipliers[i]; - } - assert(color_index_ctx_hash > 0); - assert(color_index_ctx_hash <= MAX_COLOR_CONTEXT_HASH); - - // Lookup context from hash. 
- const int color_index_ctx = - palette_color_index_context_lookup[color_index_ctx_hash]; - assert(color_index_ctx >= 0); - assert(color_index_ctx < PALETTE_COLOR_INDEX_CONTEXTS); - return color_index_ctx; -} -#undef NUM_PALETTE_NEIGHBORS -#undef MAX_COLOR_CONTEXT_HASH - -static void init_mode_probs(FRAME_CONTEXT *fc) { - av1_copy(fc->palette_y_size_cdf, default_palette_y_size_cdf); - av1_copy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf); - av1_copy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf); - av1_copy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf); - av1_copy(fc->kf_y_cdf, default_kf_y_mode_cdf); - av1_copy(fc->angle_delta_cdf, default_angle_delta_cdf); - av1_copy(fc->comp_inter_cdf, default_comp_inter_cdf); - av1_copy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf); - av1_copy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf); - av1_copy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf); - av1_copy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf); - av1_copy(fc->comp_ref_cdf, default_comp_ref_cdf); - av1_copy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf); - av1_copy(fc->single_ref_cdf, default_single_ref_cdf); - av1_copy(fc->txfm_partition_cdf, default_txfm_partition_cdf); - av1_copy(fc->compound_index_cdf, default_compound_idx_cdfs); - av1_copy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs); - av1_copy(fc->newmv_cdf, default_newmv_cdf); - av1_copy(fc->zeromv_cdf, default_zeromv_cdf); - av1_copy(fc->refmv_cdf, default_refmv_cdf); - av1_copy(fc->drl_cdf, default_drl_cdf); - av1_copy(fc->motion_mode_cdf, default_motion_mode_cdf); - av1_copy(fc->obmc_cdf, default_obmc_cdf); - av1_copy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf); - av1_copy(fc->compound_type_cdf, default_compound_type_cdf); - av1_copy(fc->wedge_idx_cdf, default_wedge_idx_cdf); - av1_copy(fc->interintra_cdf, default_interintra_cdf); - av1_copy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf); - 
av1_copy(fc->interintra_mode_cdf, default_interintra_mode_cdf); - av1_copy(fc->seg.pred_cdf, default_segment_pred_cdf); - av1_copy(fc->seg.tree_cdf, default_seg_tree_cdf); - av1_copy(fc->filter_intra_cdfs, default_filter_intra_cdfs); - av1_copy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf); - av1_copy(fc->switchable_restore_cdf, default_switchable_restore_cdf); - av1_copy(fc->wiener_restore_cdf, default_wiener_restore_cdf); - av1_copy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf); - av1_copy(fc->y_mode_cdf, default_if_y_mode_cdf); - av1_copy(fc->uv_mode_cdf, default_uv_mode_cdf); - av1_copy(fc->switchable_interp_cdf, default_switchable_interp_cdf); - av1_copy(fc->partition_cdf, default_partition_cdf); - av1_copy(fc->intra_ext_tx_cdf, default_intra_ext_tx_cdf); - av1_copy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf); - av1_copy(fc->skip_mode_cdfs, default_skip_mode_cdfs); - av1_copy(fc->skip_cdfs, default_skip_cdfs); - av1_copy(fc->intra_inter_cdf, default_intra_inter_cdf); - for (int i = 0; i < SPATIAL_PREDICTION_PROBS; i++) - av1_copy(fc->seg.spatial_pred_seg_cdf[i], - default_spatial_pred_seg_tree_cdf[i]); - av1_copy(fc->tx_size_cdf, default_tx_size_cdf); - av1_copy(fc->delta_q_cdf, default_delta_q_cdf); - av1_copy(fc->delta_lf_cdf, default_delta_lf_cdf); - av1_copy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf); - av1_copy(fc->cfl_sign_cdf, default_cfl_sign_cdf); - av1_copy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf); - av1_copy(fc->intrabc_cdf, default_intrabc_cdf); -} - -void av1_set_default_ref_deltas(int8_t *ref_deltas) { - assert(ref_deltas != NULL); - - ref_deltas[INTRA_FRAME] = 1; - ref_deltas[LAST_FRAME] = 0; - ref_deltas[LAST2_FRAME] = ref_deltas[LAST_FRAME]; - ref_deltas[LAST3_FRAME] = ref_deltas[LAST_FRAME]; - ref_deltas[BWDREF_FRAME] = ref_deltas[LAST_FRAME]; - ref_deltas[GOLDEN_FRAME] = -1; - ref_deltas[ALTREF2_FRAME] = -1; - ref_deltas[ALTREF_FRAME] = -1; -} - -void av1_set_default_mode_deltas(int8_t *mode_deltas) { - 
assert(mode_deltas != NULL); - - mode_deltas[0] = 0; - mode_deltas[1] = 0; -} - -static void set_default_lf_deltas(struct loopfilter *lf) { - lf->mode_ref_delta_enabled = 1; - lf->mode_ref_delta_update = 1; - - av1_set_default_ref_deltas(lf->ref_deltas); - av1_set_default_mode_deltas(lf->mode_deltas); -} - -void av1_setup_frame_contexts(AV1_COMMON *cm) { - // Store the frame context into a special slot (not associated with any - // reference buffer), so that we can set up cm->pre_fc correctly later - // This function must ONLY be called when cm->fc has been initialized with - // default probs, either by av1_setup_past_independence or after manually - // initializing them - cm->frame_contexts[FRAME_CONTEXT_DEFAULTS] = *cm->fc; - if (cm->large_scale_tile) { - for (int i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc; - } -} - -void av1_setup_past_independence(AV1_COMMON *cm) { - // Reset the segment feature data to the default stats: - // Features disabled, 0, with delta coding (Default state). - av1_clearall_segfeatures(&cm->seg); - - cm->current_frame_seg_map = cm->cur_frame->seg_map; - - if (cm->current_frame_seg_map) - memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); - - // reset mode ref deltas - av1_set_default_ref_deltas(cm->cur_frame->ref_deltas); - av1_set_default_mode_deltas(cm->cur_frame->mode_deltas); - set_default_lf_deltas(&cm->lf); - - av1_default_coef_probs(cm); - init_mode_probs(cm->fc); - av1_init_mv_probs(cm); - av1_init_lv_map(cm); - cm->fc->initialized = 1; - av1_setup_frame_contexts(cm); - - // prev_mip will only be allocated in encoder. 
- if (frame_is_intra_only(cm) && cm->prev_mip) - memset(cm->prev_mip, 0, - cm->mi_stride * cm->mi_rows * sizeof(*cm->prev_mip)); -} diff --git a/third_party/aom/av1/common/entropymode.h b/third_party/aom/av1/common/entropymode.h deleted file mode 100644 index 7047f34d2..000000000 --- a/third_party/aom/av1/common/entropymode.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_ENTROPYMODE_H_ -#define AOM_AV1_COMMON_ENTROPYMODE_H_ - -#include "av1/common/entropy.h" -#include "av1/common/entropymv.h" -#include "av1/common/filter.h" -#include "av1/common/seg_common.h" -#include "aom_dsp/aom_filter.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define BLOCK_SIZE_GROUPS 4 - -#define TX_SIZE_CONTEXTS 3 - -#define INTER_OFFSET(mode) ((mode)-NEARESTMV) -#define INTER_COMPOUND_OFFSET(mode) (uint8_t)((mode)-NEAREST_NEARESTMV) - -// Number of possible contexts for a color index. -// As can be seen from av1_get_palette_color_index_context(), the possible -// contexts are (2,0,0), (2,2,1), (3,2,0), (4,1,0), (5,0,0). These are mapped to -// a value from 0 to 4 using 'palette_color_index_context_lookup' table. -#define PALETTE_COLOR_INDEX_CONTEXTS 5 - -// Palette Y mode context for a block is determined by number of neighboring -// blocks (top and/or left) using a palette for Y plane. 
So, possible Y mode' -// context values are: -// 0 if neither left nor top block uses palette for Y plane, -// 1 if exactly one of left or top block uses palette for Y plane, and -// 2 if both left and top blocks use palette for Y plane. -#define PALETTE_Y_MODE_CONTEXTS 3 - -// Palette UV mode context for a block is determined by whether this block uses -// palette for the Y plane. So, possible values are: -// 0 if this block doesn't use palette for Y plane. -// 1 if this block uses palette for Y plane (i.e. Y palette size > 0). -#define PALETTE_UV_MODE_CONTEXTS 2 - -// Map the number of pixels in a block size to a context -// 64(BLOCK_8X8, BLOCK_4x16, BLOCK_16X4) -> 0 -// 128(BLOCK_8X16, BLOCK_16x8) -> 1 -// ... -// 4096(BLOCK_64X64) -> 6 -#define PALATTE_BSIZE_CTXS 7 - -#define KF_MODE_CONTEXTS 5 - -struct AV1Common; - -typedef struct { - const int16_t *scan; - const int16_t *iscan; - const int16_t *neighbors; -} SCAN_ORDER; - -typedef struct frame_contexts { - aom_cdf_prob txb_skip_cdf[TX_SIZES][TXB_SKIP_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob eob_extra_cdf[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS] - [CDF_SIZE(2)]; - aom_cdf_prob dc_sign_cdf[PLANE_TYPES][DC_SIGN_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob eob_flag_cdf16[PLANE_TYPES][2][CDF_SIZE(5)]; - aom_cdf_prob eob_flag_cdf32[PLANE_TYPES][2][CDF_SIZE(6)]; - aom_cdf_prob eob_flag_cdf64[PLANE_TYPES][2][CDF_SIZE(7)]; - aom_cdf_prob eob_flag_cdf128[PLANE_TYPES][2][CDF_SIZE(8)]; - aom_cdf_prob eob_flag_cdf256[PLANE_TYPES][2][CDF_SIZE(9)]; - aom_cdf_prob eob_flag_cdf512[PLANE_TYPES][2][CDF_SIZE(10)]; - aom_cdf_prob eob_flag_cdf1024[PLANE_TYPES][2][CDF_SIZE(11)]; - aom_cdf_prob coeff_base_eob_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB] - [CDF_SIZE(3)]; - aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS] - [CDF_SIZE(4)]; - aom_cdf_prob coeff_br_cdf[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS] - [CDF_SIZE(BR_CDF_SIZE)]; - - aom_cdf_prob newmv_cdf[NEWMV_MODE_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob 
zeromv_cdf[GLOBALMV_MODE_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob refmv_cdf[REFMV_MODE_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)]; - - aom_cdf_prob inter_compound_mode_cdf[INTER_MODE_CONTEXTS] - [CDF_SIZE(INTER_COMPOUND_MODES)]; - aom_cdf_prob compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES - 1)]; - aom_cdf_prob wedge_idx_cdf[BLOCK_SIZES_ALL][CDF_SIZE(16)]; - aom_cdf_prob interintra_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(2)]; - aom_cdf_prob wedge_interintra_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)]; - aom_cdf_prob interintra_mode_cdf[BLOCK_SIZE_GROUPS] - [CDF_SIZE(INTERINTRA_MODES)]; - aom_cdf_prob motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)]; - aom_cdf_prob obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)]; - aom_cdf_prob palette_y_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)]; - aom_cdf_prob palette_uv_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)]; - aom_cdf_prob palette_y_color_index_cdf[PALETTE_SIZES] - [PALETTE_COLOR_INDEX_CONTEXTS] - [CDF_SIZE(PALETTE_COLORS)]; - aom_cdf_prob palette_uv_color_index_cdf[PALETTE_SIZES] - [PALETTE_COLOR_INDEX_CONTEXTS] - [CDF_SIZE(PALETTE_COLORS)]; - aom_cdf_prob palette_y_mode_cdf[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS] - [CDF_SIZE(2)]; - aom_cdf_prob palette_uv_mode_cdf[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1][CDF_SIZE(2)]; - aom_cdf_prob comp_ref_type_cdf[COMP_REF_TYPE_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob uni_comp_ref_cdf[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1] - [CDF_SIZE(2)]; - aom_cdf_prob comp_ref_cdf[REF_CONTEXTS][FWD_REFS - 1][CDF_SIZE(2)]; - aom_cdf_prob comp_bwdref_cdf[REF_CONTEXTS][BWD_REFS - 1][CDF_SIZE(2)]; - aom_cdf_prob txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob compound_index_cdf[COMP_INDEX_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob comp_group_idx_cdf[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob 
skip_mode_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)]; - aom_cdf_prob intra_inter_cdf[INTRA_INTER_CONTEXTS][CDF_SIZE(2)]; - nmv_context nmvc; - nmv_context ndvc; - aom_cdf_prob intrabc_cdf[CDF_SIZE(2)]; - struct segmentation_probs seg; - aom_cdf_prob filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(2)]; - aom_cdf_prob filter_intra_mode_cdf[CDF_SIZE(FILTER_INTRA_MODES)]; - aom_cdf_prob switchable_restore_cdf[CDF_SIZE(RESTORE_SWITCHABLE_TYPES)]; - aom_cdf_prob wiener_restore_cdf[CDF_SIZE(2)]; - aom_cdf_prob sgrproj_restore_cdf[CDF_SIZE(2)]; - aom_cdf_prob y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)]; - aom_cdf_prob uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES] - [CDF_SIZE(UV_INTRA_MODES)]; - aom_cdf_prob partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(EXT_PARTITION_TYPES)]; - aom_cdf_prob switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS] - [CDF_SIZE(SWITCHABLE_FILTERS)]; - /* kf_y_cdf is discarded after use, so does not require persistent storage. - However, we keep it with the other CDFs in this struct since it needs to - be copied to each tile to support parallelism just like the others. 
- */ - aom_cdf_prob kf_y_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS] - [CDF_SIZE(INTRA_MODES)]; - - aom_cdf_prob angle_delta_cdf[DIRECTIONAL_MODES] - [CDF_SIZE(2 * MAX_ANGLE_DELTA + 1)]; - - aom_cdf_prob tx_size_cdf[MAX_TX_CATS][TX_SIZE_CONTEXTS] - [CDF_SIZE(MAX_TX_DEPTH + 1)]; - aom_cdf_prob delta_q_cdf[CDF_SIZE(DELTA_Q_PROBS + 1)]; - aom_cdf_prob delta_lf_multi_cdf[FRAME_LF_COUNT][CDF_SIZE(DELTA_LF_PROBS + 1)]; - aom_cdf_prob delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)]; - aom_cdf_prob intra_ext_tx_cdf[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] - [CDF_SIZE(TX_TYPES)]; - aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES] - [CDF_SIZE(TX_TYPES)]; - aom_cdf_prob cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)]; - aom_cdf_prob cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)]; - int initialized; -} FRAME_CONTEXT; - -static const int av1_ext_tx_ind[EXT_TX_SET_TYPES][TX_TYPES] = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 1, 3, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 1, 5, 6, 4, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0 }, - { 3, 4, 5, 8, 6, 7, 9, 10, 11, 0, 1, 2, 0, 0, 0, 0 }, - { 7, 8, 9, 12, 10, 11, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6 }, -}; - -static const int av1_ext_tx_inv[EXT_TX_SET_TYPES][TX_TYPES] = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 9, 0, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 9, 0, 10, 11, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 9, 10, 11, 0, 1, 2, 4, 5, 3, 6, 7, 8, 0, 0, 0, 0 }, - { 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 4, 5, 3, 6, 7, 8 }, -}; - -void av1_set_default_ref_deltas(int8_t *ref_deltas); -void av1_set_default_mode_deltas(int8_t *mode_deltas); -void av1_setup_frame_contexts(struct AV1Common *cm); -void av1_setup_past_independence(struct AV1Common *cm); - -// Returns (int)ceil(log2(n)). -// NOTE: This implementation only works for n <= 2^30. 
-static INLINE int av1_ceil_log2(int n) { - if (n < 2) return 0; - int i = 1, p = 2; - while (p < n) { - i++; - p = p << 1; - } - return i; -} - -// Returns the context for palette color index at row 'r' and column 'c', -// along with the 'color_order' of neighbors and the 'color_idx'. -// The 'color_map' is a 2D array with the given 'stride'. -int av1_get_palette_color_index_context(const uint8_t *color_map, int stride, - int r, int c, int palette_size, - uint8_t *color_order, int *color_idx); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_ENTROPYMODE_H_ diff --git a/third_party/aom/av1/common/entropymv.c b/third_party/aom/av1/common/entropymv.c deleted file mode 100644 index 491337387..000000000 --- a/third_party/aom/av1/common/entropymv.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "av1/common/onyxc_int.h" -#include "av1/common/entropymv.h" - -static const nmv_context default_nmv_context = { - { AOM_CDF4(4096, 11264, 19328) }, // joints_cdf - { { - // Vertical component - { AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757, - 32762, 32767) }, // class_cdf // fp - { { AOM_CDF4(16384, 24576, 26624) }, - { AOM_CDF4(12288, 21248, 24128) } }, // class0_fp_cdf - { AOM_CDF4(8192, 17408, 21248) }, // fp_cdf - { AOM_CDF2(128 * 128) }, // sign_cdf - { AOM_CDF2(160 * 128) }, // class0_hp_cdf - { AOM_CDF2(128 * 128) }, // hp_cdf - { AOM_CDF2(216 * 128) }, // class0_cdf - { { AOM_CDF2(128 * 136) }, - { AOM_CDF2(128 * 140) }, - { AOM_CDF2(128 * 148) }, - { AOM_CDF2(128 * 160) }, - { AOM_CDF2(128 * 176) }, - { AOM_CDF2(128 * 192) }, - { AOM_CDF2(128 * 224) }, - { AOM_CDF2(128 * 234) }, - { AOM_CDF2(128 * 234) }, - { AOM_CDF2(128 * 240) } }, // bits_cdf - }, - { - // Horizontal component - { AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757, - 32762, 32767) }, // class_cdf // fp - { { AOM_CDF4(16384, 24576, 26624) }, - { AOM_CDF4(12288, 21248, 24128) } }, // class0_fp_cdf - { AOM_CDF4(8192, 17408, 21248) }, // fp_cdf - { AOM_CDF2(128 * 128) }, // sign_cdf - { AOM_CDF2(160 * 128) }, // class0_hp_cdf - { AOM_CDF2(128 * 128) }, // hp_cdf - { AOM_CDF2(216 * 128) }, // class0_cdf - { { AOM_CDF2(128 * 136) }, - { AOM_CDF2(128 * 140) }, - { AOM_CDF2(128 * 148) }, - { AOM_CDF2(128 * 160) }, - { AOM_CDF2(128 * 176) }, - { AOM_CDF2(128 * 192) }, - { AOM_CDF2(128 * 224) }, - { AOM_CDF2(128 * 234) }, - { AOM_CDF2(128 * 234) }, - { AOM_CDF2(128 * 240) } }, // bits_cdf - } }, -}; - -void av1_init_mv_probs(AV1_COMMON *cm) { - // NB: this sets CDFs too - cm->fc->nmvc = default_nmv_context; - cm->fc->ndvc = default_nmv_context; -} diff --git a/third_party/aom/av1/common/entropymv.h b/third_party/aom/av1/common/entropymv.h deleted file mode 100644 index fa818a2c1..000000000 --- a/third_party/aom/av1/common/entropymv.h +++ 
/dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_ENTROPYMV_H_ -#define AOM_AV1_COMMON_ENTROPYMV_H_ - -#include "config/aom_config.h" - -#include "aom_dsp/prob.h" - -#include "av1/common/mv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct AV1Common; - -void av1_init_mv_probs(struct AV1Common *cm); - -#define MV_UPDATE_PROB 252 - -/* Symbols for coding which components are zero jointly */ -#define MV_JOINTS 4 -typedef enum { - MV_JOINT_ZERO = 0, /* Zero vector */ - MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ - MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ - MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ -} MV_JOINT_TYPE; - -static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) { - return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ; -} - -static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { - return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; -} - -/* Symbols for coding magnitude class of nonzero components */ -#define MV_CLASSES 11 -typedef enum { - MV_CLASS_0 = 0, /* (0, 2] integer pel */ - MV_CLASS_1 = 1, /* (2, 4] integer pel */ - MV_CLASS_2 = 2, /* (4, 8] integer pel */ - MV_CLASS_3 = 3, /* (8, 16] integer pel */ - MV_CLASS_4 = 4, /* (16, 32] integer pel */ - MV_CLASS_5 = 5, /* (32, 64] integer pel */ - MV_CLASS_6 = 6, /* (64, 128] integer pel */ - MV_CLASS_7 = 7, /* (128, 256] integer pel */ - MV_CLASS_8 = 8, /* (256, 512] integer pel */ - MV_CLASS_9 = 9, /* (512, 1024] integer pel */ - 
MV_CLASS_10 = 10, /* (1024,2048] integer pel */ -} MV_CLASS_TYPE; - -#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ -#define CLASS0_SIZE (1 << CLASS0_BITS) -#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) -#define MV_BITS_CONTEXTS 6 -#define MV_FP_SIZE 4 - -#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) -#define MV_MAX ((1 << MV_MAX_BITS) - 1) -#define MV_VALS ((MV_MAX << 1) + 1) - -#define MV_IN_USE_BITS 14 -#define MV_UPP (1 << MV_IN_USE_BITS) -#define MV_LOW (-(1 << MV_IN_USE_BITS)) - -typedef struct { - aom_cdf_prob classes_cdf[CDF_SIZE(MV_CLASSES)]; - aom_cdf_prob class0_fp_cdf[CLASS0_SIZE][CDF_SIZE(MV_FP_SIZE)]; - aom_cdf_prob fp_cdf[CDF_SIZE(MV_FP_SIZE)]; - aom_cdf_prob sign_cdf[CDF_SIZE(2)]; - aom_cdf_prob class0_hp_cdf[CDF_SIZE(2)]; - aom_cdf_prob hp_cdf[CDF_SIZE(2)]; - aom_cdf_prob class0_cdf[CDF_SIZE(CLASS0_SIZE)]; - aom_cdf_prob bits_cdf[MV_OFFSET_BITS][CDF_SIZE(2)]; -} nmv_component; - -typedef struct { - aom_cdf_prob joints_cdf[CDF_SIZE(MV_JOINTS)]; - nmv_component comps[2]; -} nmv_context; - -typedef enum { - MV_SUBPEL_NONE = -1, - MV_SUBPEL_LOW_PRECISION = 0, - MV_SUBPEL_HIGH_PRECISION, -} MvSubpelPrecision; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_ENTROPYMV_H_ diff --git a/third_party/aom/av1/common/enums.h b/third_party/aom/av1/common/enums.h deleted file mode 100644 index 869c06ef2..000000000 --- a/third_party/aom/av1/common/enums.h +++ /dev/null @@ -1,619 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_ENUMS_H_ -#define AOM_AV1_COMMON_ENUMS_H_ - -#include "config/aom_config.h" - -#include "aom/aom_codec.h" -#include "aom/aom_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#undef MAX_SB_SIZE - -// Max superblock size -#define MAX_SB_SIZE_LOG2 7 -#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2) -#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) - -// Min superblock size -#define MIN_SB_SIZE_LOG2 6 - -// Pixels per Mode Info (MI) unit -#define MI_SIZE_LOG2 2 -#define MI_SIZE (1 << MI_SIZE_LOG2) - -// MI-units per max superblock (MI Block - MIB) -#define MAX_MIB_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2) -#define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2) - -// MI-units per min superblock -#define MIN_MIB_SIZE_LOG2 (MIN_SB_SIZE_LOG2 - MI_SIZE_LOG2) - -// Mask to extract MI offset within max MIB -#define MAX_MIB_MASK (MAX_MIB_SIZE - 1) - -// Maximum number of tile rows and tile columns -#define MAX_TILE_ROWS 64 -#define MAX_TILE_COLS 64 - -#define MAX_VARTX_DEPTH 2 - -#define MI_SIZE_64X64 (64 >> MI_SIZE_LOG2) -#define MI_SIZE_128X128 (128 >> MI_SIZE_LOG2) - -#define MAX_PALETTE_SQUARE (64 * 64) -// Maximum number of colors in a palette. -#define PALETTE_MAX_SIZE 8 -// Minimum number of colors in a palette. -#define PALETTE_MIN_SIZE 2 - -#define FRAME_OFFSET_BITS 5 -#define MAX_FRAME_DISTANCE ((1 << FRAME_OFFSET_BITS) - 1) - -#define REF_FRAMES_LOG2 3 -#define REF_FRAMES (1 << REF_FRAMES_LOG2) - -// 4 scratch frames for the new frames to support a maximum of 4 cores decoding -// in parallel, 3 for scaled references on the encoder. -// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number -// of framebuffers. -// TODO(jkoleszar): These 3 extra references could probably come from the -// normal reference pool. 
-#define FRAME_BUFFERS (REF_FRAMES + 7) - -// 4 frame filter levels: y plane vertical, y plane horizontal, -// u plane, and v plane -#define FRAME_LF_COUNT 4 -#define DEFAULT_DELTA_LF_MULTI 0 -#define MAX_MODE_LF_DELTAS 2 - -#define DIST_PRECISION_BITS 4 -#define DIST_PRECISION (1 << DIST_PRECISION_BITS) // 16 - -// TODO(chengchen): Temporal flag serve as experimental flag for WIP -// bitmask construction. -// Shall be removed when bitmask code is completely checkedin -#define LOOP_FILTER_BITMASK 0 - -#define PROFILE_BITS 3 -// The following three profiles are currently defined. -// Profile 0. 8-bit and 10-bit 4:2:0 and 4:0:0 only. -// Profile 1. 8-bit and 10-bit 4:4:4 -// Profile 2. 8-bit and 10-bit 4:2:2 -// 12-bit 4:0:0, 4:2:2 and 4:4:4 -// Since we have three bits for the profiles, it can be extended later. -typedef enum BITSTREAM_PROFILE { - PROFILE_0, - PROFILE_1, - PROFILE_2, - MAX_PROFILES, -} BITSTREAM_PROFILE; - -#define LEVEL_MAJOR_BITS 3 -#define LEVEL_MINOR_BITS 2 -#define LEVEL_BITS (LEVEL_MAJOR_BITS + LEVEL_MINOR_BITS) - -#define LEVEL_MAJOR_MIN 2 -#define LEVEL_MAJOR_MAX ((1 << LEVEL_MAJOR_BITS) - 1 + LEVEL_MAJOR_MIN) -#define LEVEL_MINOR_MIN 0 -#define LEVEL_MINOR_MAX ((1 << LEVEL_MINOR_BITS) - 1) - -#define OP_POINTS_CNT_MINUS_1_BITS 5 -#define OP_POINTS_IDC_BITS 12 - -// Note: Some enums use the attribute 'packed' to use smallest possible integer -// type, so that we can save memory when they are used in structs/arrays. 
- -typedef enum ATTRIBUTE_PACKED { - BLOCK_4X4, - BLOCK_4X8, - BLOCK_8X4, - BLOCK_8X8, - BLOCK_8X16, - BLOCK_16X8, - BLOCK_16X16, - BLOCK_16X32, - BLOCK_32X16, - BLOCK_32X32, - BLOCK_32X64, - BLOCK_64X32, - BLOCK_64X64, - BLOCK_64X128, - BLOCK_128X64, - BLOCK_128X128, - BLOCK_4X16, - BLOCK_16X4, - BLOCK_8X32, - BLOCK_32X8, - BLOCK_16X64, - BLOCK_64X16, - BLOCK_SIZES_ALL, - BLOCK_SIZES = BLOCK_4X16, - BLOCK_INVALID = 255, - BLOCK_LARGEST = (BLOCK_SIZES - 1) -} BLOCK_SIZE; - -// 4X4, 8X8, 16X16, 32X32, 64X64, 128X128 -#define SQR_BLOCK_SIZES 6 - -typedef enum ATTRIBUTE_PACKED { - PARTITION_NONE, - PARTITION_HORZ, - PARTITION_VERT, - PARTITION_SPLIT, - PARTITION_HORZ_A, // HORZ split and the top partition is split again - PARTITION_HORZ_B, // HORZ split and the bottom partition is split again - PARTITION_VERT_A, // VERT split and the left partition is split again - PARTITION_VERT_B, // VERT split and the right partition is split again - PARTITION_HORZ_4, // 4:1 horizontal partition - PARTITION_VERT_4, // 4:1 vertical partition - EXT_PARTITION_TYPES, - PARTITION_TYPES = PARTITION_SPLIT + 1, - PARTITION_INVALID = 255 -} PARTITION_TYPE; - -typedef char PARTITION_CONTEXT; -#define PARTITION_PLOFFSET 4 // number of probability models per block size -#define PARTITION_BLOCK_SIZES 5 -#define PARTITION_CONTEXTS (PARTITION_BLOCK_SIZES * PARTITION_PLOFFSET) - -// block transform size -#if defined(_MSC_VER) -typedef uint8_t TX_SIZE; -enum ATTRIBUTE_PACKED { -#else -typedef enum ATTRIBUTE_PACKED { -#endif - TX_4X4, // 4x4 transform - TX_8X8, // 8x8 transform - TX_16X16, // 16x16 transform - TX_32X32, // 32x32 transform - TX_64X64, // 64x64 transform - TX_4X8, // 4x8 transform - TX_8X4, // 8x4 transform - TX_8X16, // 8x16 transform - TX_16X8, // 16x8 transform - TX_16X32, // 16x32 transform - TX_32X16, // 32x16 transform - TX_32X64, // 32x64 transform - TX_64X32, // 64x32 transform - TX_4X16, // 4x16 transform - TX_16X4, // 16x4 transform - TX_8X32, // 8x32 transform - TX_32X8, // 
32x8 transform - TX_16X64, // 16x64 transform - TX_64X16, // 64x16 transform - TX_SIZES_ALL, // Includes rectangular transforms - TX_SIZES = TX_4X8, // Does NOT include rectangular transforms - TX_SIZES_LARGEST = TX_64X64, - TX_INVALID = 255 // Invalid transform size -#if defined(_MSC_VER) -}; -#else -} TX_SIZE; -#endif - -#define TX_SIZE_LUMA_MIN (TX_4X4) -/* We don't need to code a transform size unless the allowed size is at least - one more than the minimum. */ -#define TX_SIZE_CTX_MIN (TX_SIZE_LUMA_MIN + 1) - -// Maximum tx_size categories -#define MAX_TX_CATS (TX_SIZES - TX_SIZE_CTX_MIN) -#define MAX_TX_DEPTH 2 - -#define MAX_TX_SIZE_LOG2 (6) -#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2) -#define MIN_TX_SIZE_LOG2 2 -#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2) -#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE) - -// Pad 4 extra columns to remove horizontal availability check. -#define TX_PAD_HOR_LOG2 2 -#define TX_PAD_HOR 4 -// Pad 6 extra rows (2 on top and 4 on bottom) to remove vertical availability -// check. -#define TX_PAD_TOP 2 -#define TX_PAD_BOTTOM 4 -#define TX_PAD_VER (TX_PAD_TOP + TX_PAD_BOTTOM) -// Pad 16 extra bytes to avoid reading overflow in SIMD optimization. 
-#define TX_PAD_END 16 -#define TX_PAD_2D ((32 + TX_PAD_HOR) * (32 + TX_PAD_VER) + TX_PAD_END) - -// Number of maxium size transform blocks in the maximum size superblock -#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2) -#define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2) - -// frame transform mode -typedef enum ATTRIBUTE_PACKED { - ONLY_4X4, // use only 4x4 transform - TX_MODE_LARGEST, // transform size is the largest possible for pu size - TX_MODE_SELECT, // transform specified for each block - TX_MODES, -} TX_MODE; - -// 1D tx types -typedef enum ATTRIBUTE_PACKED { - DCT_1D, - ADST_1D, - FLIPADST_1D, - IDTX_1D, - TX_TYPES_1D, -} TX_TYPE_1D; - -typedef enum ATTRIBUTE_PACKED { - DCT_DCT, // DCT in both horizontal and vertical - ADST_DCT, // ADST in vertical, DCT in horizontal - DCT_ADST, // DCT in vertical, ADST in horizontal - ADST_ADST, // ADST in both directions - FLIPADST_DCT, - DCT_FLIPADST, - FLIPADST_FLIPADST, - ADST_FLIPADST, - FLIPADST_ADST, - IDTX, - V_DCT, - H_DCT, - V_ADST, - H_ADST, - V_FLIPADST, - H_FLIPADST, - TX_TYPES, -} TX_TYPE; - -typedef enum ATTRIBUTE_PACKED { - REG_REG, - REG_SMOOTH, - REG_SHARP, - SMOOTH_REG, - SMOOTH_SMOOTH, - SMOOTH_SHARP, - SHARP_REG, - SHARP_SMOOTH, - SHARP_SHARP, -} DUAL_FILTER_TYPE; - -typedef enum ATTRIBUTE_PACKED { - // DCT only - EXT_TX_SET_DCTONLY, - // DCT + Identity only - EXT_TX_SET_DCT_IDTX, - // Discrete Trig transforms w/o flip (4) + Identity (1) - EXT_TX_SET_DTT4_IDTX, - // Discrete Trig transforms w/o flip (4) + Identity (1) + 1D Hor/vert DCT (2) - EXT_TX_SET_DTT4_IDTX_1DDCT, - // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver DCT (2) - EXT_TX_SET_DTT9_IDTX_1DDCT, - // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver (6) - EXT_TX_SET_ALL16, - EXT_TX_SET_TYPES -} TxSetType; - -#define IS_2D_TRANSFORM(tx_type) (tx_type < IDTX) - -#define EXT_TX_SIZES 4 // number of sizes that use extended transforms -#define EXT_TX_SETS_INTER 4 
// Sets of transform selections for INTER -#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA - -typedef enum ATTRIBUTE_PACKED { - AOM_LAST_FLAG = 1 << 0, - AOM_LAST2_FLAG = 1 << 1, - AOM_LAST3_FLAG = 1 << 2, - AOM_GOLD_FLAG = 1 << 3, - AOM_BWD_FLAG = 1 << 4, - AOM_ALT2_FLAG = 1 << 5, - AOM_ALT_FLAG = 1 << 6, - AOM_REFFRAME_ALL = (1 << 7) - 1 -} AOM_REFFRAME; - -typedef enum ATTRIBUTE_PACKED { - UNIDIR_COMP_REFERENCE, - BIDIR_COMP_REFERENCE, - COMP_REFERENCE_TYPES, -} COMP_REFERENCE_TYPE; - -typedef enum ATTRIBUTE_PACKED { - PLANE_TYPE_Y, - PLANE_TYPE_UV, - PLANE_TYPES -} PLANE_TYPE; - -#define CFL_ALPHABET_SIZE_LOG2 4 -#define CFL_ALPHABET_SIZE (1 << CFL_ALPHABET_SIZE_LOG2) -#define CFL_MAGS_SIZE ((2 << CFL_ALPHABET_SIZE_LOG2) + 1) -#define CFL_IDX_U(idx) (idx >> CFL_ALPHABET_SIZE_LOG2) -#define CFL_IDX_V(idx) (idx & (CFL_ALPHABET_SIZE - 1)) - -typedef enum ATTRIBUTE_PACKED { - CFL_PRED_U, - CFL_PRED_V, - CFL_PRED_PLANES -} CFL_PRED_TYPE; - -typedef enum ATTRIBUTE_PACKED { - CFL_SIGN_ZERO, - CFL_SIGN_NEG, - CFL_SIGN_POS, - CFL_SIGNS -} CFL_SIGN_TYPE; - -typedef enum ATTRIBUTE_PACKED { - CFL_DISALLOWED, - CFL_ALLOWED, - CFL_ALLOWED_TYPES -} CFL_ALLOWED_TYPE; - -// CFL_SIGN_ZERO,CFL_SIGN_ZERO is invalid -#define CFL_JOINT_SIGNS (CFL_SIGNS * CFL_SIGNS - 1) -// CFL_SIGN_U is equivalent to (js + 1) / 3 for js in 0 to 8 -#define CFL_SIGN_U(js) (((js + 1) * 11) >> 5) -// CFL_SIGN_V is equivalent to (js + 1) % 3 for js in 0 to 8 -#define CFL_SIGN_V(js) ((js + 1) - CFL_SIGNS * CFL_SIGN_U(js)) - -// There is no context when the alpha for a given plane is zero. -// So there are 2 fewer contexts than joint signs. -#define CFL_ALPHA_CONTEXTS (CFL_JOINT_SIGNS + 1 - CFL_SIGNS) -#define CFL_CONTEXT_U(js) (js + 1 - CFL_SIGNS) -// Also, the contexts are symmetric under swapping the planes. 
-#define CFL_CONTEXT_V(js) \ - (CFL_SIGN_V(js) * CFL_SIGNS + CFL_SIGN_U(js) - CFL_SIGNS) - -typedef enum ATTRIBUTE_PACKED { - PALETTE_MAP, - COLOR_MAP_TYPES, -} COLOR_MAP_TYPE; - -typedef enum ATTRIBUTE_PACKED { - TWO_COLORS, - THREE_COLORS, - FOUR_COLORS, - FIVE_COLORS, - SIX_COLORS, - SEVEN_COLORS, - EIGHT_COLORS, - PALETTE_SIZES -} PALETTE_SIZE; - -typedef enum ATTRIBUTE_PACKED { - PALETTE_COLOR_ONE, - PALETTE_COLOR_TWO, - PALETTE_COLOR_THREE, - PALETTE_COLOR_FOUR, - PALETTE_COLOR_FIVE, - PALETTE_COLOR_SIX, - PALETTE_COLOR_SEVEN, - PALETTE_COLOR_EIGHT, - PALETTE_COLORS -} PALETTE_COLOR; - -// Note: All directional predictors must be between V_PRED and D67_PRED (both -// inclusive). -typedef enum ATTRIBUTE_PACKED { - DC_PRED, // Average of above and left pixels - V_PRED, // Vertical - H_PRED, // Horizontal - D45_PRED, // Directional 45 degree - D135_PRED, // Directional 135 degree - D113_PRED, // Directional 113 degree - D157_PRED, // Directional 157 degree - D203_PRED, // Directional 203 degree - D67_PRED, // Directional 67 degree - SMOOTH_PRED, // Combination of horizontal and vertical interpolation - SMOOTH_V_PRED, // Vertical interpolation - SMOOTH_H_PRED, // Horizontal interpolation - PAETH_PRED, // Predict from the direction of smallest gradient - NEARESTMV, - NEARMV, - GLOBALMV, - NEWMV, - // Compound ref compound modes - NEAREST_NEARESTMV, - NEAR_NEARMV, - NEAREST_NEWMV, - NEW_NEARESTMV, - NEAR_NEWMV, - NEW_NEARMV, - GLOBAL_GLOBALMV, - NEW_NEWMV, - MB_MODE_COUNT, - INTRA_MODE_START = DC_PRED, - INTRA_MODE_END = NEARESTMV, - INTRA_MODE_NUM = INTRA_MODE_END - INTRA_MODE_START, - SINGLE_INTER_MODE_START = NEARESTMV, - SINGLE_INTER_MODE_END = NEAREST_NEARESTMV, - SINGLE_INTER_MODE_NUM = SINGLE_INTER_MODE_END - SINGLE_INTER_MODE_START, - COMP_INTER_MODE_START = NEAREST_NEARESTMV, - COMP_INTER_MODE_END = MB_MODE_COUNT, - COMP_INTER_MODE_NUM = COMP_INTER_MODE_END - COMP_INTER_MODE_START, - INTER_MODE_START = NEARESTMV, - INTER_MODE_END = MB_MODE_COUNT, - 
INTRA_MODES = PAETH_PRED + 1, // PAETH_PRED has to be the last intra mode. - INTRA_INVALID = MB_MODE_COUNT // For uv_mode in inter blocks -} PREDICTION_MODE; - -// TODO(ltrudeau) Do we really want to pack this? -// TODO(ltrudeau) Do we match with PREDICTION_MODE? -typedef enum ATTRIBUTE_PACKED { - UV_DC_PRED, // Average of above and left pixels - UV_V_PRED, // Vertical - UV_H_PRED, // Horizontal - UV_D45_PRED, // Directional 45 degree - UV_D135_PRED, // Directional 135 degree - UV_D113_PRED, // Directional 113 degree - UV_D157_PRED, // Directional 157 degree - UV_D203_PRED, // Directional 203 degree - UV_D67_PRED, // Directional 67 degree - UV_SMOOTH_PRED, // Combination of horizontal and vertical interpolation - UV_SMOOTH_V_PRED, // Vertical interpolation - UV_SMOOTH_H_PRED, // Horizontal interpolation - UV_PAETH_PRED, // Predict from the direction of smallest gradient - UV_CFL_PRED, // Chroma-from-Luma - UV_INTRA_MODES, - UV_MODE_INVALID, // For uv_mode in inter blocks -} UV_PREDICTION_MODE; - -typedef enum ATTRIBUTE_PACKED { - SIMPLE_TRANSLATION, - OBMC_CAUSAL, // 2-sided OBMC - WARPED_CAUSAL, // 2-sided WARPED - MOTION_MODES -} MOTION_MODE; - -typedef enum ATTRIBUTE_PACKED { - II_DC_PRED, - II_V_PRED, - II_H_PRED, - II_SMOOTH_PRED, - INTERINTRA_MODES -} INTERINTRA_MODE; - -typedef enum ATTRIBUTE_PACKED { - COMPOUND_AVERAGE, - COMPOUND_WEDGE, - COMPOUND_DIFFWTD, - COMPOUND_TYPES, -} COMPOUND_TYPE; - -typedef enum ATTRIBUTE_PACKED { - FILTER_DC_PRED, - FILTER_V_PRED, - FILTER_H_PRED, - FILTER_D157_PRED, - FILTER_PAETH_PRED, - FILTER_INTRA_MODES, -} FILTER_INTRA_MODE; - -#define DIRECTIONAL_MODES 8 -#define MAX_ANGLE_DELTA 3 -#define ANGLE_STEP 3 - -#define INTER_MODES (1 + NEWMV - NEARESTMV) - -#define INTER_COMPOUND_MODES (1 + NEW_NEWMV - NEAREST_NEARESTMV) - -#define SKIP_CONTEXTS 3 -#define SKIP_MODE_CONTEXTS 3 - -#define COMP_INDEX_CONTEXTS 6 -#define COMP_GROUP_IDX_CONTEXTS 6 - -#define NMV_CONTEXTS 3 - -#define NEWMV_MODE_CONTEXTS 6 -#define 
GLOBALMV_MODE_CONTEXTS 2 -#define REFMV_MODE_CONTEXTS 6 -#define DRL_MODE_CONTEXTS 3 - -#define GLOBALMV_OFFSET 3 -#define REFMV_OFFSET 4 - -#define NEWMV_CTX_MASK ((1 << GLOBALMV_OFFSET) - 1) -#define GLOBALMV_CTX_MASK ((1 << (REFMV_OFFSET - GLOBALMV_OFFSET)) - 1) -#define REFMV_CTX_MASK ((1 << (8 - REFMV_OFFSET)) - 1) - -#define COMP_NEWMV_CTXS 5 -#define INTER_MODE_CONTEXTS 8 - -#define DELTA_Q_SMALL 3 -#define DELTA_Q_PROBS (DELTA_Q_SMALL) -#define DEFAULT_DELTA_Q_RES 4 -#define DELTA_LF_SMALL 3 -#define DELTA_LF_PROBS (DELTA_LF_SMALL) -#define DEFAULT_DELTA_LF_RES 2 - -/* Segment Feature Masks */ -#define MAX_MV_REF_CANDIDATES 2 - -#define MAX_REF_MV_STACK_SIZE 8 -#define REF_CAT_LEVEL 640 - -#define INTRA_INTER_CONTEXTS 4 -#define COMP_INTER_CONTEXTS 5 -#define REF_CONTEXTS 3 - -#define COMP_REF_TYPE_CONTEXTS 5 -#define UNI_COMP_REF_CONTEXTS 3 - -#define TXFM_PARTITION_CONTEXTS ((TX_SIZES - TX_8X8) * 6 - 3) -typedef uint8_t TXFM_CONTEXT; - -#define NONE_FRAME -1 -#define INTRA_FRAME 0 -#define LAST_FRAME 1 -#define LAST2_FRAME 2 -#define LAST3_FRAME 3 -#define GOLDEN_FRAME 4 -#define BWDREF_FRAME 5 -#define ALTREF2_FRAME 6 -#define ALTREF_FRAME 7 -#define EXTREF_FRAME REF_FRAMES -#define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1) - -#define INTER_REFS_PER_FRAME (ALTREF_FRAME - LAST_FRAME + 1) - -#define FWD_REFS (GOLDEN_FRAME - LAST_FRAME + 1) -#define FWD_RF_OFFSET(ref) (ref - LAST_FRAME) -#define BWD_REFS (ALTREF_FRAME - BWDREF_FRAME + 1) -#define BWD_RF_OFFSET(ref) (ref - BWDREF_FRAME) - -#define SINGLE_REFS (FWD_REFS + BWD_REFS) - -typedef enum ATTRIBUTE_PACKED { - LAST_LAST2_FRAMES, // { LAST_FRAME, LAST2_FRAME } - LAST_LAST3_FRAMES, // { LAST_FRAME, LAST3_FRAME } - LAST_GOLDEN_FRAMES, // { LAST_FRAME, GOLDEN_FRAME } - BWDREF_ALTREF_FRAMES, // { BWDREF_FRAME, ALTREF_FRAME } - LAST2_LAST3_FRAMES, // { LAST2_FRAME, LAST3_FRAME } - LAST2_GOLDEN_FRAMES, // { LAST2_FRAME, GOLDEN_FRAME } - LAST3_GOLDEN_FRAMES, // { LAST3_FRAME, GOLDEN_FRAME } - 
BWDREF_ALTREF2_FRAMES, // { BWDREF_FRAME, ALTREF2_FRAME } - ALTREF2_ALTREF_FRAMES, // { ALTREF2_FRAME, ALTREF_FRAME } - TOTAL_UNIDIR_COMP_REFS, - // NOTE: UNIDIR_COMP_REFS is the number of uni-directional reference pairs - // that are explicitly signaled. - UNIDIR_COMP_REFS = BWDREF_ALTREF_FRAMES + 1, -} UNIDIR_COMP_REF; - -#define TOTAL_COMP_REFS (FWD_REFS * BWD_REFS + TOTAL_UNIDIR_COMP_REFS) - -#define COMP_REFS (FWD_REFS * BWD_REFS + UNIDIR_COMP_REFS) - -// NOTE: A limited number of unidirectional reference pairs can be signalled for -// compound prediction. The use of skip mode, on the other hand, makes it -// possible to have a reference pair not listed for explicit signaling. -#define MODE_CTX_REF_FRAMES (REF_FRAMES + TOTAL_COMP_REFS) - -typedef enum ATTRIBUTE_PACKED { - RESTORE_NONE, - RESTORE_WIENER, - RESTORE_SGRPROJ, - RESTORE_SWITCHABLE, - RESTORE_SWITCHABLE_TYPES = RESTORE_SWITCHABLE, - RESTORE_TYPES = 4, -} RestorationType; - -#define SUPERRES_SCALE_BITS 3 -#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1) - -// In large_scale_tile coding, external references are used. -#define MAX_EXTERNAL_REFERENCES 128 -#define MAX_TILES 512 - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_ENUMS_H_ diff --git a/third_party/aom/av1/common/filter.h b/third_party/aom/av1/common/filter.h deleted file mode 100644 index 571422d11..000000000 --- a/third_party/aom/av1/common/filter.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_FILTER_H_ -#define AOM_AV1_COMMON_FILTER_H_ - -#include <assert.h> - -#include "config/aom_config.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/aom_filter.h" -#include "aom_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_FILTER_TAP 8 - -typedef enum ATTRIBUTE_PACKED { - EIGHTTAP_REGULAR, - EIGHTTAP_SMOOTH, - MULTITAP_SHARP, - BILINEAR, - INTERP_FILTERS_ALL, - SWITCHABLE_FILTERS = BILINEAR, - SWITCHABLE = SWITCHABLE_FILTERS + 1, /* the last switchable one */ - EXTRA_FILTERS = INTERP_FILTERS_ALL - SWITCHABLE_FILTERS, -} InterpFilter; - -// With CONFIG_DUAL_FILTER, pack two InterpFilter's into a uint32_t: since -// there are at most 10 filters, we can use 16 bits for each and have more than -// enough space. This reduces argument passing and unifies the operation of -// setting a (pair of) filters. -// -// Without CONFIG_DUAL_FILTER, -typedef uint32_t InterpFilters; -static INLINE InterpFilter av1_extract_interp_filter(InterpFilters filters, - int x_filter) { - return (InterpFilter)((filters >> (x_filter ? 16 : 0)) & 0xf); -} - -static INLINE InterpFilters av1_make_interp_filters(InterpFilter y_filter, - InterpFilter x_filter) { - uint16_t y16 = y_filter & 0xf; - uint16_t x16 = x_filter & 0xf; - return y16 | ((uint32_t)x16 << 16); -} - -static INLINE InterpFilters av1_broadcast_interp_filter(InterpFilter filter) { - return av1_make_interp_filters(filter, filter); -} - -static INLINE InterpFilter av1_unswitchable_filter(InterpFilter filter) { - return filter == SWITCHABLE ? 
EIGHTTAP_REGULAR : filter; -} - -/* (1 << LOG_SWITCHABLE_FILTERS) > SWITCHABLE_FILTERS */ -#define LOG_SWITCHABLE_FILTERS 2 - -#define MAX_SUBPEL_TAPS 12 -#define SWITCHABLE_FILTER_CONTEXTS ((SWITCHABLE_FILTERS + 1) * 4) -#define INTER_FILTER_COMP_OFFSET (SWITCHABLE_FILTERS + 1) -#define INTER_FILTER_DIR_OFFSET ((SWITCHABLE_FILTERS + 1) * 2) - -typedef struct InterpFilterParams { - const int16_t *filter_ptr; - uint16_t taps; - uint16_t subpel_shifts; - InterpFilter interp_filter; -} InterpFilterParams; - -DECLARE_ALIGNED(256, static const InterpKernel, - av1_bilinear_filters[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, - { 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 }, - { 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 }, - { 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 }, - { 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 }, - { 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 }, - { 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 }, - { 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 } -}; - -DECLARE_ALIGNED(256, static const InterpKernel, - av1_sub_pel_filters_8[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 }, - { 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 }, - { 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 }, - { 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 }, - { 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 }, - { 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 }, - { 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 }, - { 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 } -}; - -DECLARE_ALIGNED(256, static const InterpKernel, - av1_sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 }, - { -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 }, 
- { -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 }, - { -4, 10, -24, 100, 60, -20, 8, -2 }, { -4, 10, -24, 90, 70, -22, 10, -2 }, - { -4, 12, -24, 80, 80, -24, 12, -4 }, { -2, 10, -22, 70, 90, -24, 10, -4 }, - { -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 }, - { -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 }, - { -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 } -}; - -DECLARE_ALIGNED(256, static const InterpKernel, - av1_sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 }, - { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 }, - { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 }, - { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 }, - { 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 }, - { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 }, - { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 }, - { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 } -}; - -static const InterpFilterParams - av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = { - { (const int16_t *)av1_sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS, - EIGHTTAP_REGULAR }, - { (const int16_t *)av1_sub_pel_filters_8smooth, SUBPEL_TAPS, - SUBPEL_SHIFTS, EIGHTTAP_SMOOTH }, - { (const int16_t *)av1_sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS, - MULTITAP_SHARP }, - { (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS, - BILINEAR } - }; - -// A special 2-tap bilinear filter for IntraBC chroma. IntraBC uses full pixel -// MV for luma. If sub-sampling exists, chroma may possibly use half-pel MV. 
-DECLARE_ALIGNED(256, static const int16_t, av1_intrabc_bilinear_filter[2]) = { - 64, - 64, -}; - -static const InterpFilterParams av1_intrabc_filter_params = { - av1_intrabc_bilinear_filter, 2, 0, BILINEAR -}; - -DECLARE_ALIGNED(256, static const InterpKernel, - av1_sub_pel_filters_4[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 }, - { 0, 0, -8, 122, 18, -4, 0, 0 }, { 0, 0, -10, 116, 28, -6, 0, 0 }, - { 0, 0, -12, 110, 38, -8, 0, 0 }, { 0, 0, -12, 102, 48, -10, 0, 0 }, - { 0, 0, -14, 94, 58, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 }, - { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 }, - { 0, 0, -10, 58, 94, -14, 0, 0 }, { 0, 0, -10, 48, 102, -12, 0, 0 }, - { 0, 0, -8, 38, 110, -12, 0, 0 }, { 0, 0, -6, 28, 116, -10, 0, 0 }, - { 0, 0, -4, 18, 122, -8, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 } -}; -DECLARE_ALIGNED(256, static const InterpKernel, - av1_sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 30, 62, 34, 2, 0, 0 }, - { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 }, - { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 }, - { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, 0, 14, 54, 48, 12, 0, 0 }, - { 0, 0, 12, 52, 52, 12, 0, 0 }, { 0, 0, 12, 48, 54, 14, 0, 0 }, - { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 }, - { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 }, - { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 30, 0, 0 } -}; - -// For w<=4, MULTITAP_SHARP is the same as EIGHTTAP_REGULAR -static const InterpFilterParams av1_interp_4tap[SWITCHABLE_FILTERS + 1] = { - { (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS, - EIGHTTAP_REGULAR }, - { (const int16_t *)av1_sub_pel_filters_4smooth, SUBPEL_TAPS, SUBPEL_SHIFTS, - EIGHTTAP_SMOOTH }, - { (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS, - EIGHTTAP_REGULAR }, - { (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS, - BILINEAR }, -}; - 
-static INLINE const InterpFilterParams * -av1_get_interp_filter_params_with_block_size(const InterpFilter interp_filter, - const int w) { - if (w <= 4) return &av1_interp_4tap[interp_filter]; - return &av1_interp_filter_params_list[interp_filter]; -} - -static INLINE const InterpFilterParams *av1_get_4tap_interp_filter_params( - const InterpFilter interp_filter) { - return &av1_interp_4tap[interp_filter]; -} - -static INLINE const int16_t *av1_get_interp_filter_kernel( - const InterpFilter interp_filter) { - return av1_interp_filter_params_list[interp_filter].filter_ptr; -} - -static INLINE const int16_t *av1_get_interp_filter_subpel_kernel( - const InterpFilterParams *const filter_params, const int subpel) { - return filter_params->filter_ptr + filter_params->taps * subpel; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_FILTER_H_ diff --git a/third_party/aom/av1/common/frame_buffers.c b/third_party/aom/av1/common/frame_buffers.c deleted file mode 100644 index fd6c4bc79..000000000 --- a/third_party/aom/av1/common/frame_buffers.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <assert.h> - -#include "av1/common/frame_buffers.h" -#include "aom_mem/aom_mem.h" - -int av1_alloc_internal_frame_buffers(InternalFrameBufferList *list) { - assert(list != NULL); - av1_free_internal_frame_buffers(list); - - list->num_internal_frame_buffers = - AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS; - list->int_fb = (InternalFrameBuffer *)aom_calloc( - list->num_internal_frame_buffers, sizeof(*list->int_fb)); - return (list->int_fb == NULL); -} - -void av1_free_internal_frame_buffers(InternalFrameBufferList *list) { - int i; - - assert(list != NULL); - - for (i = 0; i < list->num_internal_frame_buffers; ++i) { - aom_free(list->int_fb[i].data); - list->int_fb[i].data = NULL; - } - aom_free(list->int_fb); - list->int_fb = NULL; -} - -void av1_zero_unused_internal_frame_buffers(InternalFrameBufferList *list) { - int i; - - assert(list != NULL); - - for (i = 0; i < list->num_internal_frame_buffers; ++i) { - if (list->int_fb[i].data && !list->int_fb[i].in_use) - memset(list->int_fb[i].data, 0, list->int_fb[i].size); - } -} - -int av1_get_frame_buffer(void *cb_priv, size_t min_size, - aom_codec_frame_buffer_t *fb) { - int i; - InternalFrameBufferList *const int_fb_list = - (InternalFrameBufferList *)cb_priv; - if (int_fb_list == NULL) return -1; - - // Find a free frame buffer. - for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) { - if (!int_fb_list->int_fb[i].in_use) break; - } - - if (i == int_fb_list->num_internal_frame_buffers) return -1; - - if (int_fb_list->int_fb[i].size < min_size) { - aom_free(int_fb_list->int_fb[i].data); - // The data must be zeroed to fix a valgrind error from the C loop filter - // due to access uninitialized memory in frame border. It could be - // skipped if border were totally removed. 
- int_fb_list->int_fb[i].data = (uint8_t *)aom_calloc(1, min_size); - if (!int_fb_list->int_fb[i].data) return -1; - int_fb_list->int_fb[i].size = min_size; - } - - fb->data = int_fb_list->int_fb[i].data; - fb->size = int_fb_list->int_fb[i].size; - int_fb_list->int_fb[i].in_use = 1; - - // Set the frame buffer's private data to point at the internal frame buffer. - fb->priv = &int_fb_list->int_fb[i]; - return 0; -} - -int av1_release_frame_buffer(void *cb_priv, aom_codec_frame_buffer_t *fb) { - InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv; - (void)cb_priv; - if (int_fb) int_fb->in_use = 0; - fb->priv = NULL; - return 0; -} diff --git a/third_party/aom/av1/common/frame_buffers.h b/third_party/aom/av1/common/frame_buffers.h deleted file mode 100644 index 16188e51c..000000000 --- a/third_party/aom/av1/common/frame_buffers.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_FRAME_BUFFERS_H_ -#define AOM_AV1_COMMON_FRAME_BUFFERS_H_ - -#include "aom/aom_frame_buffer.h" -#include "aom/aom_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct InternalFrameBuffer { - uint8_t *data; - size_t size; - int in_use; -} InternalFrameBuffer; - -typedef struct InternalFrameBufferList { - int num_internal_frame_buffers; - InternalFrameBuffer *int_fb; -} InternalFrameBufferList; - -// Initializes |list|. Returns 0 on success. 
-int av1_alloc_internal_frame_buffers(InternalFrameBufferList *list); - -// Free any data allocated to the frame buffers. -void av1_free_internal_frame_buffers(InternalFrameBufferList *list); - -// Zeros all unused internal frame buffers. In particular, this zeros the -// frame borders. Call this function after a sequence header change to -// re-initialize the frame borders for the different width, height, or bit -// depth. -void av1_zero_unused_internal_frame_buffers(InternalFrameBufferList *list); - -// Callback used by libaom to request an external frame buffer. |cb_priv| -// Callback private data, which points to an InternalFrameBufferList. -// |min_size| is the minimum size in bytes needed to decode the next frame. -// |fb| pointer to the frame buffer. -int av1_get_frame_buffer(void *cb_priv, size_t min_size, - aom_codec_frame_buffer_t *fb); - -// Callback used by libaom when there are no references to the frame buffer. -// |cb_priv| is not used. |fb| pointer to the frame buffer. -int av1_release_frame_buffer(void *cb_priv, aom_codec_frame_buffer_t *fb); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_FRAME_BUFFERS_H_ diff --git a/third_party/aom/av1/common/idct.c b/third_party/aom/av1/common/idct.c deleted file mode 100644 index 2c1cb9827..000000000 --- a/third_party/aom/av1/common/idct.c +++ /dev/null @@ -1,322 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <math.h> - -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "aom_ports/mem.h" -#include "av1/common/av1_inv_txfm1d_cfg.h" -#include "av1/common/av1_txfm.h" -#include "av1/common/blockd.h" -#include "av1/common/enums.h" -#include "av1/common/idct.h" - -int av1_get_tx_scale(const TX_SIZE tx_size) { - const int pels = tx_size_2d[tx_size]; - // Largest possible pels is 4096 (64x64). - return (pels > 256) + (pels > 1024); -} - -// NOTE: The implementation of all inverses need to be aware of the fact -// that input and output could be the same buffer. - -// idct -void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { - if (eob > 1) - av1_highbd_iwht4x4_16_add(input, dest, stride, bd); - else - av1_highbd_iwht4x4_1_add(input, dest, stride, bd); -} - -void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); - int eob = txfm_param->eob; - int bd = txfm_param->bd; - int lossless = txfm_param->lossless; - const int32_t *src = cast_to_int32(input); - const TX_TYPE tx_type = txfm_param->tx_type; - if (lossless) { - assert(tx_type == DCT_DCT); - av1_highbd_iwht4x4_add(input, dest, stride, eob, bd); - return; - } - - av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd); -} - -void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_4x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); - const int32_t *src = 
cast_to_int32(input); - av1_inv_txfm2d_add_8x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_16x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_32x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_16x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_4x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_32x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_8x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - 
av1_inv_txfm2d_add_32x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_64x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_16x64(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_16x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_64x16(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_64x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - - av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd); -} - -void av1_highbd_inv_txfm_add_16x16_c(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - - av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, - bd); -} - -void av1_highbd_inv_txfm_add_8x16_c(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_16x8_c(const tran_low_t *input, uint8_t *dest, - int stride, const 
TxfmParam *txfm_param) { - const int32_t *src = cast_to_int32(input); - av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); -} - -void av1_highbd_inv_txfm_add_32x32_c(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - - av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, - bd); -} - -void av1_highbd_inv_txfm_add_64x64_c(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - const int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - assert(tx_type == DCT_DCT); - av1_inv_txfm2d_add_64x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, - bd); -} - -static void init_txfm_param(const MACROBLOCKD *xd, int plane, TX_SIZE tx_size, - TX_TYPE tx_type, int eob, int reduced_tx_set, - TxfmParam *txfm_param) { - (void)plane; - txfm_param->tx_type = tx_type; - txfm_param->tx_size = tx_size; - txfm_param->eob = eob; - txfm_param->lossless = xd->lossless[xd->mi[0]->segment_id]; - txfm_param->bd = xd->bd; - txfm_param->is_hbd = get_bitdepth_data_path_index(xd); - txfm_param->tx_set_type = av1_get_ext_tx_set_type( - txfm_param->tx_size, is_inter_block(xd->mi[0]), reduced_tx_set); -} - -void av1_highbd_inv_txfm_add_c(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); - const TX_SIZE tx_size = txfm_param->tx_size; - switch (tx_size) { - case TX_32X32: - av1_highbd_inv_txfm_add_32x32_c(input, dest, stride, txfm_param); - break; - case TX_16X16: - av1_highbd_inv_txfm_add_16x16_c(input, dest, stride, txfm_param); - break; - case TX_8X8: - av1_highbd_inv_txfm_add_8x8_c(input, dest, stride, txfm_param); - break; - case TX_4X8: - 
av1_highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param); - break; - case TX_8X4: - av1_highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param); - break; - case TX_8X16: - av1_highbd_inv_txfm_add_8x16_c(input, dest, stride, txfm_param); - break; - case TX_16X8: - av1_highbd_inv_txfm_add_16x8_c(input, dest, stride, txfm_param); - break; - case TX_16X32: - av1_highbd_inv_txfm_add_16x32(input, dest, stride, txfm_param); - break; - case TX_32X16: - av1_highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param); - break; - case TX_64X64: - av1_highbd_inv_txfm_add_64x64_c(input, dest, stride, txfm_param); - break; - case TX_32X64: - av1_highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param); - break; - case TX_64X32: - av1_highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param); - break; - case TX_16X64: - av1_highbd_inv_txfm_add_16x64(input, dest, stride, txfm_param); - break; - case TX_64X16: - av1_highbd_inv_txfm_add_64x16(input, dest, stride, txfm_param); - break; - case TX_4X4: - // this is like av1_short_idct4x4 but has a special case around eob<=1 - // which is significant (not just an optimization) for the lossless - // case. 
- av1_highbd_inv_txfm_add_4x4_c(input, dest, stride, txfm_param); - break; - case TX_16X4: - av1_highbd_inv_txfm_add_16x4(input, dest, stride, txfm_param); - break; - case TX_4X16: - av1_highbd_inv_txfm_add_4x16(input, dest, stride, txfm_param); - break; - case TX_8X32: - av1_highbd_inv_txfm_add_8x32(input, dest, stride, txfm_param); - break; - case TX_32X8: - av1_highbd_inv_txfm_add_32x8(input, dest, stride, txfm_param); - break; - default: assert(0 && "Invalid transform size"); break; - } -} - -void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, - const TxfmParam *txfm_param) { - const TX_SIZE tx_size = txfm_param->tx_size; - DECLARE_ALIGNED(32, uint16_t, tmp[MAX_TX_SQUARE]); - int tmp_stride = MAX_TX_SIZE; - int w = tx_size_wide[tx_size]; - int h = tx_size_high[tx_size]; - for (int r = 0; r < h; ++r) { - for (int c = 0; c < w; ++c) { - tmp[r * tmp_stride + c] = dst[r * stride + c]; - } - } - - av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, - txfm_param); - - for (int r = 0; r < h; ++r) { - for (int c = 0; c < w; ++c) { - dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c]; - } - } -} - -void av1_inverse_transform_block(const MACROBLOCKD *xd, - const tran_low_t *dqcoeff, int plane, - TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst, - int stride, int eob, int reduced_tx_set) { - if (!eob) return; - - assert(eob <= av1_get_max_eob(tx_size)); - - TxfmParam txfm_param; - init_txfm_param(xd, plane, tx_size, tx_type, eob, reduced_tx_set, - &txfm_param); - assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]); - - if (txfm_param.is_hbd) { - av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); - } else { - av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); - } -} diff --git a/third_party/aom/av1/common/idct.h b/third_party/aom/av1/common/idct.h deleted file mode 100644 index d9454e73f..000000000 --- a/third_party/aom/av1/common/idct.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2016, 
Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_IDCT_H_ -#define AOM_AV1_COMMON_IDCT_H_ - -#include "config/aom_config.h" - -#include "av1/common/blockd.h" -#include "av1/common/common.h" -#include "av1/common/enums.h" -#include "aom_dsp/txfm_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void (*transform_1d)(const tran_low_t *, tran_low_t *); - -typedef struct { - transform_1d cols, rows; // vertical and horizontal -} transform_2d; - -#define MAX_TX_SCALE 1 -int av1_get_tx_scale(const TX_SIZE tx_size); - -void av1_inverse_transform_block(const MACROBLOCKD *xd, - const tran_low_t *dqcoeff, int plane, - TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst, - int stride, int eob, int reduced_tx_set); -void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); - -static INLINE const int32_t *cast_to_int32(const tran_low_t *input) { - assert(sizeof(int32_t) == sizeof(tran_low_t)); - return (const int32_t *)input; -} - -typedef void(highbd_inv_txfm_add)(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *param); - -highbd_inv_txfm_add av1_highbd_inv_txfm_add_4x8; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_8x4; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_16x32; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_32x16; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_32x64; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_64x32; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_16x64; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_64x16; 
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_16x4; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_4x16; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_8x32; -highbd_inv_txfm_add av1_highbd_inv_txfm_add_32x8; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_IDCT_H_ diff --git a/third_party/aom/av1/common/mv.h b/third_party/aom/av1/common/mv.h deleted file mode 100644 index 5b0225192..000000000 --- a/third_party/aom/av1/common/mv.h +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_MV_H_ -#define AOM_AV1_COMMON_MV_H_ - -#include "av1/common/common.h" -#include "av1/common/common_data.h" -#include "aom_dsp/aom_filter.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define INVALID_MV 0x80008000 - -typedef struct mv { - int16_t row; - int16_t col; -} MV; - -static const MV kZeroMv = { 0, 0 }; - -typedef union int_mv { - uint32_t as_int; - MV as_mv; -} int_mv; /* facilitates faster equality tests and copies */ - -typedef struct mv32 { - int32_t row; - int32_t col; -} MV32; - -// Bits of precision used for the model -#define WARPEDMODEL_PREC_BITS 16 -#define WARPEDMODEL_ROW3HOMO_PREC_BITS 16 - -#define WARPEDMODEL_TRANS_CLAMP (128 << WARPEDMODEL_PREC_BITS) -#define WARPEDMODEL_NONDIAGAFFINE_CLAMP (1 << (WARPEDMODEL_PREC_BITS - 3)) -#define WARPEDMODEL_ROW3HOMO_CLAMP (1 << (WARPEDMODEL_PREC_BITS - 2)) - -// Bits of subpel precision for warped interpolation -#define WARPEDPIXEL_PREC_BITS 6 -#define WARPEDPIXEL_PREC_SHIFTS (1 << WARPEDPIXEL_PREC_BITS) - -#define WARP_PARAM_REDUCE_BITS 6 - -#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS) - -/* clang-format off */ -typedef enum ATTRIBUTE_PACKED { - IDENTITY = 0, // identity transformation, 0-parameter - TRANSLATION = 1, // translational motion 2-parameter - ROTZOOM = 2, // simplified affine with rotation + zoom only, 4-parameter - AFFINE = 3, // affine, 6-parameter - TRANS_TYPES, -} TransformationType; -/* clang-format on */ - -// Number of types used for global motion (must be >= 3 and <= TRANS_TYPES) -// The following can be useful: -// GLOBAL_TRANS_TYPES 3 - up to rotation-zoom -// GLOBAL_TRANS_TYPES 4 - up to affine -// GLOBAL_TRANS_TYPES 6 - up to hor/ver trapezoids -// GLOBAL_TRANS_TYPES 7 - up to full homography -#define GLOBAL_TRANS_TYPES 4 - -typedef struct { - int global_warp_allowed; - int local_warp_allowed; -} WarpTypesAllowed; - -// number of parameters used by each transformation in TransformationTypes -static const int 
trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 }; - -// The order of values in the wmmat matrix below is best described -// by the homography: -// [x' (m2 m3 m0 [x -// z . y' = m4 m5 m1 * y -// 1] m6 m7 1) 1] -typedef struct { - TransformationType wmtype; - int32_t wmmat[8]; - int16_t alpha, beta, gamma, delta; - int8_t invalid; -} WarpedMotionParams; - -/* clang-format off */ -static const WarpedMotionParams default_warp_params = { - IDENTITY, - { 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0, - 0 }, - 0, 0, 0, 0, - 0, -}; -/* clang-format on */ - -// The following constants describe the various precisions -// of different parameters in the global motion experiment. -// -// Given the general homography: -// [x' (a b c [x -// z . y' = d e f * y -// 1] g h i) 1] -// -// Constants using the name ALPHA here are related to parameters -// a, b, d, e. Constants using the name TRANS are related -// to parameters c and f. -// -// Anything ending in PREC_BITS is the number of bits of precision -// to maintain when converting from double to integer. -// -// The ABS parameters are used to create an upper and lower bound -// for each parameter. In other words, after a parameter is integerized -// it is clamped between -(1 << ABS_XXX_BITS) and (1 << ABS_XXX_BITS). -// -// XXX_PREC_DIFF and XXX_DECODE_FACTOR -// are computed once here to prevent repetitive -// computation on the decoder side. These are -// to allow the global motion parameters to be encoded in a lower -// precision than the warped model precision. This means that they -// need to be changed to warped precision when they are decoded. 
-// -// XX_MIN, XX_MAX are also computed to avoid repeated computation - -#define SUBEXPFIN_K 3 -#define GM_TRANS_PREC_BITS 6 -#define GM_ABS_TRANS_BITS 12 -#define GM_ABS_TRANS_ONLY_BITS (GM_ABS_TRANS_BITS - GM_TRANS_PREC_BITS + 3) -#define GM_TRANS_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_TRANS_PREC_BITS) -#define GM_TRANS_ONLY_PREC_DIFF (WARPEDMODEL_PREC_BITS - 3) -#define GM_TRANS_DECODE_FACTOR (1 << GM_TRANS_PREC_DIFF) -#define GM_TRANS_ONLY_DECODE_FACTOR (1 << GM_TRANS_ONLY_PREC_DIFF) - -#define GM_ALPHA_PREC_BITS 15 -#define GM_ABS_ALPHA_BITS 12 -#define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS) -#define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF) - -#define GM_ROW3HOMO_PREC_BITS 16 -#define GM_ABS_ROW3HOMO_BITS 11 -#define GM_ROW3HOMO_PREC_DIFF \ - (WARPEDMODEL_ROW3HOMO_PREC_BITS - GM_ROW3HOMO_PREC_BITS) -#define GM_ROW3HOMO_DECODE_FACTOR (1 << GM_ROW3HOMO_PREC_DIFF) - -#define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS) -#define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS) -#define GM_ROW3HOMO_MAX (1 << GM_ABS_ROW3HOMO_BITS) - -#define GM_TRANS_MIN -GM_TRANS_MAX -#define GM_ALPHA_MIN -GM_ALPHA_MAX -#define GM_ROW3HOMO_MIN -GM_ROW3HOMO_MAX - -static INLINE int block_center_x(int mi_col, BLOCK_SIZE bs) { - const int bw = block_size_wide[bs]; - return mi_col * MI_SIZE + bw / 2 - 1; -} - -static INLINE int block_center_y(int mi_row, BLOCK_SIZE bs) { - const int bh = block_size_high[bs]; - return mi_row * MI_SIZE + bh / 2 - 1; -} - -static INLINE int convert_to_trans_prec(int allow_hp, int coor) { - if (allow_hp) - return ROUND_POWER_OF_TWO_SIGNED(coor, WARPEDMODEL_PREC_BITS - 3); - else - return ROUND_POWER_OF_TWO_SIGNED(coor, WARPEDMODEL_PREC_BITS - 2) * 2; -} -static INLINE void integer_mv_precision(MV *mv) { - int mod = (mv->row % 8); - if (mod != 0) { - mv->row -= mod; - if (abs(mod) > 4) { - if (mod > 0) { - mv->row += 8; - } else { - mv->row -= 8; - } - } - } - - mod = (mv->col % 8); - if (mod != 0) { - mv->col -= mod; - if (abs(mod) > 4) { - 
if (mod > 0) { - mv->col += 8; - } else { - mv->col -= 8; - } - } - } -} -// Convert a global motion vector into a motion vector at the centre of the -// given block. -// -// The resulting motion vector will have three fractional bits of precision. If -// allow_hp is zero, the bottom bit will always be zero. If CONFIG_AMVR and -// is_integer is true, the bottom three bits will be zero (so the motion vector -// represents an integer) -static INLINE int_mv gm_get_motion_vector(const WarpedMotionParams *gm, - int allow_hp, BLOCK_SIZE bsize, - int mi_col, int mi_row, - int is_integer) { - int_mv res; - - if (gm->wmtype == IDENTITY) { - res.as_int = 0; - return res; - } - - const int32_t *mat = gm->wmmat; - int x, y, tx, ty; - - if (gm->wmtype == TRANSLATION) { - // All global motion vectors are stored with WARPEDMODEL_PREC_BITS (16) - // bits of fractional precision. The offset for a translation is stored in - // entries 0 and 1. For translations, all but the top three (two if - // cm->allow_high_precision_mv is false) fractional bits are always zero. - // - // After the right shifts, there are 3 fractional bits of precision. 
If - // allow_hp is false, the bottom bit is always zero (so we don't need a - // call to convert_to_trans_prec here) - res.as_mv.row = gm->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF; - res.as_mv.col = gm->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF; - assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col), allow_hp)); - if (is_integer) { - integer_mv_precision(&res.as_mv); - } - return res; - } - - x = block_center_x(mi_col, bsize); - y = block_center_y(mi_row, bsize); - - if (gm->wmtype == ROTZOOM) { - assert(gm->wmmat[5] == gm->wmmat[2]); - assert(gm->wmmat[4] == -gm->wmmat[3]); - } - - const int xc = - (mat[2] - (1 << WARPEDMODEL_PREC_BITS)) * x + mat[3] * y + mat[0]; - const int yc = - mat[4] * x + (mat[5] - (1 << WARPEDMODEL_PREC_BITS)) * y + mat[1]; - tx = convert_to_trans_prec(allow_hp, xc); - ty = convert_to_trans_prec(allow_hp, yc); - - res.as_mv.row = ty; - res.as_mv.col = tx; - - if (is_integer) { - integer_mv_precision(&res.as_mv); - } - return res; -} - -static INLINE TransformationType get_gmtype(const WarpedMotionParams *gm) { - if (gm->wmmat[5] == (1 << WARPEDMODEL_PREC_BITS) && !gm->wmmat[4] && - gm->wmmat[2] == (1 << WARPEDMODEL_PREC_BITS) && !gm->wmmat[3]) { - return ((!gm->wmmat[1] && !gm->wmmat[0]) ? 
IDENTITY : TRANSLATION); - } - if (gm->wmmat[2] == gm->wmmat[5] && gm->wmmat[3] == -gm->wmmat[4]) - return ROTZOOM; - else - return AFFINE; -} - -typedef struct candidate_mv { - int_mv this_mv; - int_mv comp_mv; - int weight; -} CANDIDATE_MV; - -static INLINE int is_zero_mv(const MV *mv) { - return *((const uint32_t *)mv) == 0; -} - -static INLINE int is_equal_mv(const MV *a, const MV *b) { - return *((const uint32_t *)a) == *((const uint32_t *)b); -} - -static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row, - int max_row) { - mv->col = clamp(mv->col, min_col, max_col); - mv->row = clamp(mv->row, min_row, max_row); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_MV_H_ diff --git a/third_party/aom/av1/common/mvref_common.c b/third_party/aom/av1/common/mvref_common.c deleted file mode 100644 index 7f24ab4e6..000000000 --- a/third_party/aom/av1/common/mvref_common.c +++ /dev/null @@ -1,1523 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <stdlib.h> - -#include "av1/common/mvref_common.h" -#include "av1/common/warped_motion.h" - -// Although we assign 32 bit integers, all the values are strictly under 14 -// bits. -static int div_mult[32] = { 0, 16384, 8192, 5461, 4096, 3276, 2730, 2340, - 2048, 1820, 1638, 1489, 1365, 1260, 1170, 1092, - 1024, 963, 910, 862, 819, 780, 744, 712, - 682, 655, 630, 606, 585, 564, 546, 528 }; - -// TODO(jingning): Consider the use of lookup table for (num / den) -// altogether. 
-static void get_mv_projection(MV *output, MV ref, int num, int den) { - den = AOMMIN(den, MAX_FRAME_DISTANCE); - num = num > 0 ? AOMMIN(num, MAX_FRAME_DISTANCE) - : AOMMAX(num, -MAX_FRAME_DISTANCE); - const int mv_row = - ROUND_POWER_OF_TWO_SIGNED(ref.row * num * div_mult[den], 14); - const int mv_col = - ROUND_POWER_OF_TWO_SIGNED(ref.col * num * div_mult[den], 14); - const int clamp_max = MV_UPP - 1; - const int clamp_min = MV_LOW + 1; - output->row = (int16_t)clamp(mv_row, clamp_min, clamp_max); - output->col = (int16_t)clamp(mv_col, clamp_min, clamp_max); -} - -void av1_copy_frame_mvs(const AV1_COMMON *const cm, - const MB_MODE_INFO *const mi, int mi_row, int mi_col, - int x_mis, int y_mis) { - const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1); - MV_REF *frame_mvs = - cm->cur_frame->mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1); - x_mis = ROUND_POWER_OF_TWO(x_mis, 1); - y_mis = ROUND_POWER_OF_TWO(y_mis, 1); - int w, h; - - for (h = 0; h < y_mis; h++) { - MV_REF *mv = frame_mvs; - for (w = 0; w < x_mis; w++) { - mv->ref_frame = NONE_FRAME; - mv->mv.as_int = 0; - - for (int idx = 0; idx < 2; ++idx) { - MV_REFERENCE_FRAME ref_frame = mi->ref_frame[idx]; - if (ref_frame > INTRA_FRAME) { - int8_t ref_idx = cm->ref_frame_side[ref_frame]; - if (ref_idx) continue; - if ((abs(mi->mv[idx].as_mv.row) > REFMVS_LIMIT) || - (abs(mi->mv[idx].as_mv.col) > REFMVS_LIMIT)) - continue; - mv->ref_frame = ref_frame; - mv->mv.as_int = mi->mv[idx].as_int; - } - } - mv++; - } - frame_mvs += frame_mvs_stride; - } -} - -static void add_ref_mv_candidate( - const MB_MODE_INFO *const candidate, const MV_REFERENCE_FRAME rf[2], - uint8_t *refmv_count, uint8_t *ref_match_count, uint8_t *newmv_count, - CANDIDATE_MV *ref_mv_stack, int_mv *gm_mv_candidates, - const WarpedMotionParams *gm_params, int col, int weight) { - if (!is_inter_block(candidate)) return; // for intrabc - int index = 0, ref; - assert(weight % 2 == 0); - - if (rf[1] == NONE_FRAME) { - // single reference 
frame - for (ref = 0; ref < 2; ++ref) { - if (candidate->ref_frame[ref] == rf[0]) { - int_mv this_refmv; - if (is_global_mv_block(candidate, gm_params[rf[0]].wmtype)) - this_refmv = gm_mv_candidates[0]; - else - this_refmv = get_sub_block_mv(candidate, ref, col); - - for (index = 0; index < *refmv_count; ++index) - if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break; - - if (index < *refmv_count) ref_mv_stack[index].weight += weight; - - // Add a new item to the list. - if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) { - ref_mv_stack[index].this_mv = this_refmv; - ref_mv_stack[index].weight = weight; - ++(*refmv_count); - } - if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count; - ++*ref_match_count; - } - } - } else { - // compound reference frame - if (candidate->ref_frame[0] == rf[0] && candidate->ref_frame[1] == rf[1]) { - int_mv this_refmv[2]; - - for (ref = 0; ref < 2; ++ref) { - if (is_global_mv_block(candidate, gm_params[rf[ref]].wmtype)) - this_refmv[ref] = gm_mv_candidates[ref]; - else - this_refmv[ref] = get_sub_block_mv(candidate, ref, col); - } - - for (index = 0; index < *refmv_count; ++index) - if ((ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int) && - (ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int)) - break; - - if (index < *refmv_count) ref_mv_stack[index].weight += weight; - - // Add a new item to the list. 
- if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) { - ref_mv_stack[index].this_mv = this_refmv[0]; - ref_mv_stack[index].comp_mv = this_refmv[1]; - ref_mv_stack[index].weight = weight; - ++(*refmv_count); - } - if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count; - ++*ref_match_count; - } - } -} - -static void scan_row_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd, - int mi_row, int mi_col, - const MV_REFERENCE_FRAME rf[2], int row_offset, - CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count, - uint8_t *ref_match_count, uint8_t *newmv_count, - int_mv *gm_mv_candidates, int max_row_offset, - int *processed_rows) { - int end_mi = AOMMIN(xd->n4_w, cm->mi_cols - mi_col); - end_mi = AOMMIN(end_mi, mi_size_wide[BLOCK_64X64]); - const int n8_w_8 = mi_size_wide[BLOCK_8X8]; - const int n8_w_16 = mi_size_wide[BLOCK_16X16]; - int i; - int col_offset = 0; - // TODO(jingning): Revisit this part after cb4x4 is stable. - if (abs(row_offset) > 1) { - col_offset = 1; - if ((mi_col & 0x01) && xd->n4_w < n8_w_8) --col_offset; - } - const int use_step_16 = (xd->n4_w >= 16); - MB_MODE_INFO **const candidate_mi0 = xd->mi + row_offset * xd->mi_stride; - (void)mi_row; - - for (i = 0; i < end_mi;) { - const MB_MODE_INFO *const candidate = candidate_mi0[col_offset + i]; - const int candidate_bsize = candidate->sb_type; - const int n4_w = mi_size_wide[candidate_bsize]; - int len = AOMMIN(xd->n4_w, n4_w); - if (use_step_16) - len = AOMMAX(n8_w_16, len); - else if (abs(row_offset) > 1) - len = AOMMAX(len, n8_w_8); - - int weight = 2; - if (xd->n4_w >= n8_w_8 && xd->n4_w <= n4_w) { - int inc = AOMMIN(-max_row_offset + row_offset + 1, - mi_size_high[candidate_bsize]); - // Obtain range used in weight calculation. - weight = AOMMAX(weight, inc); - // Update processed rows. 
- *processed_rows = inc - row_offset - 1; - } - - add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count, - newmv_count, ref_mv_stack, gm_mv_candidates, - cm->global_motion, col_offset + i, len * weight); - - i += len; - } -} - -static void scan_col_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd, - int mi_row, int mi_col, - const MV_REFERENCE_FRAME rf[2], int col_offset, - CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count, - uint8_t *ref_match_count, uint8_t *newmv_count, - int_mv *gm_mv_candidates, int max_col_offset, - int *processed_cols) { - int end_mi = AOMMIN(xd->n4_h, cm->mi_rows - mi_row); - end_mi = AOMMIN(end_mi, mi_size_high[BLOCK_64X64]); - const int n8_h_8 = mi_size_high[BLOCK_8X8]; - const int n8_h_16 = mi_size_high[BLOCK_16X16]; - int i; - int row_offset = 0; - if (abs(col_offset) > 1) { - row_offset = 1; - if ((mi_row & 0x01) && xd->n4_h < n8_h_8) --row_offset; - } - const int use_step_16 = (xd->n4_h >= 16); - (void)mi_col; - - for (i = 0; i < end_mi;) { - const MB_MODE_INFO *const candidate = - xd->mi[(row_offset + i) * xd->mi_stride + col_offset]; - const int candidate_bsize = candidate->sb_type; - const int n4_h = mi_size_high[candidate_bsize]; - int len = AOMMIN(xd->n4_h, n4_h); - if (use_step_16) - len = AOMMAX(n8_h_16, len); - else if (abs(col_offset) > 1) - len = AOMMAX(len, n8_h_8); - - int weight = 2; - if (xd->n4_h >= n8_h_8 && xd->n4_h <= n4_h) { - int inc = AOMMIN(-max_col_offset + col_offset + 1, - mi_size_wide[candidate_bsize]); - // Obtain range used in weight calculation. - weight = AOMMAX(weight, inc); - // Update processed cols. 
- *processed_cols = inc - col_offset - 1; - } - - add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count, - newmv_count, ref_mv_stack, gm_mv_candidates, - cm->global_motion, col_offset, len * weight); - - i += len; - } -} - -static void scan_blk_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd, - const int mi_row, const int mi_col, - const MV_REFERENCE_FRAME rf[2], int row_offset, - int col_offset, CANDIDATE_MV *ref_mv_stack, - uint8_t *ref_match_count, uint8_t *newmv_count, - int_mv *gm_mv_candidates, - uint8_t refmv_count[MODE_CTX_REF_FRAMES]) { - const TileInfo *const tile = &xd->tile; - POSITION mi_pos; - - mi_pos.row = row_offset; - mi_pos.col = col_offset; - - if (is_inside(tile, mi_col, mi_row, &mi_pos)) { - const MB_MODE_INFO *const candidate = - xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col]; - const int len = mi_size_wide[BLOCK_8X8]; - - add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count, - newmv_count, ref_mv_stack, gm_mv_candidates, - cm->global_motion, mi_pos.col, 2 * len); - } // Analyze a single 8x8 block motion information. 
-} - -static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd, - int mi_row, int mi_col, int bs) { - const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size]; - const int mask_row = mi_row & (sb_mi_size - 1); - const int mask_col = mi_col & (sb_mi_size - 1); - - if (bs > mi_size_wide[BLOCK_64X64]) return 0; - - // In a split partition all apart from the bottom right has a top right - int has_tr = !((mask_row & bs) && (mask_col & bs)); - - // bs > 0 and bs is a power of 2 - assert(bs > 0 && !(bs & (bs - 1))); - - // For each 4x4 group of blocks, when the bottom right is decoded the blocks - // to the right have not been decoded therefore the bottom right does - // not have a top right - while (bs < sb_mi_size) { - if (mask_col & bs) { - if ((mask_col & (2 * bs)) && (mask_row & (2 * bs))) { - has_tr = 0; - break; - } - } else { - break; - } - bs <<= 1; - } - - // The left hand of two vertical rectangles always has a top right (as the - // block above will have been decoded) - if (xd->n4_w < xd->n4_h) - if (!xd->is_sec_rect) has_tr = 1; - - // The bottom of two horizontal rectangles never has a top right (as the block - // to the right won't have been decoded) - if (xd->n4_w > xd->n4_h) - if (xd->is_sec_rect) has_tr = 0; - - // The bottom left square of a Vertical A (in the old format) does - // not have a top right as it is decoded before the right hand - // rectangle of the partition - if (xd->mi[0]->partition == PARTITION_VERT_A) { - if (xd->n4_w == xd->n4_h) - if (mask_row & bs) has_tr = 0; - } - - return has_tr; -} - -static int check_sb_border(const int mi_row, const int mi_col, - const int row_offset, const int col_offset) { - const int sb_mi_size = mi_size_wide[BLOCK_64X64]; - const int row = mi_row & (sb_mi_size - 1); - const int col = mi_col & (sb_mi_size - 1); - - if (row + row_offset < 0 || row + row_offset >= sb_mi_size || - col + col_offset < 0 || col + col_offset >= sb_mi_size) - return 0; - - return 1; -} - -static int 
add_tpl_ref_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd, - int mi_row, int mi_col, MV_REFERENCE_FRAME ref_frame, - int blk_row, int blk_col, int_mv *gm_mv_candidates, - uint8_t refmv_count[MODE_CTX_REF_FRAMES], - CANDIDATE_MV ref_mv_stacks[][MAX_REF_MV_STACK_SIZE], - int16_t *mode_context) { - POSITION mi_pos; - int idx; - const int weight_unit = 1; // mi_size_wide[BLOCK_8X8]; - - mi_pos.row = (mi_row & 0x01) ? blk_row : blk_row + 1; - mi_pos.col = (mi_col & 0x01) ? blk_col : blk_col + 1; - - if (!is_inside(&xd->tile, mi_col, mi_row, &mi_pos)) return 0; - - const TPL_MV_REF *prev_frame_mvs = - cm->tpl_mvs + ((mi_row + mi_pos.row) >> 1) * (cm->mi_stride >> 1) + - ((mi_col + mi_pos.col) >> 1); - - MV_REFERENCE_FRAME rf[2]; - av1_set_ref_frame(rf, ref_frame); - - if (rf[1] == NONE_FRAME) { - int cur_frame_index = cm->cur_frame->cur_frame_offset; - int buf_idx_0 = cm->frame_refs[FWD_RF_OFFSET(rf[0])].idx; - int frame0_index = cm->buffer_pool->frame_bufs[buf_idx_0].cur_frame_offset; - int cur_offset_0 = get_relative_dist(cm, cur_frame_index, frame0_index); - CANDIDATE_MV *ref_mv_stack = ref_mv_stacks[rf[0]]; - - if (prev_frame_mvs->mfmv0.as_int != INVALID_MV) { - int_mv this_refmv; - - get_mv_projection(&this_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv, - cur_offset_0, prev_frame_mvs->ref_frame_offset); - lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv, - cm->cur_frame_force_integer_mv); - - if (blk_row == 0 && blk_col == 0) - if (abs(this_refmv.as_mv.row - gm_mv_candidates[0].as_mv.row) >= 16 || - abs(this_refmv.as_mv.col - gm_mv_candidates[0].as_mv.col) >= 16) - mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET); - - for (idx = 0; idx < refmv_count[rf[0]]; ++idx) - if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break; - - if (idx < refmv_count[rf[0]]) ref_mv_stack[idx].weight += 2 * weight_unit; - - if (idx == refmv_count[rf[0]] && - refmv_count[rf[0]] < MAX_REF_MV_STACK_SIZE) { - ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int; - 
ref_mv_stack[idx].weight = 2 * weight_unit; - ++(refmv_count[rf[0]]); - } - return 1; - } - } else { - // Process compound inter mode - int cur_frame_index = cm->cur_frame->cur_frame_offset; - int buf_idx_0 = cm->frame_refs[FWD_RF_OFFSET(rf[0])].idx; - int frame0_index = cm->buffer_pool->frame_bufs[buf_idx_0].cur_frame_offset; - - int cur_offset_0 = get_relative_dist(cm, cur_frame_index, frame0_index); - int buf_idx_1 = cm->frame_refs[FWD_RF_OFFSET(rf[1])].idx; - int frame1_index = cm->buffer_pool->frame_bufs[buf_idx_1].cur_frame_offset; - int cur_offset_1 = get_relative_dist(cm, cur_frame_index, frame1_index); - CANDIDATE_MV *ref_mv_stack = ref_mv_stacks[ref_frame]; - - if (prev_frame_mvs->mfmv0.as_int != INVALID_MV) { - int_mv this_refmv; - int_mv comp_refmv; - get_mv_projection(&this_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv, - cur_offset_0, prev_frame_mvs->ref_frame_offset); - get_mv_projection(&comp_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv, - cur_offset_1, prev_frame_mvs->ref_frame_offset); - - lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv, - cm->cur_frame_force_integer_mv); - lower_mv_precision(&comp_refmv.as_mv, cm->allow_high_precision_mv, - cm->cur_frame_force_integer_mv); - - if (blk_row == 0 && blk_col == 0) - if (abs(this_refmv.as_mv.row - gm_mv_candidates[0].as_mv.row) >= 16 || - abs(this_refmv.as_mv.col - gm_mv_candidates[0].as_mv.col) >= 16 || - abs(comp_refmv.as_mv.row - gm_mv_candidates[1].as_mv.row) >= 16 || - abs(comp_refmv.as_mv.col - gm_mv_candidates[1].as_mv.col) >= 16) - mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET); - - for (idx = 0; idx < refmv_count[ref_frame]; ++idx) - if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int && - comp_refmv.as_int == ref_mv_stack[idx].comp_mv.as_int) - break; - - if (idx < refmv_count[ref_frame]) - ref_mv_stack[idx].weight += 2 * weight_unit; - - if (idx == refmv_count[ref_frame] && - refmv_count[ref_frame] < MAX_REF_MV_STACK_SIZE) { - ref_mv_stack[idx].this_mv.as_int = 
this_refmv.as_int; - ref_mv_stack[idx].comp_mv.as_int = comp_refmv.as_int; - ref_mv_stack[idx].weight = 2 * weight_unit; - ++(refmv_count[ref_frame]); - } - return 1; - } - } - return 0; -} - -static void process_compound_ref_mv_candidate( - const MB_MODE_INFO *const candidate, const AV1_COMMON *const cm, - const MV_REFERENCE_FRAME *const rf, int_mv ref_id[2][2], - int ref_id_count[2], int_mv ref_diff[2][2], int ref_diff_count[2]) { - for (int rf_idx = 0; rf_idx < 2; ++rf_idx) { - MV_REFERENCE_FRAME can_rf = candidate->ref_frame[rf_idx]; - - for (int cmp_idx = 0; cmp_idx < 2; ++cmp_idx) { - if (can_rf == rf[cmp_idx] && ref_id_count[cmp_idx] < 2) { - ref_id[cmp_idx][ref_id_count[cmp_idx]] = candidate->mv[rf_idx]; - ++ref_id_count[cmp_idx]; - } else if (can_rf > INTRA_FRAME && ref_diff_count[cmp_idx] < 2) { - int_mv this_mv = candidate->mv[rf_idx]; - if (cm->ref_frame_sign_bias[can_rf] != - cm->ref_frame_sign_bias[rf[cmp_idx]]) { - this_mv.as_mv.row = -this_mv.as_mv.row; - this_mv.as_mv.col = -this_mv.as_mv.col; - } - ref_diff[cmp_idx][ref_diff_count[cmp_idx]] = this_mv; - ++ref_diff_count[cmp_idx]; - } - } - } -} - -static void process_single_ref_mv_candidate( - const MB_MODE_INFO *const candidate, const AV1_COMMON *const cm, - MV_REFERENCE_FRAME ref_frame, uint8_t refmv_count[MODE_CTX_REF_FRAMES], - CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE]) { - for (int rf_idx = 0; rf_idx < 2; ++rf_idx) { - if (candidate->ref_frame[rf_idx] > INTRA_FRAME) { - int_mv this_mv = candidate->mv[rf_idx]; - if (cm->ref_frame_sign_bias[candidate->ref_frame[rf_idx]] != - cm->ref_frame_sign_bias[ref_frame]) { - this_mv.as_mv.row = -this_mv.as_mv.row; - this_mv.as_mv.col = -this_mv.as_mv.col; - } - int stack_idx; - for (stack_idx = 0; stack_idx < refmv_count[ref_frame]; ++stack_idx) { - const int_mv stack_mv = ref_mv_stack[ref_frame][stack_idx].this_mv; - if (this_mv.as_int == stack_mv.as_int) break; - } - - if (stack_idx == refmv_count[ref_frame]) { - 
ref_mv_stack[ref_frame][stack_idx].this_mv = this_mv; - - // TODO(jingning): Set an arbitrary small number here. The weight - // doesn't matter as long as it is properly initialized. - ref_mv_stack[ref_frame][stack_idx].weight = 2; - ++refmv_count[ref_frame]; - } - } - } -} - -static void setup_ref_mv_list( - const AV1_COMMON *cm, const MACROBLOCKD *xd, MV_REFERENCE_FRAME ref_frame, - uint8_t refmv_count[MODE_CTX_REF_FRAMES], - CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE], - int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES], int_mv *gm_mv_candidates, - int mi_row, int mi_col, int16_t *mode_context) { - const int bs = AOMMAX(xd->n4_w, xd->n4_h); - const int has_tr = has_top_right(cm, xd, mi_row, mi_col, bs); - MV_REFERENCE_FRAME rf[2]; - - const TileInfo *const tile = &xd->tile; - int max_row_offset = 0, max_col_offset = 0; - const int row_adj = (xd->n4_h < mi_size_high[BLOCK_8X8]) && (mi_row & 0x01); - const int col_adj = (xd->n4_w < mi_size_wide[BLOCK_8X8]) && (mi_col & 0x01); - int processed_rows = 0; - int processed_cols = 0; - - av1_set_ref_frame(rf, ref_frame); - mode_context[ref_frame] = 0; - refmv_count[ref_frame] = 0; - - // Find valid maximum row/col offset. - if (xd->up_available) { - max_row_offset = -(MVREF_ROW_COLS << 1) + row_adj; - - if (xd->n4_h < mi_size_high[BLOCK_8X8]) - max_row_offset = -(2 << 1) + row_adj; - - max_row_offset = find_valid_row_offset(tile, mi_row, max_row_offset); - } - - if (xd->left_available) { - max_col_offset = -(MVREF_ROW_COLS << 1) + col_adj; - - if (xd->n4_w < mi_size_wide[BLOCK_8X8]) - max_col_offset = -(2 << 1) + col_adj; - - max_col_offset = find_valid_col_offset(tile, mi_col, max_col_offset); - } - - uint8_t col_match_count = 0; - uint8_t row_match_count = 0; - uint8_t newmv_count = 0; - - // Scan the first above row mode info. 
row_offset = -1; - if (abs(max_row_offset) >= 1) - scan_row_mbmi(cm, xd, mi_row, mi_col, rf, -1, ref_mv_stack[ref_frame], - &refmv_count[ref_frame], &row_match_count, &newmv_count, - gm_mv_candidates, max_row_offset, &processed_rows); - // Scan the first left column mode info. col_offset = -1; - if (abs(max_col_offset) >= 1) - scan_col_mbmi(cm, xd, mi_row, mi_col, rf, -1, ref_mv_stack[ref_frame], - &refmv_count[ref_frame], &col_match_count, &newmv_count, - gm_mv_candidates, max_col_offset, &processed_cols); - // Check top-right boundary - if (has_tr) - scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, xd->n4_w, - ref_mv_stack[ref_frame], &row_match_count, &newmv_count, - gm_mv_candidates, &refmv_count[ref_frame]); - - const uint8_t nearest_match = (row_match_count > 0) + (col_match_count > 0); - const uint8_t nearest_refmv_count = refmv_count[ref_frame]; - - // TODO(yunqing): for comp_search, do it for all 3 cases. - for (int idx = 0; idx < nearest_refmv_count; ++idx) - ref_mv_stack[ref_frame][idx].weight += REF_CAT_LEVEL; - - if (cm->allow_ref_frame_mvs) { - int is_available = 0; - const int voffset = AOMMAX(mi_size_high[BLOCK_8X8], xd->n4_h); - const int hoffset = AOMMAX(mi_size_wide[BLOCK_8X8], xd->n4_w); - const int blk_row_end = AOMMIN(xd->n4_h, mi_size_high[BLOCK_64X64]); - const int blk_col_end = AOMMIN(xd->n4_w, mi_size_wide[BLOCK_64X64]); - - const int tpl_sample_pos[3][2] = { - { voffset, -2 }, - { voffset, hoffset }, - { voffset - 2, hoffset }, - }; - const int allow_extension = (xd->n4_h >= mi_size_high[BLOCK_8X8]) && - (xd->n4_h < mi_size_high[BLOCK_64X64]) && - (xd->n4_w >= mi_size_wide[BLOCK_8X8]) && - (xd->n4_w < mi_size_wide[BLOCK_64X64]); - - const int step_h = (xd->n4_h >= mi_size_high[BLOCK_64X64]) - ? mi_size_high[BLOCK_16X16] - : mi_size_high[BLOCK_8X8]; - const int step_w = (xd->n4_w >= mi_size_wide[BLOCK_64X64]) - ? 
mi_size_wide[BLOCK_16X16] - : mi_size_wide[BLOCK_8X8]; - - for (int blk_row = 0; blk_row < blk_row_end; blk_row += step_h) { - for (int blk_col = 0; blk_col < blk_col_end; blk_col += step_w) { - int ret = add_tpl_ref_mv(cm, xd, mi_row, mi_col, ref_frame, blk_row, - blk_col, gm_mv_candidates, refmv_count, - ref_mv_stack, mode_context); - if (blk_row == 0 && blk_col == 0) is_available = ret; - } - } - - if (is_available == 0) mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET); - - for (int i = 0; i < 3 && allow_extension; ++i) { - const int blk_row = tpl_sample_pos[i][0]; - const int blk_col = tpl_sample_pos[i][1]; - - if (!check_sb_border(mi_row, mi_col, blk_row, blk_col)) continue; - add_tpl_ref_mv(cm, xd, mi_row, mi_col, ref_frame, blk_row, blk_col, - gm_mv_candidates, refmv_count, ref_mv_stack, mode_context); - } - } - - uint8_t dummy_newmv_count = 0; - - // Scan the second outer area. - scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, -1, ref_mv_stack[ref_frame], - &row_match_count, &dummy_newmv_count, gm_mv_candidates, - &refmv_count[ref_frame]); - - for (int idx = 2; idx <= MVREF_ROW_COLS; ++idx) { - const int row_offset = -(idx << 1) + 1 + row_adj; - const int col_offset = -(idx << 1) + 1 + col_adj; - - if (abs(row_offset) <= abs(max_row_offset) && - abs(row_offset) > processed_rows) - scan_row_mbmi(cm, xd, mi_row, mi_col, rf, row_offset, - ref_mv_stack[ref_frame], &refmv_count[ref_frame], - &row_match_count, &dummy_newmv_count, gm_mv_candidates, - max_row_offset, &processed_rows); - - if (abs(col_offset) <= abs(max_col_offset) && - abs(col_offset) > processed_cols) - scan_col_mbmi(cm, xd, mi_row, mi_col, rf, col_offset, - ref_mv_stack[ref_frame], &refmv_count[ref_frame], - &col_match_count, &dummy_newmv_count, gm_mv_candidates, - max_col_offset, &processed_cols); - } - - const uint8_t ref_match_count = (row_match_count > 0) + (col_match_count > 0); - - switch (nearest_match) { - case 0: - mode_context[ref_frame] |= 0; - if (ref_match_count >= 1) 
mode_context[ref_frame] |= 1; - if (ref_match_count == 1) - mode_context[ref_frame] |= (1 << REFMV_OFFSET); - else if (ref_match_count >= 2) - mode_context[ref_frame] |= (2 << REFMV_OFFSET); - break; - case 1: - mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3; - if (ref_match_count == 1) - mode_context[ref_frame] |= (3 << REFMV_OFFSET); - else if (ref_match_count >= 2) - mode_context[ref_frame] |= (4 << REFMV_OFFSET); - break; - case 2: - default: - if (newmv_count >= 1) - mode_context[ref_frame] |= 4; - else - mode_context[ref_frame] |= 5; - - mode_context[ref_frame] |= (5 << REFMV_OFFSET); - break; - } - - // Rank the likelihood and assign nearest and near mvs. - int len = nearest_refmv_count; - while (len > 0) { - int nr_len = 0; - for (int idx = 1; idx < len; ++idx) { - if (ref_mv_stack[ref_frame][idx - 1].weight < - ref_mv_stack[ref_frame][idx].weight) { - CANDIDATE_MV tmp_mv = ref_mv_stack[ref_frame][idx - 1]; - ref_mv_stack[ref_frame][idx - 1] = ref_mv_stack[ref_frame][idx]; - ref_mv_stack[ref_frame][idx] = tmp_mv; - nr_len = idx; - } - } - len = nr_len; - } - - len = refmv_count[ref_frame]; - while (len > nearest_refmv_count) { - int nr_len = nearest_refmv_count; - for (int idx = nearest_refmv_count + 1; idx < len; ++idx) { - if (ref_mv_stack[ref_frame][idx - 1].weight < - ref_mv_stack[ref_frame][idx].weight) { - CANDIDATE_MV tmp_mv = ref_mv_stack[ref_frame][idx - 1]; - ref_mv_stack[ref_frame][idx - 1] = ref_mv_stack[ref_frame][idx]; - ref_mv_stack[ref_frame][idx] = tmp_mv; - nr_len = idx; - } - } - len = nr_len; - } - - if (rf[1] > NONE_FRAME) { - // TODO(jingning, yunqing): Refactor and consolidate the compound and - // single reference frame modes. Reduce unnecessary redundancy. 
- if (refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES) { - int_mv ref_id[2][2], ref_diff[2][2]; - int ref_id_count[2] = { 0 }, ref_diff_count[2] = { 0 }; - - int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->n4_w); - mi_width = AOMMIN(mi_width, cm->mi_cols - mi_col); - int mi_height = AOMMIN(mi_size_high[BLOCK_64X64], xd->n4_h); - mi_height = AOMMIN(mi_height, cm->mi_rows - mi_row); - int mi_size = AOMMIN(mi_width, mi_height); - - for (int idx = 0; abs(max_row_offset) >= 1 && idx < mi_size;) { - const MB_MODE_INFO *const candidate = xd->mi[-xd->mi_stride + idx]; - process_compound_ref_mv_candidate( - candidate, cm, rf, ref_id, ref_id_count, ref_diff, ref_diff_count); - idx += mi_size_wide[candidate->sb_type]; - } - - for (int idx = 0; abs(max_col_offset) >= 1 && idx < mi_size;) { - const MB_MODE_INFO *const candidate = xd->mi[idx * xd->mi_stride - 1]; - process_compound_ref_mv_candidate( - candidate, cm, rf, ref_id, ref_id_count, ref_diff, ref_diff_count); - idx += mi_size_high[candidate->sb_type]; - } - - // Build up the compound mv predictor - int_mv comp_list[3][2]; - - for (int idx = 0; idx < 2; ++idx) { - int comp_idx = 0; - for (int list_idx = 0; list_idx < ref_id_count[idx] && comp_idx < 2; - ++list_idx, ++comp_idx) - comp_list[comp_idx][idx] = ref_id[idx][list_idx]; - for (int list_idx = 0; list_idx < ref_diff_count[idx] && comp_idx < 2; - ++list_idx, ++comp_idx) - comp_list[comp_idx][idx] = ref_diff[idx][list_idx]; - for (; comp_idx < 3; ++comp_idx) - comp_list[comp_idx][idx] = gm_mv_candidates[idx]; - } - - if (refmv_count[ref_frame]) { - assert(refmv_count[ref_frame] == 1); - if (comp_list[0][0].as_int == - ref_mv_stack[ref_frame][0].this_mv.as_int && - comp_list[0][1].as_int == - ref_mv_stack[ref_frame][0].comp_mv.as_int) { - ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv = - comp_list[1][0]; - ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv = - comp_list[1][1]; - } else { - ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv = - 
comp_list[0][0]; - ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv = - comp_list[0][1]; - } - ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2; - ++refmv_count[ref_frame]; - } else { - for (int idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) { - ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv = - comp_list[idx][0]; - ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv = - comp_list[idx][1]; - ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2; - ++refmv_count[ref_frame]; - } - } - } - - assert(refmv_count[ref_frame] >= 2); - - for (int idx = 0; idx < refmv_count[ref_frame]; ++idx) { - clamp_mv_ref(&ref_mv_stack[ref_frame][idx].this_mv.as_mv, - xd->n4_w << MI_SIZE_LOG2, xd->n4_h << MI_SIZE_LOG2, xd); - clamp_mv_ref(&ref_mv_stack[ref_frame][idx].comp_mv.as_mv, - xd->n4_w << MI_SIZE_LOG2, xd->n4_h << MI_SIZE_LOG2, xd); - } - } else { - // Handle single reference frame extension - int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->n4_w); - mi_width = AOMMIN(mi_width, cm->mi_cols - mi_col); - int mi_height = AOMMIN(mi_size_high[BLOCK_64X64], xd->n4_h); - mi_height = AOMMIN(mi_height, cm->mi_rows - mi_row); - int mi_size = AOMMIN(mi_width, mi_height); - - for (int idx = 0; abs(max_row_offset) >= 1 && idx < mi_size && - refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES;) { - const MB_MODE_INFO *const candidate = xd->mi[-xd->mi_stride + idx]; - process_single_ref_mv_candidate(candidate, cm, ref_frame, refmv_count, - ref_mv_stack); - idx += mi_size_wide[candidate->sb_type]; - } - - for (int idx = 0; abs(max_col_offset) >= 1 && idx < mi_size && - refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES;) { - const MB_MODE_INFO *const candidate = xd->mi[idx * xd->mi_stride - 1]; - process_single_ref_mv_candidate(candidate, cm, ref_frame, refmv_count, - ref_mv_stack); - idx += mi_size_high[candidate->sb_type]; - } - - for (int idx = 0; idx < refmv_count[ref_frame]; ++idx) { - clamp_mv_ref(&ref_mv_stack[ref_frame][idx].this_mv.as_mv, - xd->n4_w << 
MI_SIZE_LOG2, xd->n4_h << MI_SIZE_LOG2, xd); - } - - if (mv_ref_list != NULL) { - for (int idx = refmv_count[ref_frame]; idx < MAX_MV_REF_CANDIDATES; ++idx) - mv_ref_list[rf[0]][idx].as_int = gm_mv_candidates[0].as_int; - - for (int idx = 0; - idx < AOMMIN(MAX_MV_REF_CANDIDATES, refmv_count[ref_frame]); ++idx) { - mv_ref_list[rf[0]][idx].as_int = - ref_mv_stack[ref_frame][idx].this_mv.as_int; - } - } - } -} - -void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd, - MB_MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - uint8_t ref_mv_count[MODE_CTX_REF_FRAMES], - CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE], - int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES], - int_mv *global_mvs, int mi_row, int mi_col, - int16_t *mode_context) { - int_mv zeromv[2]; - BLOCK_SIZE bsize = mi->sb_type; - MV_REFERENCE_FRAME rf[2]; - av1_set_ref_frame(rf, ref_frame); - - if (ref_frame < REF_FRAMES) { - if (ref_frame != INTRA_FRAME) { - global_mvs[ref_frame] = gm_get_motion_vector( - &cm->global_motion[ref_frame], cm->allow_high_precision_mv, bsize, - mi_col, mi_row, cm->cur_frame_force_integer_mv); - } else { - global_mvs[ref_frame].as_int = INVALID_MV; - } - } - - if (ref_frame != INTRA_FRAME) { - zeromv[0].as_int = - gm_get_motion_vector(&cm->global_motion[rf[0]], - cm->allow_high_precision_mv, bsize, mi_col, mi_row, - cm->cur_frame_force_integer_mv) - .as_int; - zeromv[1].as_int = - (rf[1] != NONE_FRAME) - ? 
gm_get_motion_vector(&cm->global_motion[rf[1]], - cm->allow_high_precision_mv, bsize, mi_col, - mi_row, cm->cur_frame_force_integer_mv) - .as_int - : 0; - } else { - zeromv[0].as_int = zeromv[1].as_int = 0; - } - - setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, mv_ref_list, - zeromv, mi_row, mi_col, mode_context); -} - -void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv, - int_mv *near_mv, int is_integer) { - int i; - // Make sure all the candidates are properly clamped etc - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { - lower_mv_precision(&mvlist[i].as_mv, allow_hp, is_integer); - } - *nearest_mv = mvlist[0]; - *near_mv = mvlist[1]; -} - -void av1_setup_frame_buf_refs(AV1_COMMON *cm) { - cm->cur_frame->cur_frame_offset = cm->frame_offset; - - MV_REFERENCE_FRAME ref_frame; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx; - if (buf_idx >= 0) - cm->cur_frame->ref_frame_offset[ref_frame - LAST_FRAME] = - cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset; - } -} - -void av1_setup_frame_sign_bias(AV1_COMMON *cm) { - MV_REFERENCE_FRAME ref_frame; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx; - if (cm->seq_params.enable_order_hint && buf_idx != INVALID_IDX) { - const int ref_frame_offset = - cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset; - cm->ref_frame_sign_bias[ref_frame] = - (get_relative_dist(cm, ref_frame_offset, (int)cm->frame_offset) <= 0) - ? 
0 - : 1; - } else { - cm->ref_frame_sign_bias[ref_frame] = 0; - } - } -} - -#define MAX_OFFSET_WIDTH 64 -#define MAX_OFFSET_HEIGHT 0 - -static int get_block_position(AV1_COMMON *cm, int *mi_r, int *mi_c, int blk_row, - int blk_col, MV mv, int sign_bias) { - const int base_blk_row = (blk_row >> 3) << 3; - const int base_blk_col = (blk_col >> 3) << 3; - - const int row_offset = (mv.row >= 0) ? (mv.row >> (4 + MI_SIZE_LOG2)) - : -((-mv.row) >> (4 + MI_SIZE_LOG2)); - - const int col_offset = (mv.col >= 0) ? (mv.col >> (4 + MI_SIZE_LOG2)) - : -((-mv.col) >> (4 + MI_SIZE_LOG2)); - - const int row = - (sign_bias == 1) ? blk_row - row_offset : blk_row + row_offset; - const int col = - (sign_bias == 1) ? blk_col - col_offset : blk_col + col_offset; - - if (row < 0 || row >= (cm->mi_rows >> 1) || col < 0 || - col >= (cm->mi_cols >> 1)) - return 0; - - if (row < base_blk_row - (MAX_OFFSET_HEIGHT >> 3) || - row >= base_blk_row + 8 + (MAX_OFFSET_HEIGHT >> 3) || - col < base_blk_col - (MAX_OFFSET_WIDTH >> 3) || - col >= base_blk_col + 8 + (MAX_OFFSET_WIDTH >> 3)) - return 0; - - *mi_r = row; - *mi_c = col; - - return 1; -} - -// Note: motion_filed_projection finds motion vectors of current frame's -// reference frame, and projects them to current frame. To make it clear, -// let's call current frame's reference frame as start frame. -// Call Start frame's reference frames as reference frames. -// Call ref_offset as frame distances between start frame and its reference -// frames. 
-static int motion_field_projection(AV1_COMMON *cm, - MV_REFERENCE_FRAME start_frame, int dir) { - TPL_MV_REF *tpl_mvs_base = cm->tpl_mvs; - int ref_offset[REF_FRAMES] = { 0 }; - - (void)dir; - - const int start_frame_idx = cm->frame_refs[FWD_RF_OFFSET(start_frame)].idx; - if (start_frame_idx < 0) return 0; - - if (cm->buffer_pool->frame_bufs[start_frame_idx].intra_only) return 0; - - if (cm->buffer_pool->frame_bufs[start_frame_idx].mi_rows != cm->mi_rows || - cm->buffer_pool->frame_bufs[start_frame_idx].mi_cols != cm->mi_cols) - return 0; - - const int start_frame_offset = - cm->buffer_pool->frame_bufs[start_frame_idx].cur_frame_offset; - const unsigned int *const ref_frame_offsets = - &cm->buffer_pool->frame_bufs[start_frame_idx].ref_frame_offset[0]; - const int cur_frame_offset = cm->cur_frame->cur_frame_offset; - int start_to_current_frame_offset = - get_relative_dist(cm, start_frame_offset, cur_frame_offset); - - for (MV_REFERENCE_FRAME rf = LAST_FRAME; rf <= INTER_REFS_PER_FRAME; ++rf) { - ref_offset[rf] = get_relative_dist(cm, start_frame_offset, - ref_frame_offsets[rf - LAST_FRAME]); - } - - if (dir == 2) start_to_current_frame_offset = -start_to_current_frame_offset; - - MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[start_frame_idx].mvs; - const int mvs_rows = (cm->mi_rows + 1) >> 1; - const int mvs_cols = (cm->mi_cols + 1) >> 1; - - for (int blk_row = 0; blk_row < mvs_rows; ++blk_row) { - for (int blk_col = 0; blk_col < mvs_cols; ++blk_col) { - MV_REF *mv_ref = &mv_ref_base[blk_row * mvs_cols + blk_col]; - MV fwd_mv = mv_ref->mv.as_mv; - - if (mv_ref->ref_frame > INTRA_FRAME) { - int_mv this_mv; - int mi_r, mi_c; - const int ref_frame_offset = ref_offset[mv_ref->ref_frame]; - - int pos_valid = - abs(ref_frame_offset) <= MAX_FRAME_DISTANCE && - ref_frame_offset > 0 && - abs(start_to_current_frame_offset) <= MAX_FRAME_DISTANCE; - - if (pos_valid) { - get_mv_projection(&this_mv.as_mv, fwd_mv, - start_to_current_frame_offset, ref_frame_offset); - 
pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col, - this_mv.as_mv, dir >> 1); - } - - if (pos_valid) { - const int mi_offset = mi_r * (cm->mi_stride >> 1) + mi_c; - - tpl_mvs_base[mi_offset].mfmv0.as_mv.row = fwd_mv.row; - tpl_mvs_base[mi_offset].mfmv0.as_mv.col = fwd_mv.col; - tpl_mvs_base[mi_offset].ref_frame_offset = ref_frame_offset; - } - } - } - } - - return 1; -} - -void av1_setup_motion_field(AV1_COMMON *cm) { - memset(cm->ref_frame_side, 0, sizeof(cm->ref_frame_side)); - if (!cm->seq_params.enable_order_hint) return; - - TPL_MV_REF *tpl_mvs_base = cm->tpl_mvs; - int size = ((cm->mi_rows + MAX_MIB_SIZE) >> 1) * (cm->mi_stride >> 1); - for (int idx = 0; idx < size; ++idx) { - tpl_mvs_base[idx].mfmv0.as_int = INVALID_MV; - tpl_mvs_base[idx].ref_frame_offset = 0; - } - - const int cur_order_hint = cm->cur_frame->cur_frame_offset; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - - int ref_buf_idx[INTER_REFS_PER_FRAME]; - int ref_order_hint[INTER_REFS_PER_FRAME]; - - for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { - const int ref_idx = ref_frame - LAST_FRAME; - const int buf_idx = cm->frame_refs[ref_idx].idx; - int order_hint = 0; - - if (buf_idx >= 0) order_hint = frame_bufs[buf_idx].cur_frame_offset; - - ref_buf_idx[ref_idx] = buf_idx; - ref_order_hint[ref_idx] = order_hint; - - if (get_relative_dist(cm, order_hint, cur_order_hint) > 0) - cm->ref_frame_side[ref_frame] = 1; - else if (order_hint == cur_order_hint) - cm->ref_frame_side[ref_frame] = -1; - } - - int ref_stamp = MFMV_STACK_SIZE - 1; - - if (ref_buf_idx[LAST_FRAME - LAST_FRAME] >= 0) { - const int alt_of_lst_order_hint = - frame_bufs[ref_buf_idx[LAST_FRAME - LAST_FRAME]] - .ref_frame_offset[ALTREF_FRAME - LAST_FRAME]; - - const int is_lst_overlay = - (alt_of_lst_order_hint == ref_order_hint[GOLDEN_FRAME - LAST_FRAME]); - if (!is_lst_overlay) motion_field_projection(cm, LAST_FRAME, 2); - --ref_stamp; - } - - if (get_relative_dist(cm, 
ref_order_hint[BWDREF_FRAME - LAST_FRAME], - cur_order_hint) > 0) { - if (motion_field_projection(cm, BWDREF_FRAME, 0)) --ref_stamp; - } - - if (get_relative_dist(cm, ref_order_hint[ALTREF2_FRAME - LAST_FRAME], - cur_order_hint) > 0) { - if (motion_field_projection(cm, ALTREF2_FRAME, 0)) --ref_stamp; - } - - if (get_relative_dist(cm, ref_order_hint[ALTREF_FRAME - LAST_FRAME], - cur_order_hint) > 0 && - ref_stamp >= 0) - if (motion_field_projection(cm, ALTREF_FRAME, 0)) --ref_stamp; - - if (ref_stamp >= 0 && ref_buf_idx[LAST2_FRAME - LAST_FRAME] >= 0) - if (motion_field_projection(cm, LAST2_FRAME, 2)) --ref_stamp; -} - -static INLINE void record_samples(MB_MODE_INFO *mbmi, int *pts, int *pts_inref, - int row_offset, int sign_r, int col_offset, - int sign_c) { - int bw = block_size_wide[mbmi->sb_type]; - int bh = block_size_high[mbmi->sb_type]; - int x = col_offset * MI_SIZE + sign_c * AOMMAX(bw, MI_SIZE) / 2 - 1; - int y = row_offset * MI_SIZE + sign_r * AOMMAX(bh, MI_SIZE) / 2 - 1; - - pts[0] = (x * 8); - pts[1] = (y * 8); - pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col; - pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row; -} - -// Select samples according to the motion vector difference. -int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize) { - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - const int thresh = clamp(AOMMAX(bw, bh), 16, 112); - int pts_mvd[SAMPLES_ARRAY_SIZE] = { 0 }; - int i, j, k, l = len; - int ret = 0; - assert(len <= LEAST_SQUARES_SAMPLES_MAX); - - // Obtain the motion vector difference. - for (i = 0; i < len; ++i) { - pts_mvd[i] = abs(pts_inref[2 * i] - pts[2 * i] - mv->col) + - abs(pts_inref[2 * i + 1] - pts[2 * i + 1] - mv->row); - - if (pts_mvd[i] > thresh) - pts_mvd[i] = -1; - else - ret++; - } - - // Keep at least 1 sample. 
- if (!ret) return 1; - - i = 0; - j = l - 1; - for (k = 0; k < l - ret; k++) { - while (pts_mvd[i] != -1) i++; - while (pts_mvd[j] == -1) j--; - assert(i != j); - if (i > j) break; - - // Replace the discarded samples; - pts_mvd[i] = pts_mvd[j]; - pts[2 * i] = pts[2 * j]; - pts[2 * i + 1] = pts[2 * j + 1]; - pts_inref[2 * i] = pts_inref[2 * j]; - pts_inref[2 * i + 1] = pts_inref[2 * j + 1]; - i++; - j--; - } - - return ret; -} - -// Note: Samples returned are at 1/8-pel precision -// Sample are the neighbor block center point's coordinates relative to the -// left-top pixel of current block. -int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, - int *pts, int *pts_inref) { - MB_MODE_INFO *const mbmi0 = xd->mi[0]; - int ref_frame = mbmi0->ref_frame[0]; - int up_available = xd->up_available; - int left_available = xd->left_available; - int i, mi_step = 1, np = 0; - - const TileInfo *const tile = &xd->tile; - int do_tl = 1; - int do_tr = 1; - - // scan the nearest above rows - if (up_available) { - int mi_row_offset = -1; - MB_MODE_INFO *mbmi = xd->mi[mi_row_offset * xd->mi_stride]; - uint8_t n4_w = mi_size_wide[mbmi->sb_type]; - - if (xd->n4_w <= n4_w) { - // Handle "current block width <= above block width" case. - int col_offset = -mi_col % n4_w; - - if (col_offset < 0) do_tl = 0; - if (col_offset + n4_w > xd->n4_w) do_tr = 0; - - if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) { - record_samples(mbmi, pts, pts_inref, 0, -1, col_offset, 1); - pts += 2; - pts_inref += 2; - np++; - if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX; - } - } else { - // Handle "current block width > above block width" case. 
- for (i = 0; i < AOMMIN(xd->n4_w, cm->mi_cols - mi_col); i += mi_step) { - int mi_col_offset = i; - mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; - n4_w = mi_size_wide[mbmi->sb_type]; - mi_step = AOMMIN(xd->n4_w, n4_w); - - if (mbmi->ref_frame[0] == ref_frame && - mbmi->ref_frame[1] == NONE_FRAME) { - record_samples(mbmi, pts, pts_inref, 0, -1, i, 1); - pts += 2; - pts_inref += 2; - np++; - if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX; - } - } - } - } - assert(np <= LEAST_SQUARES_SAMPLES_MAX); - - // scan the nearest left columns - if (left_available) { - int mi_col_offset = -1; - - MB_MODE_INFO *mbmi = xd->mi[mi_col_offset]; - uint8_t n4_h = mi_size_high[mbmi->sb_type]; - - if (xd->n4_h <= n4_h) { - // Handle "current block height <= above block height" case. - int row_offset = -mi_row % n4_h; - - if (row_offset < 0) do_tl = 0; - - if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) { - record_samples(mbmi, pts, pts_inref, row_offset, 1, 0, -1); - pts += 2; - pts_inref += 2; - np++; - if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX; - } - } else { - // Handle "current block height > above block height" case. 
- for (i = 0; i < AOMMIN(xd->n4_h, cm->mi_rows - mi_row); i += mi_step) { - int mi_row_offset = i; - mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; - n4_h = mi_size_high[mbmi->sb_type]; - mi_step = AOMMIN(xd->n4_h, n4_h); - - if (mbmi->ref_frame[0] == ref_frame && - mbmi->ref_frame[1] == NONE_FRAME) { - record_samples(mbmi, pts, pts_inref, i, 1, 0, -1); - pts += 2; - pts_inref += 2; - np++; - if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX; - } - } - } - } - assert(np <= LEAST_SQUARES_SAMPLES_MAX); - - // Top-left block - if (do_tl && left_available && up_available) { - int mi_row_offset = -1; - int mi_col_offset = -1; - - MB_MODE_INFO *mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; - - if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) { - record_samples(mbmi, pts, pts_inref, 0, -1, 0, -1); - pts += 2; - pts_inref += 2; - np++; - if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX; - } - } - assert(np <= LEAST_SQUARES_SAMPLES_MAX); - - // Top-right block - if (do_tr && - has_top_right(cm, xd, mi_row, mi_col, AOMMAX(xd->n4_w, xd->n4_h))) { - POSITION trb_pos = { -1, xd->n4_w }; - - if (is_inside(tile, mi_col, mi_row, &trb_pos)) { - int mi_row_offset = -1; - int mi_col_offset = xd->n4_w; - - MB_MODE_INFO *mbmi = - xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; - - if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) { - record_samples(mbmi, pts, pts_inref, 0, -1, xd->n4_w, 1); - np++; - if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX; - } - } - } - assert(np <= LEAST_SQUARES_SAMPLES_MAX); - - return np; -} - -void av1_setup_skip_mode_allowed(AV1_COMMON *cm) { - cm->is_skip_mode_allowed = 0; - cm->ref_frame_idx_0 = cm->ref_frame_idx_1 = INVALID_IDX; - - if (!cm->seq_params.enable_order_hint || frame_is_intra_only(cm) || - cm->reference_mode == SINGLE_REFERENCE) - return; - - RefCntBuffer *const frame_bufs = 
cm->buffer_pool->frame_bufs; - const int cur_frame_offset = cm->frame_offset; - int ref_frame_offset[2] = { -1, INT_MAX }; - int ref_idx[2] = { INVALID_IDX, INVALID_IDX }; - - // Identify the nearest forward and backward references. - for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { - const int buf_idx = cm->frame_refs[i].idx; - if (buf_idx == INVALID_IDX) continue; - - const int ref_offset = frame_bufs[buf_idx].cur_frame_offset; - if (get_relative_dist(cm, ref_offset, cur_frame_offset) < 0) { - // Forward reference - if (ref_frame_offset[0] == -1 || - get_relative_dist(cm, ref_offset, ref_frame_offset[0]) > 0) { - ref_frame_offset[0] = ref_offset; - ref_idx[0] = i; - } - } else if (get_relative_dist(cm, ref_offset, cur_frame_offset) > 0) { - // Backward reference - if (ref_frame_offset[1] == INT_MAX || - get_relative_dist(cm, ref_offset, ref_frame_offset[1]) < 0) { - ref_frame_offset[1] = ref_offset; - ref_idx[1] = i; - } - } - } - - if (ref_idx[0] != INVALID_IDX && ref_idx[1] != INVALID_IDX) { - // == Bi-directional prediction == - cm->is_skip_mode_allowed = 1; - cm->ref_frame_idx_0 = AOMMIN(ref_idx[0], ref_idx[1]); - cm->ref_frame_idx_1 = AOMMAX(ref_idx[0], ref_idx[1]); - } else if (ref_idx[0] != INVALID_IDX && ref_idx[1] == INVALID_IDX) { - // == Forward prediction only == - // Identify the second nearest forward reference. 
- ref_frame_offset[1] = -1; - for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { - const int buf_idx = cm->frame_refs[i].idx; - if (buf_idx == INVALID_IDX) continue; - - const int ref_offset = frame_bufs[buf_idx].cur_frame_offset; - if ((ref_frame_offset[0] != -1 && - get_relative_dist(cm, ref_offset, ref_frame_offset[0]) < 0) && - (ref_frame_offset[1] == -1 || - get_relative_dist(cm, ref_offset, ref_frame_offset[1]) > 0)) { - // Second closest forward reference - ref_frame_offset[1] = ref_offset; - ref_idx[1] = i; - } - } - if (ref_frame_offset[1] != -1) { - cm->is_skip_mode_allowed = 1; - cm->ref_frame_idx_0 = AOMMIN(ref_idx[0], ref_idx[1]); - cm->ref_frame_idx_1 = AOMMAX(ref_idx[0], ref_idx[1]); - } - } -} - -typedef struct { - int map_idx; // frame map index - int buf_idx; // frame buffer index - int sort_idx; // index based on the offset to be used for sorting -} REF_FRAME_INFO; - -static int compare_ref_frame_info(const void *arg_a, const void *arg_b) { - const REF_FRAME_INFO *info_a = (REF_FRAME_INFO *)arg_a; - const REF_FRAME_INFO *info_b = (REF_FRAME_INFO *)arg_b; - - if (info_a->sort_idx < info_b->sort_idx) return -1; - if (info_a->sort_idx > info_b->sort_idx) return 1; - return (info_a->map_idx < info_b->map_idx) - ? -1 - : ((info_a->map_idx > info_b->map_idx) ? 
1 : 0); -} - -static void set_ref_frame_info(AV1_COMMON *const cm, int frame_idx, - REF_FRAME_INFO *ref_info) { - assert(frame_idx >= 0 && frame_idx < INTER_REFS_PER_FRAME); - - const int buf_idx = ref_info->buf_idx; - - cm->frame_refs[frame_idx].idx = buf_idx; - cm->frame_refs[frame_idx].buf = &cm->buffer_pool->frame_bufs[buf_idx].buf; - cm->frame_refs[frame_idx].map_idx = ref_info->map_idx; -} - -void av1_set_frame_refs(AV1_COMMON *const cm, int lst_map_idx, - int gld_map_idx) { - BufferPool *const pool = cm->buffer_pool; - RefCntBuffer *const frame_bufs = pool->frame_bufs; - - int lst_frame_sort_idx = -1; - int gld_frame_sort_idx = -1; - - assert(cm->seq_params.enable_order_hint); - assert(cm->seq_params.order_hint_bits_minus_1 >= 0); - const int cur_frame_offset = (int)cm->frame_offset; - const int cur_frame_sort_idx = 1 << cm->seq_params.order_hint_bits_minus_1; - - REF_FRAME_INFO ref_frame_info[REF_FRAMES]; - int ref_flag_list[INTER_REFS_PER_FRAME] = { 0, 0, 0, 0, 0, 0, 0 }; - - for (int i = 0; i < REF_FRAMES; ++i) { - const int map_idx = i; - - ref_frame_info[i].map_idx = map_idx; - ref_frame_info[i].sort_idx = -1; - - const int buf_idx = cm->ref_frame_map[map_idx]; - ref_frame_info[i].buf_idx = buf_idx; - - if (buf_idx < 0 || buf_idx >= FRAME_BUFFERS) continue; - // TODO(zoeliu@google.com): To verify the checking on ref_count. - if (frame_bufs[buf_idx].ref_count <= 0) continue; - - const int offset = (int)frame_bufs[buf_idx].cur_frame_offset; - ref_frame_info[i].sort_idx = - (offset == -1) ? -1 - : cur_frame_sort_idx + - get_relative_dist(cm, offset, cur_frame_offset); - assert(ref_frame_info[i].sort_idx >= -1); - - if (map_idx == lst_map_idx) lst_frame_sort_idx = ref_frame_info[i].sort_idx; - if (map_idx == gld_map_idx) gld_frame_sort_idx = ref_frame_info[i].sort_idx; - } - - // Confirm both LAST_FRAME and GOLDEN_FRAME are valid forward reference - // frames. 
- if (lst_frame_sort_idx == -1 || lst_frame_sort_idx >= cur_frame_sort_idx) { - aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, - "Inter frame requests a look-ahead frame as LAST"); - } - if (gld_frame_sort_idx == -1 || gld_frame_sort_idx >= cur_frame_sort_idx) { - aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, - "Inter frame requests a look-ahead frame as GOLDEN"); - } - - // Sort ref frames based on their frame_offset values. - qsort(ref_frame_info, REF_FRAMES, sizeof(REF_FRAME_INFO), - compare_ref_frame_info); - - // Identify forward and backward reference frames. - // Forward reference: offset < cur_frame_offset - // Backward reference: offset >= cur_frame_offset - int fwd_start_idx = 0, fwd_end_idx = REF_FRAMES - 1; - - for (int i = 0; i < REF_FRAMES; i++) { - if (ref_frame_info[i].sort_idx == -1) { - fwd_start_idx++; - continue; - } - - if (ref_frame_info[i].sort_idx >= cur_frame_sort_idx) { - fwd_end_idx = i - 1; - break; - } - } - - int bwd_start_idx = fwd_end_idx + 1; - int bwd_end_idx = REF_FRAMES - 1; - - // === Backward Reference Frames === - - // == ALTREF_FRAME == - if (bwd_start_idx <= bwd_end_idx) { - set_ref_frame_info(cm, ALTREF_FRAME - LAST_FRAME, - &ref_frame_info[bwd_end_idx]); - ref_flag_list[ALTREF_FRAME - LAST_FRAME] = 1; - bwd_end_idx--; - } - - // == BWDREF_FRAME == - if (bwd_start_idx <= bwd_end_idx) { - set_ref_frame_info(cm, BWDREF_FRAME - LAST_FRAME, - &ref_frame_info[bwd_start_idx]); - ref_flag_list[BWDREF_FRAME - LAST_FRAME] = 1; - bwd_start_idx++; - } - - // == ALTREF2_FRAME == - if (bwd_start_idx <= bwd_end_idx) { - set_ref_frame_info(cm, ALTREF2_FRAME - LAST_FRAME, - &ref_frame_info[bwd_start_idx]); - ref_flag_list[ALTREF2_FRAME - LAST_FRAME] = 1; - } - - // === Forward Reference Frames === - - for (int i = fwd_start_idx; i <= fwd_end_idx; ++i) { - // == LAST_FRAME == - if (ref_frame_info[i].map_idx == lst_map_idx) { - set_ref_frame_info(cm, LAST_FRAME - LAST_FRAME, &ref_frame_info[i]); - ref_flag_list[LAST_FRAME 
- LAST_FRAME] = 1; - } - - // == GOLDEN_FRAME == - if (ref_frame_info[i].map_idx == gld_map_idx) { - set_ref_frame_info(cm, GOLDEN_FRAME - LAST_FRAME, &ref_frame_info[i]); - ref_flag_list[GOLDEN_FRAME - LAST_FRAME] = 1; - } - } - - assert(ref_flag_list[LAST_FRAME - LAST_FRAME] == 1 && - ref_flag_list[GOLDEN_FRAME - LAST_FRAME] == 1); - - // == LAST2_FRAME == - // == LAST3_FRAME == - // == BWDREF_FRAME == - // == ALTREF2_FRAME == - // == ALTREF_FRAME == - - // Set up the reference frames in the anti-chronological order. - static const MV_REFERENCE_FRAME ref_frame_list[INTER_REFS_PER_FRAME - 2] = { - LAST2_FRAME, LAST3_FRAME, BWDREF_FRAME, ALTREF2_FRAME, ALTREF_FRAME - }; - - int ref_idx; - for (ref_idx = 0; ref_idx < (INTER_REFS_PER_FRAME - 2); ref_idx++) { - const MV_REFERENCE_FRAME ref_frame = ref_frame_list[ref_idx]; - - if (ref_flag_list[ref_frame - LAST_FRAME] == 1) continue; - - while (fwd_start_idx <= fwd_end_idx && - (ref_frame_info[fwd_end_idx].map_idx == lst_map_idx || - ref_frame_info[fwd_end_idx].map_idx == gld_map_idx)) { - fwd_end_idx--; - } - if (fwd_start_idx > fwd_end_idx) break; - - set_ref_frame_info(cm, ref_frame - LAST_FRAME, - &ref_frame_info[fwd_end_idx]); - ref_flag_list[ref_frame - LAST_FRAME] = 1; - - fwd_end_idx--; - } - - // Assign all the remaining frame(s), if any, to the earliest reference frame. 
- for (; ref_idx < (INTER_REFS_PER_FRAME - 2); ref_idx++) { - const MV_REFERENCE_FRAME ref_frame = ref_frame_list[ref_idx]; - if (ref_flag_list[ref_frame - LAST_FRAME] == 1) continue; - set_ref_frame_info(cm, ref_frame - LAST_FRAME, - &ref_frame_info[fwd_start_idx]); - ref_flag_list[ref_frame - LAST_FRAME] = 1; - } - - for (int i = 0; i < INTER_REFS_PER_FRAME; i++) { - assert(ref_flag_list[i] == 1); - } -} diff --git a/third_party/aom/av1/common/mvref_common.h b/third_party/aom/av1/common/mvref_common.h deleted file mode 100644 index 83f7a1ac0..000000000 --- a/third_party/aom/av1/common/mvref_common.h +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#ifndef AOM_AV1_COMMON_MVREF_COMMON_H_ -#define AOM_AV1_COMMON_MVREF_COMMON_H_ - -#include "av1/common/onyxc_int.h" -#include "av1/common/blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MVREF_ROW_COLS 3 - -// Set the upper limit of the motion vector component magnitude. -// This would make a motion vector fit in 26 bits. Plus 3 bits for the -// reference frame index. A tuple of motion vector can hence be stored within -// 32 bit range for efficient load/store operations. 
-#define REFMVS_LIMIT ((1 << 12) - 1) - -typedef struct position { - int row; - int col; -} POSITION; - -// clamp_mv_ref -#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units - -static INLINE int get_relative_dist(const AV1_COMMON *cm, int a, int b) { - if (!cm->seq_params.enable_order_hint) return 0; - - const int bits = cm->seq_params.order_hint_bits_minus_1 + 1; - - assert(bits >= 1); - assert(a >= 0 && a < (1 << bits)); - assert(b >= 0 && b < (1 << bits)); - - int diff = a - b; - const int m = 1 << (bits - 1); - diff = (diff & (m - 1)) - (diff & m); - return diff; -} - -static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) { - clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER, - xd->mb_to_right_edge + bw * 8 + MV_BORDER, - xd->mb_to_top_edge - bh * 8 - MV_BORDER, - xd->mb_to_bottom_edge + bh * 8 + MV_BORDER); -} - -// This function returns either the appropriate sub block or block's mv -// on whether the block_size < 8x8 and we have check_sub_blocks set. -static INLINE int_mv get_sub_block_mv(const MB_MODE_INFO *candidate, - int which_mv, int search_col) { - (void)search_col; - return candidate->mv[which_mv]; -} - -static INLINE int_mv get_sub_block_pred_mv(const MB_MODE_INFO *candidate, - int which_mv, int search_col) { - (void)search_col; - return candidate->mv[which_mv]; -} - -// Performs mv sign inversion if indicated by the reference frame combination. -static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, - const MV_REFERENCE_FRAME this_ref_frame, - const int *ref_sign_bias) { - int_mv mv = mbmi->mv[ref]; - if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - return mv; -} - -// Checks that the given mi_row, mi_col and search point -// are inside the borders of the tile. 
-static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row, - const POSITION *mi_pos) { - return !(mi_row + mi_pos->row < tile->mi_row_start || - mi_col + mi_pos->col < tile->mi_col_start || - mi_row + mi_pos->row >= tile->mi_row_end || - mi_col + mi_pos->col >= tile->mi_col_end); -} - -static INLINE int find_valid_row_offset(const TileInfo *const tile, int mi_row, - int row_offset) { - return clamp(row_offset, tile->mi_row_start - mi_row, - tile->mi_row_end - mi_row - 1); -} - -static INLINE int find_valid_col_offset(const TileInfo *const tile, int mi_col, - int col_offset) { - return clamp(col_offset, tile->mi_col_start - mi_col, - tile->mi_col_end - mi_col - 1); -} - -static INLINE void lower_mv_precision(MV *mv, int allow_hp, int is_integer) { - if (is_integer) { - integer_mv_precision(mv); - } else { - if (!allow_hp) { - if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1); - if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1); - } - } -} - -static INLINE int8_t get_uni_comp_ref_idx(const MV_REFERENCE_FRAME *const rf) { - // Single ref pred - if (rf[1] <= INTRA_FRAME) return -1; - - // Bi-directional comp ref pred - if ((rf[0] < BWDREF_FRAME) && (rf[1] >= BWDREF_FRAME)) return -1; - - for (int8_t ref_idx = 0; ref_idx < TOTAL_UNIDIR_COMP_REFS; ++ref_idx) { - if (rf[0] == comp_ref0(ref_idx) && rf[1] == comp_ref1(ref_idx)) - return ref_idx; - } - return -1; -} - -static INLINE int8_t av1_ref_frame_type(const MV_REFERENCE_FRAME *const rf) { - if (rf[1] > INTRA_FRAME) { - const int8_t uni_comp_ref_idx = get_uni_comp_ref_idx(rf); - if (uni_comp_ref_idx >= 0) { - assert((REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx) < - MODE_CTX_REF_FRAMES); - return REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx; - } else { - return REF_FRAMES + FWD_RF_OFFSET(rf[0]) + - BWD_RF_OFFSET(rf[1]) * FWD_REFS; - } - } - - return rf[0]; -} - -// clang-format off -static MV_REFERENCE_FRAME ref_frame_map[TOTAL_COMP_REFS][2] = { - { LAST_FRAME, BWDREF_FRAME }, { 
LAST2_FRAME, BWDREF_FRAME }, - { LAST3_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, BWDREF_FRAME }, - - { LAST_FRAME, ALTREF2_FRAME }, { LAST2_FRAME, ALTREF2_FRAME }, - { LAST3_FRAME, ALTREF2_FRAME }, { GOLDEN_FRAME, ALTREF2_FRAME }, - - { LAST_FRAME, ALTREF_FRAME }, { LAST2_FRAME, ALTREF_FRAME }, - { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }, - - { LAST_FRAME, LAST2_FRAME }, { LAST_FRAME, LAST3_FRAME }, - { LAST_FRAME, GOLDEN_FRAME }, { BWDREF_FRAME, ALTREF_FRAME }, - - // NOTE: Following reference frame pairs are not supported to be explicitly - // signalled, but they are possibly chosen by the use of skip_mode, - // which may use the most recent one-sided reference frame pair. - { LAST2_FRAME, LAST3_FRAME }, { LAST2_FRAME, GOLDEN_FRAME }, - { LAST3_FRAME, GOLDEN_FRAME }, {BWDREF_FRAME, ALTREF2_FRAME}, - { ALTREF2_FRAME, ALTREF_FRAME } -}; -// clang-format on - -static INLINE void av1_set_ref_frame(MV_REFERENCE_FRAME *rf, - int8_t ref_frame_type) { - if (ref_frame_type >= REF_FRAMES) { - rf[0] = ref_frame_map[ref_frame_type - REF_FRAMES][0]; - rf[1] = ref_frame_map[ref_frame_type - REF_FRAMES][1]; - } else { - rf[0] = ref_frame_type; - rf[1] = NONE_FRAME; - assert(ref_frame_type > NONE_FRAME); - } -} - -static uint16_t compound_mode_ctx_map[3][COMP_NEWMV_CTXS] = { - { 0, 1, 1, 1, 1 }, - { 1, 2, 3, 4, 4 }, - { 4, 4, 5, 6, 7 }, -}; - -static INLINE int16_t av1_mode_context_analyzer( - const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf) { - const int8_t ref_frame = av1_ref_frame_type(rf); - - if (rf[1] <= INTRA_FRAME) return mode_context[ref_frame]; - - const int16_t newmv_ctx = mode_context[ref_frame] & NEWMV_CTX_MASK; - const int16_t refmv_ctx = - (mode_context[ref_frame] >> REFMV_OFFSET) & REFMV_CTX_MASK; - - const int16_t comp_ctx = compound_mode_ctx_map[refmv_ctx >> 1][AOMMIN( - newmv_ctx, COMP_NEWMV_CTXS - 1)]; - return comp_ctx; -} - -static INLINE uint8_t av1_drl_ctx(const CANDIDATE_MV *ref_mv_stack, - int ref_idx) { - if 
(ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL && - ref_mv_stack[ref_idx + 1].weight >= REF_CAT_LEVEL) - return 0; - - if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL && - ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL) - return 1; - - if (ref_mv_stack[ref_idx].weight < REF_CAT_LEVEL && - ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL) - return 2; - - return 0; -} - -void av1_setup_frame_buf_refs(AV1_COMMON *cm); -void av1_setup_frame_sign_bias(AV1_COMMON *cm); -void av1_setup_skip_mode_allowed(AV1_COMMON *cm); -void av1_setup_motion_field(AV1_COMMON *cm); -void av1_set_frame_refs(AV1_COMMON *const cm, int lst_map_idx, int gld_map_idx); - -static INLINE void av1_collect_neighbors_ref_counts(MACROBLOCKD *const xd) { - av1_zero(xd->neighbors_ref_counts); - - uint8_t *const ref_counts = xd->neighbors_ref_counts; - - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int above_in_image = xd->up_available; - const int left_in_image = xd->left_available; - - // Above neighbor - if (above_in_image && is_inter_block(above_mbmi)) { - ref_counts[above_mbmi->ref_frame[0]]++; - if (has_second_ref(above_mbmi)) { - ref_counts[above_mbmi->ref_frame[1]]++; - } - } - - // Left neighbor - if (left_in_image && is_inter_block(left_mbmi)) { - ref_counts[left_mbmi->ref_frame[0]]++; - if (has_second_ref(left_mbmi)) { - ref_counts[left_mbmi->ref_frame[1]]++; - } - } -} - -void av1_copy_frame_mvs(const AV1_COMMON *const cm, - const MB_MODE_INFO *const mi, int mi_row, int mi_col, - int x_mis, int y_mis); - -void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd, - MB_MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - uint8_t ref_mv_count[MODE_CTX_REF_FRAMES], - CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE], - int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES], - int_mv *global_mvs, int mi_row, int mi_col, - int16_t *mode_context); - -// check a list of motion vectors by sad score using a number rows of pixels -// 
above and a number cols of pixels in the left to select the one with best -// score to use as ref motion vector -void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv, - int_mv *near_mv, int is_integer); - -int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize); -int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, - int *pts, int *pts_inref); - -#define INTRABC_DELAY_PIXELS 256 // Delay of 256 pixels -#define INTRABC_DELAY_SB64 (INTRABC_DELAY_PIXELS / 64) - -static INLINE void av1_find_ref_dv(int_mv *ref_dv, const TileInfo *const tile, - int mib_size, int mi_row, int mi_col) { - (void)mi_col; - if (mi_row - mib_size < tile->mi_row_start) { - ref_dv->as_mv.row = 0; - ref_dv->as_mv.col = -MI_SIZE * mib_size - INTRABC_DELAY_PIXELS; - } else { - ref_dv->as_mv.row = -MI_SIZE * mib_size; - ref_dv->as_mv.col = 0; - } - ref_dv->as_mv.row *= 8; - ref_dv->as_mv.col *= 8; -} - -static INLINE int av1_is_dv_valid(const MV dv, const AV1_COMMON *cm, - const MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize, int mib_size_log2) { - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - const int SCALE_PX_TO_MV = 8; - // Disallow subpixel for now - // SUBPEL_MASK is not the correct scale - if (((dv.row & (SCALE_PX_TO_MV - 1)) || (dv.col & (SCALE_PX_TO_MV - 1)))) - return 0; - - const TileInfo *const tile = &xd->tile; - // Is the source top-left inside the current tile? - const int src_top_edge = mi_row * MI_SIZE * SCALE_PX_TO_MV + dv.row; - const int tile_top_edge = tile->mi_row_start * MI_SIZE * SCALE_PX_TO_MV; - if (src_top_edge < tile_top_edge) return 0; - const int src_left_edge = mi_col * MI_SIZE * SCALE_PX_TO_MV + dv.col; - const int tile_left_edge = tile->mi_col_start * MI_SIZE * SCALE_PX_TO_MV; - if (src_left_edge < tile_left_edge) return 0; - // Is the bottom right inside the current tile? 
- const int src_bottom_edge = (mi_row * MI_SIZE + bh) * SCALE_PX_TO_MV + dv.row; - const int tile_bottom_edge = tile->mi_row_end * MI_SIZE * SCALE_PX_TO_MV; - if (src_bottom_edge > tile_bottom_edge) return 0; - const int src_right_edge = (mi_col * MI_SIZE + bw) * SCALE_PX_TO_MV + dv.col; - const int tile_right_edge = tile->mi_col_end * MI_SIZE * SCALE_PX_TO_MV; - if (src_right_edge > tile_right_edge) return 0; - - // Special case for sub 8x8 chroma cases, to prevent referring to chroma - // pixels outside current tile. - for (int plane = 1; plane < av1_num_planes(cm); ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - if (is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, - pd->subsampling_y)) { - if (bw < 8 && pd->subsampling_x) - if (src_left_edge < tile_left_edge + 4 * SCALE_PX_TO_MV) return 0; - if (bh < 8 && pd->subsampling_y) - if (src_top_edge < tile_top_edge + 4 * SCALE_PX_TO_MV) return 0; - } - } - - // Is the bottom right within an already coded SB? Also consider additional - // constraints to facilitate HW decoder. - const int max_mib_size = 1 << mib_size_log2; - const int active_sb_row = mi_row >> mib_size_log2; - const int active_sb64_col = (mi_col * MI_SIZE) >> 6; - const int sb_size = max_mib_size * MI_SIZE; - const int src_sb_row = ((src_bottom_edge >> 3) - 1) / sb_size; - const int src_sb64_col = ((src_right_edge >> 3) - 1) >> 6; - const int total_sb64_per_row = - ((tile->mi_col_end - tile->mi_col_start - 1) >> 4) + 1; - const int active_sb64 = active_sb_row * total_sb64_per_row + active_sb64_col; - const int src_sb64 = src_sb_row * total_sb64_per_row + src_sb64_col; - if (src_sb64 >= active_sb64 - INTRABC_DELAY_SB64) return 0; - - // Wavefront constraint: use only top left area of frame for reference. 
- const int gradient = 1 + INTRABC_DELAY_SB64 + (sb_size > 64); - const int wf_offset = gradient * (active_sb_row - src_sb_row); - if (src_sb_row > active_sb_row || - src_sb64_col >= active_sb64_col - INTRABC_DELAY_SB64 + wf_offset) - return 0; - - return 1; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_MVREF_COMMON_H_ diff --git a/third_party/aom/av1/common/obmc.h b/third_party/aom/av1/common/obmc.h deleted file mode 100644 index 1c90cd93f..000000000 --- a/third_party/aom/av1/common/obmc.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_OBMC_H_ -#define AOM_AV1_COMMON_OBMC_H_ - -typedef void (*overlappable_nb_visitor_t)(MACROBLOCKD *xd, int rel_mi_pos, - uint8_t nb_mi_size, - MB_MODE_INFO *nb_mi, void *fun_ctxt, - const int num_planes); - -static INLINE void foreach_overlappable_nb_above(const AV1_COMMON *cm, - MACROBLOCKD *xd, int mi_col, - int nb_max, - overlappable_nb_visitor_t fun, - void *fun_ctxt) { - const int num_planes = av1_num_planes(cm); - if (!xd->up_available) return; - - int nb_count = 0; - - // prev_row_mi points into the mi array, starting at the beginning of the - // previous row. 
- MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride; - const int end_col = AOMMIN(mi_col + xd->n4_w, cm->mi_cols); - uint8_t mi_step; - for (int above_mi_col = mi_col; above_mi_col < end_col && nb_count < nb_max; - above_mi_col += mi_step) { - MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col; - mi_step = - AOMMIN(mi_size_wide[above_mi[0]->sb_type], mi_size_wide[BLOCK_64X64]); - // If we're considering a block with width 4, it should be treated as - // half of a pair of blocks with chroma information in the second. Move - // above_mi_col back to the start of the pair if needed, set above_mbmi - // to point at the block with chroma information, and set mi_step to 2 to - // step over the entire pair at the end of the iteration. - if (mi_step == 1) { - above_mi_col &= ~1; - above_mi = prev_row_mi + above_mi_col + 1; - mi_step = 2; - } - if (is_neighbor_overlappable(*above_mi)) { - ++nb_count; - fun(xd, above_mi_col - mi_col, AOMMIN(xd->n4_w, mi_step), *above_mi, - fun_ctxt, num_planes); - } - } -} - -static INLINE void foreach_overlappable_nb_left(const AV1_COMMON *cm, - MACROBLOCKD *xd, int mi_row, - int nb_max, - overlappable_nb_visitor_t fun, - void *fun_ctxt) { - const int num_planes = av1_num_planes(cm); - if (!xd->left_available) return; - - int nb_count = 0; - - // prev_col_mi points into the mi array, starting at the top of the - // previous column - MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride; - const int end_row = AOMMIN(mi_row + xd->n4_h, cm->mi_rows); - uint8_t mi_step; - for (int left_mi_row = mi_row; left_mi_row < end_row && nb_count < nb_max; - left_mi_row += mi_step) { - MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride; - mi_step = - AOMMIN(mi_size_high[left_mi[0]->sb_type], mi_size_high[BLOCK_64X64]); - if (mi_step == 1) { - left_mi_row &= ~1; - left_mi = prev_col_mi + (left_mi_row + 1) * xd->mi_stride; - mi_step = 2; - } - if (is_neighbor_overlappable(*left_mi)) { - ++nb_count; - fun(xd, 
left_mi_row - mi_row, AOMMIN(xd->n4_h, mi_step), *left_mi, - fun_ctxt, num_planes); - } - } -} - -#endif // AOM_AV1_COMMON_OBMC_H_ diff --git a/third_party/aom/av1/common/obu_util.c b/third_party/aom/av1/common/obu_util.c deleted file mode 100644 index 823b700b1..000000000 --- a/third_party/aom/av1/common/obu_util.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#include "av1/common/obu_util.h" - -#include "aom_dsp/bitreader_buffer.h" - -// Returns 1 when OBU type is valid, and 0 otherwise. -static int valid_obu_type(int obu_type) { - int valid_type = 0; - switch (obu_type) { - case OBU_SEQUENCE_HEADER: - case OBU_TEMPORAL_DELIMITER: - case OBU_FRAME_HEADER: - case OBU_TILE_GROUP: - case OBU_METADATA: - case OBU_FRAME: - case OBU_REDUNDANT_FRAME_HEADER: - case OBU_TILE_LIST: - case OBU_PADDING: valid_type = 1; break; - default: break; - } - return valid_type; -} - -static aom_codec_err_t read_obu_size(const uint8_t *data, - size_t bytes_available, - size_t *const obu_size, - size_t *const length_field_size) { - uint64_t u_obu_size = 0; - if (aom_uleb_decode(data, bytes_available, &u_obu_size, length_field_size) != - 0) { - return AOM_CODEC_CORRUPT_FRAME; - } - - if (u_obu_size > UINT32_MAX) return AOM_CODEC_CORRUPT_FRAME; - *obu_size = (size_t)u_obu_size; - return AOM_CODEC_OK; -} - -// Parses OBU header and stores values in 'header'. 
-static aom_codec_err_t read_obu_header(struct aom_read_bit_buffer *rb, - int is_annexb, ObuHeader *header) { - if (!rb || !header) return AOM_CODEC_INVALID_PARAM; - - const ptrdiff_t bit_buffer_byte_length = rb->bit_buffer_end - rb->bit_buffer; - if (bit_buffer_byte_length < 1) return AOM_CODEC_CORRUPT_FRAME; - - header->size = 1; - - if (aom_rb_read_bit(rb) != 0) { - // Forbidden bit. Must not be set. - return AOM_CODEC_CORRUPT_FRAME; - } - - header->type = (OBU_TYPE)aom_rb_read_literal(rb, 4); - - if (!valid_obu_type(header->type)) return AOM_CODEC_CORRUPT_FRAME; - - header->has_extension = aom_rb_read_bit(rb); - header->has_size_field = aom_rb_read_bit(rb); - - if (!header->has_size_field && !is_annexb) { - // section 5 obu streams must have obu_size field set. - return AOM_CODEC_UNSUP_BITSTREAM; - } - - if (aom_rb_read_bit(rb) != 0) { - // obu_reserved_1bit must be set to 0. - return AOM_CODEC_CORRUPT_FRAME; - } - - if (header->has_extension) { - if (bit_buffer_byte_length == 1) return AOM_CODEC_CORRUPT_FRAME; - - header->size += 1; - header->temporal_layer_id = aom_rb_read_literal(rb, 3); - header->spatial_layer_id = aom_rb_read_literal(rb, 2); - if (aom_rb_read_literal(rb, 3) != 0) { - // extension_header_reserved_3bits must be set to 0. - return AOM_CODEC_CORRUPT_FRAME; - } - } - - return AOM_CODEC_OK; -} - -aom_codec_err_t aom_read_obu_header(uint8_t *buffer, size_t buffer_length, - size_t *consumed, ObuHeader *header, - int is_annexb) { - if (buffer_length < 1 || !consumed || !header) return AOM_CODEC_INVALID_PARAM; - - // TODO(tomfinegan): Set the error handler here and throughout this file, and - // confirm parsing work done via aom_read_bit_buffer is successful. 
- struct aom_read_bit_buffer rb = { buffer, buffer + buffer_length, 0, NULL, - NULL }; - aom_codec_err_t parse_result = read_obu_header(&rb, is_annexb, header); - if (parse_result == AOM_CODEC_OK) *consumed = header->size; - return parse_result; -} - -aom_codec_err_t aom_read_obu_header_and_size(const uint8_t *data, - size_t bytes_available, - int is_annexb, - ObuHeader *obu_header, - size_t *const payload_size, - size_t *const bytes_read) { - size_t length_field_size = 0, obu_size = 0; - aom_codec_err_t status; - - if (is_annexb) { - // Size field comes before the OBU header, and includes the OBU header - status = - read_obu_size(data, bytes_available, &obu_size, &length_field_size); - - if (status != AOM_CODEC_OK) return status; - } - - struct aom_read_bit_buffer rb = { data + length_field_size, - data + bytes_available, 0, NULL, NULL }; - - status = read_obu_header(&rb, is_annexb, obu_header); - if (status != AOM_CODEC_OK) return status; - - if (is_annexb) { - // Derive the payload size from the data we've already read - if (obu_size < obu_header->size) return AOM_CODEC_CORRUPT_FRAME; - - *payload_size = obu_size - obu_header->size; - } else { - // Size field comes after the OBU header, and is just the payload size - status = read_obu_size(data + obu_header->size, - bytes_available - obu_header->size, payload_size, - &length_field_size); - if (status != AOM_CODEC_OK) return status; - } - - *bytes_read = length_field_size + obu_header->size; - return AOM_CODEC_OK; -} diff --git a/third_party/aom/av1/common/obu_util.h b/third_party/aom/av1/common/obu_util.h deleted file mode 100644 index 7c56904c8..000000000 --- a/third_party/aom/av1/common/obu_util.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#ifndef AOM_AV1_COMMON_OBU_UTIL_H_ -#define AOM_AV1_COMMON_OBU_UTIL_H_ - -#include "aom/aom_codec.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - size_t size; // Size (1 or 2 bytes) of the OBU header (including the - // optional OBU extension header) in the bitstream. - OBU_TYPE type; - int has_size_field; - int has_extension; - // The following fields come from the OBU extension header and therefore are - // only used if has_extension is true. - int temporal_layer_id; - int spatial_layer_id; -} ObuHeader; - -aom_codec_err_t aom_read_obu_header(uint8_t *buffer, size_t buffer_length, - size_t *consumed, ObuHeader *header, - int is_annexb); - -aom_codec_err_t aom_read_obu_header_and_size(const uint8_t *data, - size_t bytes_available, - int is_annexb, - ObuHeader *obu_header, - size_t *const payload_size, - size_t *const bytes_read); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_OBU_UTIL_H_ diff --git a/third_party/aom/av1/common/odintrin.c b/third_party/aom/av1/common/odintrin.c deleted file mode 100644 index 7584b2e52..000000000 --- a/third_party/aom/av1/common/odintrin.c +++ /dev/null @@ -1,541 +0,0 @@ -/* - * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -/* clang-format off */ - -#include "av1/common/odintrin.h" - -/*Constants for use with OD_DIVU_SMALL(). - See \cite{Rob05} for details on computing these constants. - @INPROCEEDINGS{Rob05, - author="Arch D. Robison", - title="{N}-bit Unsigned Division via {N}-bit Multiply-Add", - booktitle="Proc. of the 17th IEEE Symposium on Computer Arithmetic - (ARITH'05)", - pages="131--139", - address="Cape Cod, MA", - month=Jun, - year=2005 - }*/ -uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2] = { - { 0xFFFFFFFF, 0xFFFFFFFF }, { 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xAAAAAAAB, 0 }, { 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xCCCCCCCD, 0 }, { 0xAAAAAAAB, 0 }, - { 0x92492492, 0x92492492 }, { 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xE38E38E4, 0 }, { 0xCCCCCCCD, 0 }, - { 0xBA2E8BA3, 0 }, { 0xAAAAAAAB, 0 }, - { 0x9D89D89E, 0 }, { 0x92492492, 0x92492492 }, - { 0x88888889, 0 }, { 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xF0F0F0F1, 0 }, { 0xE38E38E4, 0 }, - { 0xD79435E5, 0xD79435E5 }, { 0xCCCCCCCD, 0 }, - { 0xC30C30C3, 0xC30C30C3 }, { 0xBA2E8BA3, 0 }, - { 0xB21642C9, 0 }, { 0xAAAAAAAB, 0 }, - { 0xA3D70A3E, 0 }, { 0x9D89D89E, 0 }, - { 0x97B425ED, 0x97B425ED }, { 0x92492492, 0x92492492 }, - { 0x8D3DCB09, 0 }, { 0x88888889, 0 }, - { 0x84210842, 0x84210842 }, { 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xF83E0F84, 0 }, { 0xF0F0F0F1, 0 }, - { 0xEA0EA0EA, 0xEA0EA0EA }, { 0xE38E38E4, 0 }, - { 0xDD67C8A6, 0xDD67C8A6 }, { 0xD79435E5, 0xD79435E5 }, - { 0xD20D20D2, 0xD20D20D2 }, { 0xCCCCCCCD, 0 }, - { 0xC7CE0C7D, 0 }, { 0xC30C30C3, 0xC30C30C3 }, - { 0xBE82FA0C, 0 }, { 0xBA2E8BA3, 0 }, - { 0xB60B60B6, 0xB60B60B6 }, { 0xB21642C9, 0 }, - { 0xAE4C415D, 0 }, { 0xAAAAAAAB, 0 }, - { 0xA72F053A, 0 }, { 0xA3D70A3E, 0 }, - { 0xA0A0A0A1, 0 }, { 0x9D89D89E, 0 }, - { 0x9A90E7D9, 0x9A90E7D9 }, { 0x97B425ED, 0x97B425ED }, - { 0x94F2094F, 0x94F2094F }, { 0x92492492, 0x92492492 
}, - { 0x8FB823EE, 0x8FB823EE }, { 0x8D3DCB09, 0 }, - { 0x8AD8F2FC, 0 }, { 0x88888889, 0 }, - { 0x864B8A7E, 0 }, { 0x84210842, 0x84210842 }, - { 0x82082082, 0x82082082 }, { 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xFC0FC0FD, 0 }, { 0xF83E0F84, 0 }, - { 0xF4898D60, 0 }, { 0xF0F0F0F1, 0 }, - { 0xED7303B6, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA }, - { 0xE6C2B449, 0 }, { 0xE38E38E4, 0 }, - { 0xE070381C, 0xE070381C }, { 0xDD67C8A6, 0xDD67C8A6 }, - { 0xDA740DA8, 0 }, { 0xD79435E5, 0xD79435E5 }, - { 0xD4C77B04, 0 }, { 0xD20D20D2, 0xD20D20D2 }, - { 0xCF6474A9, 0 }, { 0xCCCCCCCD, 0 }, - { 0xCA4587E7, 0 }, { 0xC7CE0C7D, 0 }, - { 0xC565C87C, 0 }, { 0xC30C30C3, 0xC30C30C3 }, - { 0xC0C0C0C1, 0 }, { 0xBE82FA0C, 0 }, - { 0xBC52640C, 0 }, { 0xBA2E8BA3, 0 }, - { 0xB81702E1, 0 }, { 0xB60B60B6, 0xB60B60B6 }, - { 0xB40B40B4, 0xB40B40B4 }, { 0xB21642C9, 0 }, - { 0xB02C0B03, 0 }, { 0xAE4C415D, 0 }, - { 0xAC769184, 0xAC769184 }, { 0xAAAAAAAB, 0 }, - { 0xA8E83F57, 0xA8E83F57 }, { 0xA72F053A, 0 }, - { 0xA57EB503, 0 }, { 0xA3D70A3E, 0 }, - { 0xA237C32B, 0xA237C32B }, { 0xA0A0A0A1, 0 }, - { 0x9F1165E7, 0x9F1165E7 }, { 0x9D89D89E, 0 }, - { 0x9C09C09C, 0x9C09C09C }, { 0x9A90E7D9, 0x9A90E7D9 }, - { 0x991F1A51, 0x991F1A51 }, { 0x97B425ED, 0x97B425ED }, - { 0x964FDA6C, 0x964FDA6C }, { 0x94F2094F, 0x94F2094F }, - { 0x939A85C4, 0x939A85C4 }, { 0x92492492, 0x92492492 }, - { 0x90FDBC09, 0x90FDBC09 }, { 0x8FB823EE, 0x8FB823EE }, - { 0x8E78356D, 0x8E78356D }, { 0x8D3DCB09, 0 }, - { 0x8C08C08C, 0x8C08C08C }, { 0x8AD8F2FC, 0 }, - { 0x89AE408A, 0 }, { 0x88888889, 0 }, - { 0x8767AB5F, 0x8767AB5F }, { 0x864B8A7E, 0 }, - { 0x85340853, 0x85340853 }, { 0x84210842, 0x84210842 }, - { 0x83126E98, 0 }, { 0x82082082, 0x82082082 }, - { 0x81020408, 0x81020408 }, { 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xFE03F810, 0 }, { 0xFC0FC0FD, 0 }, - { 0xFA232CF3, 0 }, { 0xF83E0F84, 0 }, - { 0xF6603D99, 0 }, { 0xF4898D60, 0 }, - { 0xF2B9D649, 0 }, { 0xF0F0F0F1, 0 }, - { 0xEF2EB720, 0 }, { 0xED7303B6, 0 }, - { 0xEBBDB2A6, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA }, - 
{ 0xE865AC7C, 0 }, { 0xE6C2B449, 0 }, - { 0xE525982B, 0 }, { 0xE38E38E4, 0 }, - { 0xE1FC780F, 0 }, { 0xE070381C, 0xE070381C }, - { 0xDEE95C4D, 0 }, { 0xDD67C8A6, 0xDD67C8A6 }, - { 0xDBEB61EF, 0 }, { 0xDA740DA8, 0 }, - { 0xD901B204, 0 }, { 0xD79435E5, 0xD79435E5 }, - { 0xD62B80D7, 0 }, { 0xD4C77B04, 0 }, - { 0xD3680D37, 0 }, { 0xD20D20D2, 0xD20D20D2 }, - { 0xD0B69FCC, 0 }, { 0xCF6474A9, 0 }, - { 0xCE168A77, 0xCE168A77 }, { 0xCCCCCCCD, 0 }, - { 0xCB8727C1, 0 }, { 0xCA4587E7, 0 }, - { 0xC907DA4F, 0 }, { 0xC7CE0C7D, 0 }, - { 0xC6980C6A, 0 }, { 0xC565C87C, 0 }, - { 0xC4372F86, 0 }, { 0xC30C30C3, 0xC30C30C3 }, - { 0xC1E4BBD6, 0 }, { 0xC0C0C0C1, 0 }, - { 0xBFA02FE8, 0xBFA02FE8 }, { 0xBE82FA0C, 0 }, - { 0xBD691047, 0xBD691047 }, { 0xBC52640C, 0 }, - { 0xBB3EE722, 0 }, { 0xBA2E8BA3, 0 }, - { 0xB92143FA, 0xB92143FA }, { 0xB81702E1, 0 }, - { 0xB70FBB5A, 0xB70FBB5A }, { 0xB60B60B6, 0xB60B60B6 }, - { 0xB509E68B, 0 }, { 0xB40B40B4, 0xB40B40B4 }, - { 0xB30F6353, 0 }, { 0xB21642C9, 0 }, - { 0xB11FD3B8, 0xB11FD3B8 }, { 0xB02C0B03, 0 }, - { 0xAF3ADDC7, 0 }, { 0xAE4C415D, 0 }, - { 0xAD602B58, 0xAD602B58 }, { 0xAC769184, 0xAC769184 }, - { 0xAB8F69E3, 0 }, { 0xAAAAAAAB, 0 }, - { 0xA9C84A48, 0 }, { 0xA8E83F57, 0xA8E83F57 }, - { 0xA80A80A8, 0xA80A80A8 }, { 0xA72F053A, 0 }, - { 0xA655C439, 0xA655C439 }, { 0xA57EB503, 0 }, - { 0xA4A9CF1E, 0 }, { 0xA3D70A3E, 0 }, - { 0xA3065E40, 0 }, { 0xA237C32B, 0xA237C32B }, - { 0xA16B312F, 0 }, { 0xA0A0A0A1, 0 }, - { 0x9FD809FE, 0 }, { 0x9F1165E7, 0x9F1165E7 }, - { 0x9E4CAD24, 0 }, { 0x9D89D89E, 0 }, - { 0x9CC8E161, 0 }, { 0x9C09C09C, 0x9C09C09C }, - { 0x9B4C6F9F, 0 }, { 0x9A90E7D9, 0x9A90E7D9 }, - { 0x99D722DB, 0 }, { 0x991F1A51, 0x991F1A51 }, - { 0x9868C80A, 0 }, { 0x97B425ED, 0x97B425ED }, - { 0x97012E02, 0x97012E02 }, { 0x964FDA6C, 0x964FDA6C }, - { 0x95A02568, 0x95A02568 }, { 0x94F2094F, 0x94F2094F }, - { 0x94458094, 0x94458094 }, { 0x939A85C4, 0x939A85C4 }, - { 0x92F11384, 0x92F11384 }, { 0x92492492, 0x92492492 }, - { 0x91A2B3C5, 0 }, { 
0x90FDBC09, 0x90FDBC09 }, - { 0x905A3863, 0x905A3863 }, { 0x8FB823EE, 0x8FB823EE }, - { 0x8F1779DA, 0 }, { 0x8E78356D, 0x8E78356D }, - { 0x8DDA5202, 0x8DDA5202 }, { 0x8D3DCB09, 0 }, - { 0x8CA29C04, 0x8CA29C04 }, { 0x8C08C08C, 0x8C08C08C }, - { 0x8B70344A, 0x8B70344A }, { 0x8AD8F2FC, 0 }, - { 0x8A42F870, 0x8A42F870 }, { 0x89AE408A, 0 }, - { 0x891AC73B, 0 }, { 0x88888889, 0 }, - { 0x87F78088, 0 }, { 0x8767AB5F, 0x8767AB5F }, - { 0x86D90545, 0 }, { 0x864B8A7E, 0 }, - { 0x85BF3761, 0x85BF3761 }, { 0x85340853, 0x85340853 }, - { 0x84A9F9C8, 0x84A9F9C8 }, { 0x84210842, 0x84210842 }, - { 0x83993052, 0x83993052 }, { 0x83126E98, 0 }, - { 0x828CBFBF, 0 }, { 0x82082082, 0x82082082 }, - { 0x81848DA9, 0 }, { 0x81020408, 0x81020408 }, - { 0x80808081, 0 }, { 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xFF00FF01, 0 }, { 0xFE03F810, 0 }, - { 0xFD08E551, 0 }, { 0xFC0FC0FD, 0 }, - { 0xFB188566, 0 }, { 0xFA232CF3, 0 }, - { 0xF92FB222, 0 }, { 0xF83E0F84, 0 }, - { 0xF74E3FC3, 0 }, { 0xF6603D99, 0 }, - { 0xF57403D6, 0 }, { 0xF4898D60, 0 }, - { 0xF3A0D52D, 0 }, { 0xF2B9D649, 0 }, - { 0xF1D48BCF, 0 }, { 0xF0F0F0F1, 0 }, - { 0xF00F00F0, 0xF00F00F0 }, { 0xEF2EB720, 0 }, - { 0xEE500EE5, 0xEE500EE5 }, { 0xED7303B6, 0 }, - { 0xEC979119, 0 }, { 0xEBBDB2A6, 0 }, - { 0xEAE56404, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA }, - { 0xE9396520, 0 }, { 0xE865AC7C, 0 }, - { 0xE79372E3, 0 }, { 0xE6C2B449, 0 }, - { 0xE5F36CB0, 0xE5F36CB0 }, { 0xE525982B, 0 }, - { 0xE45932D8, 0 }, { 0xE38E38E4, 0 }, - { 0xE2C4A689, 0 }, { 0xE1FC780F, 0 }, - { 0xE135A9CA, 0 }, { 0xE070381C, 0xE070381C }, - { 0xDFAC1F75, 0 }, { 0xDEE95C4D, 0 }, - { 0xDE27EB2D, 0 }, { 0xDD67C8A6, 0xDD67C8A6 }, - { 0xDCA8F159, 0 }, { 0xDBEB61EF, 0 }, - { 0xDB2F171E, 0 }, { 0xDA740DA8, 0 }, - { 0xD9BA4257, 0 }, { 0xD901B204, 0 }, - { 0xD84A598F, 0 }, { 0xD79435E5, 0xD79435E5 }, - { 0xD6DF43FD, 0 }, { 0xD62B80D7, 0 }, - { 0xD578E97D, 0 }, { 0xD4C77B04, 0 }, - { 0xD417328A, 0 }, { 0xD3680D37, 0 }, - { 0xD2BA083C, 0 }, { 0xD20D20D2, 0xD20D20D2 }, - { 0xD161543E, 0xD161543E 
}, { 0xD0B69FCC, 0 }, - { 0xD00D00D0, 0xD00D00D0 }, { 0xCF6474A9, 0 }, - { 0xCEBCF8BC, 0 }, { 0xCE168A77, 0xCE168A77 }, - { 0xCD712753, 0 }, { 0xCCCCCCCD, 0 }, - { 0xCC29786D, 0 }, { 0xCB8727C1, 0 }, - { 0xCAE5D85F, 0xCAE5D85F }, { 0xCA4587E7, 0 }, - { 0xC9A633FD, 0 }, { 0xC907DA4F, 0 }, - { 0xC86A7890, 0xC86A7890 }, { 0xC7CE0C7D, 0 }, - { 0xC73293D8, 0 }, { 0xC6980C6A, 0 }, - { 0xC5FE7403, 0xC5FE7403 }, { 0xC565C87C, 0 }, - { 0xC4CE07B0, 0xC4CE07B0 }, { 0xC4372F86, 0 }, - { 0xC3A13DE6, 0xC3A13DE6 }, { 0xC30C30C3, 0xC30C30C3 }, - { 0xC2780614, 0 }, { 0xC1E4BBD6, 0 }, - { 0xC152500C, 0xC152500C }, { 0xC0C0C0C1, 0 }, - { 0xC0300C03, 0xC0300C03 }, { 0xBFA02FE8, 0xBFA02FE8 }, - { 0xBF112A8B, 0 }, { 0xBE82FA0C, 0 }, - { 0xBDF59C92, 0 }, { 0xBD691047, 0xBD691047 }, - { 0xBCDD535E, 0 }, { 0xBC52640C, 0 }, - { 0xBBC8408D, 0 }, { 0xBB3EE722, 0 }, - { 0xBAB65610, 0xBAB65610 }, { 0xBA2E8BA3, 0 }, - { 0xB9A7862A, 0xB9A7862A }, { 0xB92143FA, 0xB92143FA }, - { 0xB89BC36D, 0 }, { 0xB81702E1, 0 }, - { 0xB79300B8, 0 }, { 0xB70FBB5A, 0xB70FBB5A }, - { 0xB68D3134, 0xB68D3134 }, { 0xB60B60B6, 0xB60B60B6 }, - { 0xB58A4855, 0xB58A4855 }, { 0xB509E68B, 0 }, - { 0xB48A39D4, 0xB48A39D4 }, { 0xB40B40B4, 0xB40B40B4 }, - { 0xB38CF9B0, 0xB38CF9B0 }, { 0xB30F6353, 0 }, - { 0xB2927C2A, 0 }, { 0xB21642C9, 0 }, - { 0xB19AB5C5, 0 }, { 0xB11FD3B8, 0xB11FD3B8 }, - { 0xB0A59B42, 0 }, { 0xB02C0B03, 0 }, - { 0xAFB321A1, 0xAFB321A1 }, { 0xAF3ADDC7, 0 }, - { 0xAEC33E20, 0 }, { 0xAE4C415D, 0 }, - { 0xADD5E632, 0xADD5E632 }, { 0xAD602B58, 0xAD602B58 }, - { 0xACEB0F89, 0xACEB0F89 }, { 0xAC769184, 0xAC769184 }, - { 0xAC02B00B, 0 }, { 0xAB8F69E3, 0 }, - { 0xAB1CBDD4, 0 }, { 0xAAAAAAAB, 0 }, - { 0xAA392F36, 0 }, { 0xA9C84A48, 0 }, - { 0xA957FAB5, 0xA957FAB5 }, { 0xA8E83F57, 0xA8E83F57 }, - { 0xA8791709, 0 }, { 0xA80A80A8, 0xA80A80A8 }, - { 0xA79C7B17, 0 }, { 0xA72F053A, 0 }, - { 0xA6C21DF7, 0 }, { 0xA655C439, 0xA655C439 }, - { 0xA5E9F6ED, 0xA5E9F6ED }, { 0xA57EB503, 0 }, - { 0xA513FD6C, 0 }, { 0xA4A9CF1E, 0 }, 
- { 0xA4402910, 0xA4402910 }, { 0xA3D70A3E, 0 }, - { 0xA36E71A3, 0 }, { 0xA3065E40, 0 }, - { 0xA29ECF16, 0xA29ECF16 }, { 0xA237C32B, 0xA237C32B }, - { 0xA1D13986, 0 }, { 0xA16B312F, 0 }, - { 0xA105A933, 0 }, { 0xA0A0A0A1, 0 }, - { 0xA03C1689, 0 }, { 0x9FD809FE, 0 }, - { 0x9F747A15, 0x9F747A15 }, { 0x9F1165E7, 0x9F1165E7 }, - { 0x9EAECC8D, 0x9EAECC8D }, { 0x9E4CAD24, 0 }, - { 0x9DEB06C9, 0x9DEB06C9 }, { 0x9D89D89E, 0 }, - { 0x9D2921C4, 0 }, { 0x9CC8E161, 0 }, - { 0x9C69169B, 0x9C69169B }, { 0x9C09C09C, 0x9C09C09C }, - { 0x9BAADE8E, 0x9BAADE8E }, { 0x9B4C6F9F, 0 }, - { 0x9AEE72FD, 0 }, { 0x9A90E7D9, 0x9A90E7D9 }, - { 0x9A33CD67, 0x9A33CD67 }, { 0x99D722DB, 0 }, - { 0x997AE76B, 0x997AE76B }, { 0x991F1A51, 0x991F1A51 }, - { 0x98C3BAC7, 0x98C3BAC7 }, { 0x9868C80A, 0 }, - { 0x980E4156, 0x980E4156 }, { 0x97B425ED, 0x97B425ED }, - { 0x975A7510, 0 }, { 0x97012E02, 0x97012E02 }, - { 0x96A8500A, 0 }, { 0x964FDA6C, 0x964FDA6C }, - { 0x95F7CC73, 0 }, { 0x95A02568, 0x95A02568 }, - { 0x9548E498, 0 }, { 0x94F2094F, 0x94F2094F }, - { 0x949B92DE, 0 }, { 0x94458094, 0x94458094 }, - { 0x93EFD1C5, 0x93EFD1C5 }, { 0x939A85C4, 0x939A85C4 }, - { 0x93459BE7, 0 }, { 0x92F11384, 0x92F11384 }, - { 0x929CEBF5, 0 }, { 0x92492492, 0x92492492 }, - { 0x91F5BCB9, 0 }, { 0x91A2B3C5, 0 }, - { 0x91500915, 0x91500915 }, { 0x90FDBC09, 0x90FDBC09 }, - { 0x90ABCC02, 0x90ABCC02 }, { 0x905A3863, 0x905A3863 }, - { 0x90090090, 0x90090090 }, { 0x8FB823EE, 0x8FB823EE }, - { 0x8F67A1E4, 0 }, { 0x8F1779DA, 0 }, - { 0x8EC7AB3A, 0 }, { 0x8E78356D, 0x8E78356D }, - { 0x8E2917E1, 0 }, { 0x8DDA5202, 0x8DDA5202 }, - { 0x8D8BE340, 0 }, { 0x8D3DCB09, 0 }, - { 0x8CF008CF, 0x8CF008CF }, { 0x8CA29C04, 0x8CA29C04 }, - { 0x8C55841D, 0 }, { 0x8C08C08C, 0x8C08C08C }, - { 0x8BBC50C9, 0 }, { 0x8B70344A, 0x8B70344A }, - { 0x8B246A88, 0 }, { 0x8AD8F2FC, 0 }, - { 0x8A8DCD20, 0 }, { 0x8A42F870, 0x8A42F870 }, - { 0x89F8746A, 0 }, { 0x89AE408A, 0 }, - { 0x89645C4F, 0x89645C4F }, { 0x891AC73B, 0 }, - { 0x88D180CD, 0x88D180CD }, { 
0x88888889, 0 }, - { 0x883FDDF0, 0x883FDDF0 }, { 0x87F78088, 0 }, - { 0x87AF6FD6, 0 }, { 0x8767AB5F, 0x8767AB5F }, - { 0x872032AC, 0x872032AC }, { 0x86D90545, 0 }, - { 0x869222B2, 0 }, { 0x864B8A7E, 0 }, - { 0x86053C34, 0x86053C34 }, { 0x85BF3761, 0x85BF3761 }, - { 0x85797B91, 0x85797B91 }, { 0x85340853, 0x85340853 }, - { 0x84EEDD36, 0 }, { 0x84A9F9C8, 0x84A9F9C8 }, - { 0x84655D9C, 0 }, { 0x84210842, 0x84210842 }, - { 0x83DCF94E, 0 }, { 0x83993052, 0x83993052 }, - { 0x8355ACE4, 0 }, { 0x83126E98, 0 }, - { 0x82CF7504, 0 }, { 0x828CBFBF, 0 }, - { 0x824A4E61, 0 }, { 0x82082082, 0x82082082 }, - { 0x81C635BC, 0x81C635BC }, { 0x81848DA9, 0 }, - { 0x814327E4, 0 }, { 0x81020408, 0x81020408 }, - { 0x80C121B3, 0 }, { 0x80808081, 0 }, - { 0x80402010, 0x80402010 }, { 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xFF803FE1, 0 }, { 0xFF00FF01, 0 }, - { 0xFE823CA6, 0 }, { 0xFE03F810, 0 }, - { 0xFD863087, 0 }, { 0xFD08E551, 0 }, - { 0xFC8C15B5, 0 }, { 0xFC0FC0FD, 0 }, - { 0xFB93E673, 0 }, { 0xFB188566, 0 }, - { 0xFA9D9D20, 0 }, { 0xFA232CF3, 0 }, - { 0xF9A9342D, 0 }, { 0xF92FB222, 0 }, - { 0xF8B6A622, 0xF8B6A622 }, { 0xF83E0F84, 0 }, - { 0xF7C5ED9D, 0 }, { 0xF74E3FC3, 0 }, - { 0xF6D7054E, 0 }, { 0xF6603D99, 0 }, - { 0xF5E9E7FD, 0 }, { 0xF57403D6, 0 }, - { 0xF4FE9083, 0 }, { 0xF4898D60, 0 }, - { 0xF414F9CE, 0 }, { 0xF3A0D52D, 0 }, - { 0xF32D1EE0, 0 }, { 0xF2B9D649, 0 }, - { 0xF246FACC, 0 }, { 0xF1D48BCF, 0 }, - { 0xF16288B9, 0 }, { 0xF0F0F0F1, 0 }, - { 0xF07FC3E0, 0xF07FC3E0 }, { 0xF00F00F0, 0xF00F00F0 }, - { 0xEF9EA78C, 0 }, { 0xEF2EB720, 0 }, - { 0xEEBF2F19, 0 }, { 0xEE500EE5, 0xEE500EE5 }, - { 0xEDE155F4, 0 }, { 0xED7303B6, 0 }, - { 0xED05179C, 0xED05179C }, { 0xEC979119, 0 }, - { 0xEC2A6FA0, 0xEC2A6FA0 }, { 0xEBBDB2A6, 0 }, - { 0xEB5159A0, 0 }, { 0xEAE56404, 0 }, - { 0xEA79D14A, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA }, - { 0xE9A3D25E, 0xE9A3D25E }, { 0xE9396520, 0 }, - { 0xE8CF58AB, 0 }, { 0xE865AC7C, 0 }, - { 0xE7FC600F, 0 }, { 0xE79372E3, 0 }, - { 0xE72AE476, 0 }, { 0xE6C2B449, 0 }, - { 0xE65AE1DC, 
0 }, { 0xE5F36CB0, 0xE5F36CB0 }, - { 0xE58C544A, 0 }, { 0xE525982B, 0 }, - { 0xE4BF37D9, 0 }, { 0xE45932D8, 0 }, - { 0xE3F388AF, 0 }, { 0xE38E38E4, 0 }, - { 0xE32942FF, 0 }, { 0xE2C4A689, 0 }, - { 0xE260630B, 0 }, { 0xE1FC780F, 0 }, - { 0xE198E520, 0 }, { 0xE135A9CA, 0 }, - { 0xE0D2C59A, 0 }, { 0xE070381C, 0xE070381C }, - { 0xE00E00E0, 0xE00E00E0 }, { 0xDFAC1F75, 0 }, - { 0xDF4A9369, 0 }, { 0xDEE95C4D, 0 }, - { 0xDE8879B3, 0 }, { 0xDE27EB2D, 0 }, - { 0xDDC7B04D, 0 }, { 0xDD67C8A6, 0xDD67C8A6 }, - { 0xDD0833CE, 0 }, { 0xDCA8F159, 0 }, - { 0xDC4A00DD, 0 }, { 0xDBEB61EF, 0 }, - { 0xDB8D1428, 0 }, { 0xDB2F171E, 0 }, - { 0xDAD16A6B, 0 }, { 0xDA740DA8, 0 }, - { 0xDA17006D, 0xDA17006D }, { 0xD9BA4257, 0 }, - { 0xD95DD300, 0 }, { 0xD901B204, 0 }, - { 0xD8A5DEFF, 0 }, { 0xD84A598F, 0 }, - { 0xD7EF2152, 0 }, { 0xD79435E5, 0xD79435E5 }, - { 0xD73996E9, 0 }, { 0xD6DF43FD, 0 }, - { 0xD6853CC1, 0 }, { 0xD62B80D7, 0 }, - { 0xD5D20FDF, 0 }, { 0xD578E97D, 0 }, - { 0xD5200D52, 0xD5200D52 }, { 0xD4C77B04, 0 }, - { 0xD46F3235, 0 }, { 0xD417328A, 0 }, - { 0xD3BF7BA9, 0 }, { 0xD3680D37, 0 }, - { 0xD310E6DB, 0 }, { 0xD2BA083C, 0 }, - { 0xD2637101, 0 }, { 0xD20D20D2, 0xD20D20D2 }, - { 0xD1B71759, 0 }, { 0xD161543E, 0xD161543E }, - { 0xD10BD72C, 0 }, { 0xD0B69FCC, 0 }, - { 0xD061ADCA, 0 }, { 0xD00D00D0, 0xD00D00D0 }, - { 0xCFB8988C, 0 }, { 0xCF6474A9, 0 }, - { 0xCF1094D4, 0 }, { 0xCEBCF8BC, 0 }, - { 0xCE69A00D, 0 }, { 0xCE168A77, 0xCE168A77 }, - { 0xCDC3B7A9, 0xCDC3B7A9 }, { 0xCD712753, 0 }, - { 0xCD1ED924, 0 }, { 0xCCCCCCCD, 0 }, - { 0xCC7B0200, 0 }, { 0xCC29786D, 0 }, - { 0xCBD82FC7, 0 }, { 0xCB8727C1, 0 }, - { 0xCB36600D, 0 }, { 0xCAE5D85F, 0xCAE5D85F }, - { 0xCA95906C, 0 }, { 0xCA4587E7, 0 }, - { 0xC9F5BE86, 0 }, { 0xC9A633FD, 0 }, - { 0xC956E803, 0xC956E803 }, { 0xC907DA4F, 0 }, - { 0xC8B90A96, 0 }, { 0xC86A7890, 0xC86A7890 }, - { 0xC81C23F5, 0xC81C23F5 }, { 0xC7CE0C7D, 0 }, - { 0xC78031E0, 0xC78031E0 }, { 0xC73293D8, 0 }, - { 0xC6E5321D, 0 }, { 0xC6980C6A, 0 }, - { 0xC64B2278, 
0xC64B2278 }, { 0xC5FE7403, 0xC5FE7403 }, - { 0xC5B200C6, 0 }, { 0xC565C87C, 0 }, - { 0xC519CAE0, 0xC519CAE0 }, { 0xC4CE07B0, 0xC4CE07B0 }, - { 0xC4827EA8, 0xC4827EA8 }, { 0xC4372F86, 0 }, - { 0xC3EC1A06, 0 }, { 0xC3A13DE6, 0xC3A13DE6 }, - { 0xC3569AE6, 0 }, { 0xC30C30C3, 0xC30C30C3 }, - { 0xC2C1FF3E, 0 }, { 0xC2780614, 0 }, - { 0xC22E4507, 0 }, { 0xC1E4BBD6, 0 }, - { 0xC19B6A42, 0 }, { 0xC152500C, 0xC152500C }, - { 0xC1096CF6, 0 }, { 0xC0C0C0C1, 0 }, - { 0xC0784B2F, 0 }, { 0xC0300C03, 0xC0300C03 }, - { 0xBFE80300, 0 }, { 0xBFA02FE8, 0xBFA02FE8 }, - { 0xBF589280, 0 }, { 0xBF112A8B, 0 }, - { 0xBEC9F7CE, 0 }, { 0xBE82FA0C, 0 }, - { 0xBE3C310C, 0 }, { 0xBDF59C92, 0 }, - { 0xBDAF3C64, 0 }, { 0xBD691047, 0xBD691047 }, - { 0xBD231803, 0 }, { 0xBCDD535E, 0 }, - { 0xBC97C21E, 0xBC97C21E }, { 0xBC52640C, 0 }, - { 0xBC0D38EE, 0xBC0D38EE }, { 0xBBC8408D, 0 }, - { 0xBB837AB1, 0 }, { 0xBB3EE722, 0 }, - { 0xBAFA85A9, 0xBAFA85A9 }, { 0xBAB65610, 0xBAB65610 }, - { 0xBA725820, 0xBA725820 }, { 0xBA2E8BA3, 0 }, - { 0xB9EAF063, 0 }, { 0xB9A7862A, 0xB9A7862A }, - { 0xB9644CC4, 0 }, { 0xB92143FA, 0xB92143FA }, - { 0xB8DE6B9A, 0 }, { 0xB89BC36D, 0 }, - { 0xB8594B41, 0 }, { 0xB81702E1, 0 }, - { 0xB7D4EA19, 0xB7D4EA19 }, { 0xB79300B8, 0 }, - { 0xB7514689, 0 }, { 0xB70FBB5A, 0xB70FBB5A }, - { 0xB6CE5EF9, 0xB6CE5EF9 }, { 0xB68D3134, 0xB68D3134 }, - { 0xB64C31D9, 0 }, { 0xB60B60B6, 0xB60B60B6 }, - { 0xB5CABD9B, 0 }, { 0xB58A4855, 0xB58A4855 }, - { 0xB54A00B5, 0xB54A00B5 }, { 0xB509E68B, 0 }, - { 0xB4C9F9A5, 0 }, { 0xB48A39D4, 0xB48A39D4 }, - { 0xB44AA6E9, 0xB44AA6E9 }, { 0xB40B40B4, 0xB40B40B4 }, - { 0xB3CC0706, 0 }, { 0xB38CF9B0, 0xB38CF9B0 }, - { 0xB34E1884, 0 }, { 0xB30F6353, 0 }, - { 0xB2D0D9EF, 0 }, { 0xB2927C2A, 0 }, - { 0xB25449D7, 0 }, { 0xB21642C9, 0 }, - { 0xB1D866D1, 0xB1D866D1 }, { 0xB19AB5C5, 0 }, - { 0xB15D2F76, 0 }, { 0xB11FD3B8, 0xB11FD3B8 }, - { 0xB0E2A260, 0xB0E2A260 }, { 0xB0A59B42, 0 }, - { 0xB068BE31, 0 }, { 0xB02C0B03, 0 }, - { 0xAFEF818C, 0 }, { 0xAFB321A1, 0xAFB321A1 
}, - { 0xAF76EB19, 0 }, { 0xAF3ADDC7, 0 }, - { 0xAEFEF982, 0 }, { 0xAEC33E20, 0 }, - { 0xAE87AB76, 0xAE87AB76 }, { 0xAE4C415D, 0 }, - { 0xAE10FFA9, 0 }, { 0xADD5E632, 0xADD5E632 }, - { 0xAD9AF4D0, 0 }, { 0xAD602B58, 0xAD602B58 }, - { 0xAD2589A4, 0 }, { 0xACEB0F89, 0xACEB0F89 }, - { 0xACB0BCE1, 0xACB0BCE1 }, { 0xAC769184, 0xAC769184 }, - { 0xAC3C8D4A, 0 }, { 0xAC02B00B, 0 }, - { 0xABC8F9A0, 0xABC8F9A0 }, { 0xAB8F69E3, 0 }, - { 0xAB5600AC, 0 }, { 0xAB1CBDD4, 0 }, - { 0xAAE3A136, 0 }, { 0xAAAAAAAB, 0 }, - { 0xAA71DA0D, 0 }, { 0xAA392F36, 0 }, - { 0xAA00AA01, 0 }, { 0xA9C84A48, 0 }, - { 0xA9900FE6, 0 }, { 0xA957FAB5, 0xA957FAB5 }, - { 0xA9200A92, 0xA9200A92 }, { 0xA8E83F57, 0xA8E83F57 }, - { 0xA8B098E0, 0xA8B098E0 }, { 0xA8791709, 0 }, - { 0xA841B9AD, 0 }, { 0xA80A80A8, 0xA80A80A8 }, - { 0xA7D36BD8, 0 }, { 0xA79C7B17, 0 }, - { 0xA765AE44, 0 }, { 0xA72F053A, 0 }, - { 0xA6F87FD6, 0xA6F87FD6 }, { 0xA6C21DF7, 0 }, - { 0xA68BDF79, 0 }, { 0xA655C439, 0xA655C439 }, - { 0xA61FCC16, 0xA61FCC16 }, { 0xA5E9F6ED, 0xA5E9F6ED }, - { 0xA5B4449D, 0 }, { 0xA57EB503, 0 }, - { 0xA54947FE, 0 }, { 0xA513FD6C, 0 }, - { 0xA4DED52C, 0xA4DED52C }, { 0xA4A9CF1E, 0 }, - { 0xA474EB1F, 0xA474EB1F }, { 0xA4402910, 0xA4402910 }, - { 0xA40B88D0, 0 }, { 0xA3D70A3E, 0 }, - { 0xA3A2AD39, 0xA3A2AD39 }, { 0xA36E71A3, 0 }, - { 0xA33A575A, 0xA33A575A }, { 0xA3065E40, 0 }, - { 0xA2D28634, 0 }, { 0xA29ECF16, 0xA29ECF16 }, - { 0xA26B38C9, 0 }, { 0xA237C32B, 0xA237C32B }, - { 0xA2046E1F, 0xA2046E1F }, { 0xA1D13986, 0 }, - { 0xA19E2540, 0 }, { 0xA16B312F, 0 }, - { 0xA1385D35, 0 }, { 0xA105A933, 0 }, - { 0xA0D3150C, 0 }, { 0xA0A0A0A1, 0 }, - { 0xA06E4BD4, 0xA06E4BD4 }, { 0xA03C1689, 0 }, - { 0xA00A00A0, 0xA00A00A0 }, { 0x9FD809FE, 0 }, - { 0x9FA63284, 0 }, { 0x9F747A15, 0x9F747A15 }, - { 0x9F42E095, 0x9F42E095 }, { 0x9F1165E7, 0x9F1165E7 }, - { 0x9EE009EE, 0x9EE009EE }, { 0x9EAECC8D, 0x9EAECC8D }, - { 0x9E7DADA9, 0 }, { 0x9E4CAD24, 0 }, - { 0x9E1BCAE3, 0 }, { 0x9DEB06C9, 0x9DEB06C9 }, - { 0x9DBA60BB, 0x9DBA60BB 
}, { 0x9D89D89E, 0 }, - { 0x9D596E54, 0x9D596E54 }, { 0x9D2921C4, 0 }, - { 0x9CF8F2D1, 0x9CF8F2D1 }, { 0x9CC8E161, 0 }, - { 0x9C98ED58, 0 }, { 0x9C69169B, 0x9C69169B }, - { 0x9C395D10, 0x9C395D10 }, { 0x9C09C09C, 0x9C09C09C }, - { 0x9BDA4124, 0x9BDA4124 }, { 0x9BAADE8E, 0x9BAADE8E }, - { 0x9B7B98C0, 0 }, { 0x9B4C6F9F, 0 }, - { 0x9B1D6311, 0x9B1D6311 }, { 0x9AEE72FD, 0 }, - { 0x9ABF9F48, 0x9ABF9F48 }, { 0x9A90E7D9, 0x9A90E7D9 }, - { 0x9A624C97, 0 }, { 0x9A33CD67, 0x9A33CD67 }, - { 0x9A056A31, 0 }, { 0x99D722DB, 0 }, - { 0x99A8F74C, 0 }, { 0x997AE76B, 0x997AE76B }, - { 0x994CF320, 0x994CF320 }, { 0x991F1A51, 0x991F1A51 }, - { 0x98F15CE7, 0 }, { 0x98C3BAC7, 0x98C3BAC7 }, - { 0x989633DB, 0x989633DB }, { 0x9868C80A, 0 }, - { 0x983B773B, 0 }, { 0x980E4156, 0x980E4156 }, - { 0x97E12644, 0x97E12644 }, { 0x97B425ED, 0x97B425ED }, - { 0x97874039, 0 }, { 0x975A7510, 0 }, - { 0x972DC45B, 0 }, { 0x97012E02, 0x97012E02 }, - { 0x96D4B1EF, 0 }, { 0x96A8500A, 0 }, - { 0x967C083B, 0 }, { 0x964FDA6C, 0x964FDA6C }, - { 0x9623C686, 0x9623C686 }, { 0x95F7CC73, 0 }, - { 0x95CBEC1B, 0 }, { 0x95A02568, 0x95A02568 }, - { 0x95747844, 0 }, { 0x9548E498, 0 }, - { 0x951D6A4E, 0 }, { 0x94F2094F, 0x94F2094F }, - { 0x94C6C187, 0 }, { 0x949B92DE, 0 }, - { 0x94707D3F, 0 }, { 0x94458094, 0x94458094 }, - { 0x941A9CC8, 0x941A9CC8 }, { 0x93EFD1C5, 0x93EFD1C5 }, - { 0x93C51F76, 0 }, { 0x939A85C4, 0x939A85C4 }, - { 0x9370049C, 0 }, { 0x93459BE7, 0 }, - { 0x931B4B91, 0 }, { 0x92F11384, 0x92F11384 }, - { 0x92C6F3AC, 0x92C6F3AC }, { 0x929CEBF5, 0 }, - { 0x9272FC48, 0x9272FC48 }, { 0x92492492, 0x92492492 }, - { 0x921F64BF, 0 }, { 0x91F5BCB9, 0 }, - { 0x91CC2C6C, 0x91CC2C6C }, { 0x91A2B3C5, 0 }, - { 0x917952AF, 0 }, { 0x91500915, 0x91500915 }, - { 0x9126D6E5, 0 }, { 0x90FDBC09, 0x90FDBC09 }, - { 0x90D4B86F, 0 }, { 0x90ABCC02, 0x90ABCC02 }, - { 0x9082F6B0, 0 }, { 0x905A3863, 0x905A3863 }, - { 0x9031910A, 0 }, { 0x90090090, 0x90090090 }, - { 0x8FE086E3, 0 }, { 0x8FB823EE, 0x8FB823EE }, - { 0x8F8FD7A0, 0 }, { 
0x8F67A1E4, 0 }, - { 0x8F3F82A8, 0x8F3F82A8 }, { 0x8F1779DA, 0 }, - { 0x8EEF8766, 0 }, { 0x8EC7AB3A, 0 }, - { 0x8E9FE542, 0x8E9FE542 }, { 0x8E78356D, 0x8E78356D }, - { 0x8E509BA8, 0x8E509BA8 }, { 0x8E2917E1, 0 }, - { 0x8E01AA05, 0 }, { 0x8DDA5202, 0x8DDA5202 }, - { 0x8DB30FC6, 0x8DB30FC6 }, { 0x8D8BE340, 0 }, - { 0x8D64CC5C, 0 }, { 0x8D3DCB09, 0 }, - { 0x8D16DF35, 0x8D16DF35 }, { 0x8CF008CF, 0x8CF008CF }, - { 0x8CC947C5, 0 }, { 0x8CA29C04, 0x8CA29C04 }, - { 0x8C7C057D, 0 }, { 0x8C55841D, 0 }, - { 0x8C2F17D2, 0x8C2F17D2 }, { 0x8C08C08C, 0x8C08C08C }, - { 0x8BE27E39, 0x8BE27E39 }, { 0x8BBC50C9, 0 }, - { 0x8B963829, 0x8B963829 }, { 0x8B70344A, 0x8B70344A }, - { 0x8B4A451A, 0 }, { 0x8B246A88, 0 }, - { 0x8AFEA483, 0x8AFEA483 }, { 0x8AD8F2FC, 0 }, - { 0x8AB355E0, 0x8AB355E0 }, { 0x8A8DCD20, 0 }, - { 0x8A6858AB, 0 }, { 0x8A42F870, 0x8A42F870 }, - { 0x8A1DAC60, 0x8A1DAC60 }, { 0x89F8746A, 0 }, - { 0x89D3507D, 0 }, { 0x89AE408A, 0 }, - { 0x89894480, 0 }, { 0x89645C4F, 0x89645C4F }, - { 0x893F87E8, 0x893F87E8 }, { 0x891AC73B, 0 }, - { 0x88F61A37, 0x88F61A37 }, { 0x88D180CD, 0x88D180CD }, - { 0x88ACFAEE, 0 }, { 0x88888889, 0 }, - { 0x8864298F, 0 }, { 0x883FDDF0, 0x883FDDF0 }, - { 0x881BA59E, 0 }, { 0x87F78088, 0 }, - { 0x87D36EA0, 0 }, { 0x87AF6FD6, 0 }, - { 0x878B841B, 0 }, { 0x8767AB5F, 0x8767AB5F }, - { 0x8743E595, 0 }, { 0x872032AC, 0x872032AC }, - { 0x86FC9296, 0x86FC9296 }, { 0x86D90545, 0 }, - { 0x86B58AA8, 0 }, { 0x869222B2, 0 }, - { 0x866ECD53, 0x866ECD53 }, { 0x864B8A7E, 0 }, - { 0x86285A23, 0x86285A23 }, { 0x86053C34, 0x86053C34 }, - { 0x85E230A3, 0x85E230A3 }, { 0x85BF3761, 0x85BF3761 }, - { 0x859C5060, 0x859C5060 }, { 0x85797B91, 0x85797B91 }, - { 0x8556B8E7, 0x8556B8E7 }, { 0x85340853, 0x85340853 }, - { 0x851169C7, 0x851169C7 }, { 0x84EEDD36, 0 }, - { 0x84CC6290, 0 }, { 0x84A9F9C8, 0x84A9F9C8 }, - { 0x8487A2D1, 0 }, { 0x84655D9C, 0 }, - { 0x84432A1B, 0x84432A1B }, { 0x84210842, 0x84210842 }, - { 0x83FEF802, 0x83FEF802 }, { 0x83DCF94E, 0 }, - { 0x83BB0C18, 0 }, { 
0x83993052, 0x83993052 }, - { 0x837765F0, 0x837765F0 }, { 0x8355ACE4, 0 }, - { 0x83340520, 0x83340520 }, { 0x83126E98, 0 }, - { 0x82F0E93D, 0x82F0E93D }, { 0x82CF7504, 0 }, - { 0x82AE11DE, 0 }, { 0x828CBFBF, 0 }, - { 0x826B7E99, 0x826B7E99 }, { 0x824A4E61, 0 }, - { 0x82292F08, 0 }, { 0x82082082, 0x82082082 }, - { 0x81E722C2, 0x81E722C2 }, { 0x81C635BC, 0x81C635BC }, - { 0x81A55963, 0 }, { 0x81848DA9, 0 }, - { 0x8163D283, 0 }, { 0x814327E4, 0 }, - { 0x81228DBF, 0 }, { 0x81020408, 0x81020408 }, - { 0x80E18AB3, 0 }, { 0x80C121B3, 0 }, - { 0x80A0C8FB, 0x80A0C8FB }, { 0x80808081, 0 }, - { 0x80604836, 0x80604836 }, { 0x80402010, 0x80402010 }, - { 0x80200802, 0x80200802 }, { 0xFFFFFFFF, 0xFFFFFFFF } -}; diff --git a/third_party/aom/av1/common/odintrin.h b/third_party/aom/av1/common/odintrin.h deleted file mode 100644 index e1db0f44d..000000000 --- a/third_party/aom/av1/common/odintrin.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -/* clang-format off */ - -#ifndef AOM_AV1_COMMON_ODINTRIN_H_ -#define AOM_AV1_COMMON_ODINTRIN_H_ - -#include <stdlib.h> -#include <string.h> - -#include "aom/aom_integer.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_ports/bitops.h" -#include "av1/common/enums.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef int od_coeff; - -#define OD_DIVU_DMAX (1024) - -extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2]; - -/*Translate unsigned division by small divisors into multiplications.*/ -#define OD_DIVU_SMALL(_x, _d) \ - ((uint32_t)((OD_DIVU_SMALL_CONSTS[(_d)-1][0] * (uint64_t)(_x) + \ - OD_DIVU_SMALL_CONSTS[(_d)-1][1]) >> \ - 32) >> \ - (OD_ILOG_NZ(_d) - 1)) - -#define OD_DIVU(_x, _d) \ - (((_d) < OD_DIVU_DMAX) ? (OD_DIVU_SMALL((_x), (_d))) : ((_x) / (_d))) - -#define OD_MINI AOMMIN -#define OD_MAXI AOMMAX -#define OD_CLAMPI(min, val, max) (OD_MAXI(min, OD_MINI(val, max))) - -/*Integer logarithm (base 2) of a nonzero unsigned 32-bit integer. - OD_ILOG_NZ(x) = (int)floor(log2(x)) + 1.*/ -#define OD_ILOG_NZ(x) (1 + get_msb(x)) - -/*Enable special features for gcc and compatible compilers.*/ -#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) -#define OD_GNUC_PREREQ(maj, min, pat) \ - ((__GNUC__ << 16) + (__GNUC_MINOR__ << 8) + __GNUC_PATCHLEVEL__ >= \ - ((maj) << 16) + ((min) << 8) + pat) // NOLINT -#else -#define OD_GNUC_PREREQ(maj, min, pat) (0) -#endif - -#if OD_GNUC_PREREQ(3, 4, 0) -#define OD_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -#else -#define OD_WARN_UNUSED_RESULT -#endif - -#if OD_GNUC_PREREQ(3, 4, 0) -#define OD_ARG_NONNULL(x) __attribute__((__nonnull__(x))) -#else -#define OD_ARG_NONNULL(x) -#endif - -/** Copy n elements of memory from src to dst. 
The 0* term provides - compile-time type checking */ -#if !defined(OVERRIDE_OD_COPY) -#define OD_COPY(dst, src, n) \ - (memcpy((dst), (src), sizeof(*(dst)) * (n) + 0 * ((dst) - (src)))) -#endif - -/** Copy n elements of memory from src to dst, allowing overlapping regions. - The 0* term provides compile-time type checking */ -#if !defined(OVERRIDE_OD_MOVE) -# define OD_MOVE(dst, src, n) \ - (memmove((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src)) )) -#endif - -/*All of these macros should expect floats as arguments.*/ -# define OD_SIGNMASK(a) (-((a) < 0)) -# define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b)) - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_ODINTRIN_H_ diff --git a/third_party/aom/av1/common/onyxc_int.h b/third_party/aom/av1/common/onyxc_int.h deleted file mode 100644 index ff011c89e..000000000 --- a/third_party/aom/av1/common/onyxc_int.h +++ /dev/null @@ -1,1342 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_ONYXC_INT_H_ -#define AOM_AV1_COMMON_ONYXC_INT_H_ - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "aom/internal/aom_codec_internal.h" -#include "aom_util/aom_thread.h" -#include "av1/common/alloccommon.h" -#include "av1/common/av1_loopfilter.h" -#include "av1/common/entropy.h" -#include "av1/common/entropymode.h" -#include "av1/common/entropymv.h" -#include "av1/common/enums.h" -#include "av1/common/frame_buffers.h" -#include "av1/common/mv.h" -#include "av1/common/quant_common.h" -#include "av1/common/restoration.h" -#include "av1/common/tile_common.h" -#include "av1/common/timing.h" -#include "av1/common/odintrin.h" -#include "av1/encoder/hash_motion.h" -#include "aom_dsp/grain_synthesis.h" -#include "aom_dsp/grain_table.h" -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(__clang__) && defined(__has_warning) -#if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") -#define AOM_FALLTHROUGH_INTENDED [[clang::fallthrough]] // NOLINT -#endif -#elif defined(__GNUC__) && __GNUC__ >= 7 -#define AOM_FALLTHROUGH_INTENDED __attribute__((fallthrough)) // NOLINT -#endif - -#ifndef AOM_FALLTHROUGH_INTENDED -#define AOM_FALLTHROUGH_INTENDED \ - do { \ - } while (0) -#endif - -#define CDEF_MAX_STRENGTHS 16 - -/* Constant values while waiting for the sequence header */ -#define FRAME_ID_LENGTH 15 -#define DELTA_FRAME_ID_LENGTH 14 - -#define FRAME_CONTEXTS (FRAME_BUFFERS + 1) -// Extra frame context which is always kept at default values -#define FRAME_CONTEXT_DEFAULTS (FRAME_CONTEXTS - 1) -#define PRIMARY_REF_BITS 3 -#define PRIMARY_REF_NONE 7 - -#define NUM_PING_PONG_BUFFERS 2 - -#define MAX_NUM_TEMPORAL_LAYERS 8 -#define MAX_NUM_SPATIAL_LAYERS 4 -/* clang-format off */ -// clang-format seems to think this is a pointer dereference and not a -// multiplication. 
-#define MAX_NUM_OPERATING_POINTS \ - MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS -/* clang-format on*/ - -// TODO(jingning): Turning this on to set up transform coefficient -// processing timer. -#define TXCOEFF_TIMER 0 -#define TXCOEFF_COST_TIMER 0 - -typedef enum { - SINGLE_REFERENCE = 0, - COMPOUND_REFERENCE = 1, - REFERENCE_MODE_SELECT = 2, - REFERENCE_MODES = 3, -} REFERENCE_MODE; - -typedef enum { - /** - * Frame context updates are disabled - */ - REFRESH_FRAME_CONTEXT_DISABLED, - /** - * Update frame context to values resulting from backward probability - * updates based on entropy/counts in the decoded frame - */ - REFRESH_FRAME_CONTEXT_BACKWARD, -} REFRESH_FRAME_CONTEXT_MODE; - -#define MFMV_STACK_SIZE 3 -typedef struct { - int_mv mfmv0; - uint8_t ref_frame_offset; -} TPL_MV_REF; - -typedef struct { - int_mv mv; - MV_REFERENCE_FRAME ref_frame; -} MV_REF; - -typedef struct { - int ref_count; - - unsigned int cur_frame_offset; - unsigned int ref_frame_offset[INTER_REFS_PER_FRAME]; - - MV_REF *mvs; - uint8_t *seg_map; - struct segmentation seg; - int mi_rows; - int mi_cols; - // Width and height give the size of the buffer (before any upscaling, unlike - // the sizes that can be derived from the buf structure) - int width; - int height; - WarpedMotionParams global_motion[REF_FRAMES]; - int showable_frame; // frame can be used as show existing frame in future - int film_grain_params_present; - aom_film_grain_t film_grain_params; - aom_codec_frame_buffer_t raw_frame_buffer; - YV12_BUFFER_CONFIG buf; - hash_table hash_table; - uint8_t intra_only; - FRAME_TYPE frame_type; - // The Following variables will only be used in frame parallel decode. - - // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means - // that no FrameWorker owns, or is decoding, this buffer. - AVxWorker *frame_worker_owner; - - // row and col indicate which position frame has been decoded to in real - // pixel unit. 
They are reset to -1 when decoding begins and set to INT_MAX - // when the frame is fully decoded. - int row; - int col; - - // Inter frame reference frame delta for loop filter - int8_t ref_deltas[REF_FRAMES]; - - // 0 = ZERO_MV, MV - int8_t mode_deltas[MAX_MODE_LF_DELTAS]; -} RefCntBuffer; - -typedef struct BufferPool { -// Protect BufferPool from being accessed by several FrameWorkers at -// the same time during frame parallel decode. -// TODO(hkuang): Try to use atomic variable instead of locking the whole pool. -#if CONFIG_MULTITHREAD - pthread_mutex_t pool_mutex; -#endif - - // Private data associated with the frame buffer callbacks. - void *cb_priv; - - aom_get_frame_buffer_cb_fn_t get_fb_cb; - aom_release_frame_buffer_cb_fn_t release_fb_cb; - - RefCntBuffer frame_bufs[FRAME_BUFFERS]; - - // Frame buffers allocated internally by the codec. - InternalFrameBufferList int_frame_buffers; -} BufferPool; - -typedef struct { - int base_ctx_table[2 /*row*/][2 /*col*/][3 /*sig_map*/] - [BASE_CONTEXT_POSITION_NUM + 1]; -} LV_MAP_CTX_TABLE; -typedef int BASE_CTX_TABLE[2 /*col*/][3 /*sig_map*/] - [BASE_CONTEXT_POSITION_NUM + 1]; - -typedef struct BitstreamLevel { - uint8_t major; - uint8_t minor; -} BitstreamLevel; - -// Sequence header structure. -// Note: All syntax elements of sequence_header_obu that need to be -// bit-identical across multiple sequence headers must be part of this struct, -// so that consistency is checked by are_seq_headers_consistent() function. -typedef struct SequenceHeader { - int num_bits_width; - int num_bits_height; - int max_frame_width; - int max_frame_height; - int frame_id_numbers_present_flag; - int frame_id_length; - int delta_frame_id_length; - BLOCK_SIZE sb_size; // Size of the superblock used for this frame - int mib_size; // Size of the superblock in units of MI blocks - int mib_size_log2; // Log 2 of above. 
- int order_hint_bits_minus_1; - int force_screen_content_tools; // 0 - force off - // 1 - force on - // 2 - adaptive - int force_integer_mv; // 0 - Not to force. MV can be in 1/4 or 1/8 - // 1 - force to integer - // 2 - adaptive - int still_picture; // Video is a single frame still picture - int reduced_still_picture_hdr; // Use reduced header for still picture - int enable_filter_intra; // enables/disables filterintra - int enable_intra_edge_filter; // enables/disables corner/edge/upsampling - int enable_interintra_compound; // enables/disables interintra_compound - int enable_masked_compound; // enables/disables masked compound - int enable_dual_filter; // 0 - disable dual interpolation filter - // 1 - enable vert/horiz filter selection - int enable_order_hint; // 0 - disable order hint, and related tools - // jnt_comp, ref_frame_mvs, frame_sign_bias - // if 0, enable_jnt_comp and - // enable_ref_frame_mvs must be set zs 0. - int enable_jnt_comp; // 0 - disable joint compound modes - // 1 - enable it - int enable_ref_frame_mvs; // 0 - disable ref frame mvs - // 1 - enable it - int enable_warped_motion; // 0 - disable warped motion for sequence - // 1 - enable it for the sequence - int enable_superres; // 0 - Disable superres for the sequence, and disable - // transmitting per-frame superres enabled flag. - // 1 - Enable superres for the sequence, and also - // enable per-frame flag to denote if superres is - // enabled for that frame. - int enable_cdef; // To turn on/off CDEF - int enable_restoration; // To turn on/off loop restoration - BITSTREAM_PROFILE profile; - - // Operating point info. - int operating_points_cnt_minus_1; - int operating_point_idc[MAX_NUM_OPERATING_POINTS]; - int display_model_info_present_flag; - int decoder_model_info_present_flag; - BitstreamLevel level[MAX_NUM_OPERATING_POINTS]; - uint8_t tier[MAX_NUM_OPERATING_POINTS]; // seq_tier in the spec. One bit: 0 - // or 1. - - // Color config. 
- aom_bit_depth_t bit_depth; // AOM_BITS_8 in profile 0 or 1, - // AOM_BITS_10 or AOM_BITS_12 in profile 2 or 3. - int use_highbitdepth; // If true, we need to use 16bit frame buffers. - int monochrome; // Monochorme video - aom_color_primaries_t color_primaries; - aom_transfer_characteristics_t transfer_characteristics; - aom_matrix_coefficients_t matrix_coefficients; - int color_range; - int subsampling_x; // Chroma subsampling for x - int subsampling_y; // Chroma subsampling for y - aom_chroma_sample_position_t chroma_sample_position; - int separate_uv_delta_q; - - int film_grain_params_present; -} SequenceHeader; - -typedef struct AV1Common { - struct aom_internal_error_info error; - int width; - int height; - int render_width; - int render_height; - int last_width; - int last_height; - int timing_info_present; - aom_timing_info_t timing_info; - int buffer_removal_time_present; - aom_dec_model_info_t buffer_model; - aom_dec_model_op_parameters_t op_params[MAX_NUM_OPERATING_POINTS + 1]; - aom_op_timing_info_t op_frame_timing[MAX_NUM_OPERATING_POINTS + 1]; - uint32_t frame_presentation_time; - - int largest_tile_id; - size_t largest_tile_size; - int context_update_tile_id; - - // Scale of the current frame with respect to itself. - struct scale_factors sf_identity; - - YV12_BUFFER_CONFIG *frame_to_show; - RefCntBuffer *prev_frame; - - // TODO(hkuang): Combine this with cur_buf in macroblockd. - RefCntBuffer *cur_frame; - - int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ - - // Prepare ref_frame_map for the next frame. - // Only used in frame parallel decode. - int next_ref_frame_map[REF_FRAMES]; - - // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and - // roll new_fb_idx into it. 
- - // Each Inter frame can reference INTER_REFS_PER_FRAME buffers - RefBuffer frame_refs[INTER_REFS_PER_FRAME]; - int is_skip_mode_allowed; - int skip_mode_flag; - int ref_frame_idx_0; - int ref_frame_idx_1; - - int new_fb_idx; - - FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ - FRAME_TYPE frame_type; - - int show_frame; - int showable_frame; // frame can be used as show existing frame in future - int last_show_frame; - int show_existing_frame; - // Flag for a frame used as a reference - not written to the bitstream - int is_reference_frame; - int reset_decoder_state; - - // Flag signaling that the frame is encoded using only INTRA modes. - uint8_t intra_only; - uint8_t last_intra_only; - uint8_t disable_cdf_update; - int allow_high_precision_mv; - int cur_frame_force_integer_mv; // 0 the default in AOM, 1 only integer - - int allow_screen_content_tools; - int allow_intrabc; - int allow_warped_motion; - - // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in - // MB_MODE_INFO (8-pixel) units. - int MBs; - int mb_rows, mi_rows; - int mb_cols, mi_cols; - int mi_stride; - - /* profile settings */ - TX_MODE tx_mode; - -#if CONFIG_ENTROPY_STATS - int coef_cdf_category; -#endif - - int base_qindex; - int y_dc_delta_q; - int u_dc_delta_q; - int v_dc_delta_q; - int u_ac_delta_q; - int v_ac_delta_q; - - // The dequantizers below are true dequntizers used only in the - // dequantization process. They have the same coefficient - // shift/scale as TX. 
- int16_t y_dequant_QTX[MAX_SEGMENTS][2]; - int16_t u_dequant_QTX[MAX_SEGMENTS][2]; - int16_t v_dequant_QTX[MAX_SEGMENTS][2]; - - // Global quant matrix tables - const qm_val_t *giqmatrix[NUM_QM_LEVELS][3][TX_SIZES_ALL]; - const qm_val_t *gqmatrix[NUM_QM_LEVELS][3][TX_SIZES_ALL]; - - // Local quant matrix tables for each frame - const qm_val_t *y_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL]; - const qm_val_t *u_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL]; - const qm_val_t *v_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL]; - - // Encoder - int using_qmatrix; - int qm_y; - int qm_u; - int qm_v; - int min_qmlevel; - int max_qmlevel; - - /* We allocate a MB_MODE_INFO struct for each macroblock, together with - an extra row on top and column on the left to simplify prediction. */ - int mi_alloc_size; - MB_MODE_INFO *mip; /* Base of allocated array */ - MB_MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ - - // TODO(agrange): Move prev_mi into encoder structure. - // prev_mip and prev_mi will only be allocated in encoder. - MB_MODE_INFO *prev_mip; /* MB_MODE_INFO array 'mip' from last decoded frame */ - MB_MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ - - // Separate mi functions between encoder and decoder. - int (*alloc_mi)(struct AV1Common *cm, int mi_size); - void (*free_mi)(struct AV1Common *cm); - void (*setup_mi)(struct AV1Common *cm); - - // Grid of pointers to 8x8 MB_MODE_INFO structs. Any 8x8 not in the visible - // area will be NULL. - MB_MODE_INFO **mi_grid_base; - MB_MODE_INFO **mi_grid_visible; - MB_MODE_INFO **prev_mi_grid_base; - MB_MODE_INFO **prev_mi_grid_visible; - - // Whether to use previous frames' motion vectors for prediction. - int allow_ref_frame_mvs; - - uint8_t *last_frame_seg_map; - uint8_t *current_frame_seg_map; - int seg_map_alloc_size; - - InterpFilter interp_filter; - - int switchable_motion_mode; - - loop_filter_info_n lf_info; - // The denominator of the superres scale; the numerator is fixed. 
- uint8_t superres_scale_denominator; - int superres_upscaled_width; - int superres_upscaled_height; - RestorationInfo rst_info[MAX_MB_PLANE]; - - // rst_end_stripe[i] is one more than the index of the bottom stripe - // for tile row i. - int rst_end_stripe[MAX_TILE_ROWS]; - - // Pointer to a scratch buffer used by self-guided restoration - int32_t *rst_tmpbuf; - RestorationLineBuffers *rlbs; - - // Output of loop restoration - YV12_BUFFER_CONFIG rst_frame; - - // Flag signaling how frame contexts should be updated at the end of - // a frame decode - REFRESH_FRAME_CONTEXT_MODE refresh_frame_context; - - int ref_frame_sign_bias[REF_FRAMES]; /* Two state 0, 1 */ - - struct loopfilter lf; - struct segmentation seg; - int coded_lossless; // frame is fully lossless at the coded resolution. - int all_lossless; // frame is fully lossless at the upscaled resolution. - - int reduced_tx_set_used; - - // Context probabilities for reference frame prediction - MV_REFERENCE_FRAME comp_fwd_ref[FWD_REFS]; - MV_REFERENCE_FRAME comp_bwd_ref[BWD_REFS]; - REFERENCE_MODE reference_mode; - - FRAME_CONTEXT *fc; /* this frame entropy */ - FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS - unsigned int frame_context_idx; /* Context to use/update */ - int fb_of_context_type[REF_FRAMES]; - int primary_ref_frame; - - unsigned int frame_offset; - - unsigned int current_video_frame; - - aom_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer - - int error_resilient_mode; - int force_primary_ref_none; - - int tile_cols, tile_rows; - int last_tile_cols, last_tile_rows; - - int max_tile_width_sb; - int min_log2_tile_cols; - int max_log2_tile_cols; - int max_log2_tile_rows; - int min_log2_tile_rows; - int min_log2_tiles; - int max_tile_height_sb; - int uniform_tile_spacing_flag; - int log2_tile_cols; // only valid for uniform tiles - int log2_tile_rows; // only valid for uniform tiles - int tile_col_start_sb[MAX_TILE_COLS + 1]; // valid for 0 <= i <= tile_cols - int 
tile_row_start_sb[MAX_TILE_ROWS + 1]; // valid for 0 <= i <= tile_rows - int tile_width, tile_height; // In MI units - - unsigned int large_scale_tile; - unsigned int single_tile_decoding; - - int byte_alignment; - int skip_loop_filter; - int skip_film_grain; - - // Private data associated with the frame buffer callbacks. - void *cb_priv; - aom_get_frame_buffer_cb_fn_t get_fb_cb; - aom_release_frame_buffer_cb_fn_t release_fb_cb; - - // Handles memory for the codec. - InternalFrameBufferList int_frame_buffers; - - // External BufferPool passed from outside. - BufferPool *buffer_pool; - - PARTITION_CONTEXT **above_seg_context; - ENTROPY_CONTEXT **above_context[MAX_MB_PLANE]; - TXFM_CONTEXT **above_txfm_context; - WarpedMotionParams global_motion[REF_FRAMES]; - aom_film_grain_t film_grain_params; - - int cdef_pri_damping; - int cdef_sec_damping; - int nb_cdef_strengths; - int cdef_strengths[CDEF_MAX_STRENGTHS]; - int cdef_uv_strengths[CDEF_MAX_STRENGTHS]; - int cdef_bits; - - int delta_q_present_flag; - // Resolution of delta quant - int delta_q_res; - int delta_lf_present_flag; - // Resolution of delta lf level - int delta_lf_res; - // This is a flag for number of deltas of loop filter level - // 0: use 1 delta, for y_vertical, y_horizontal, u, and v - // 1: use separate deltas for each filter level - int delta_lf_multi; - int num_tg; - SequenceHeader seq_params; - int current_frame_id; - int ref_frame_id[REF_FRAMES]; - int valid_for_referencing[REF_FRAMES]; - int invalid_delta_frame_id_minus_1; - LV_MAP_CTX_TABLE coeff_ctx_table; - TPL_MV_REF *tpl_mvs; - int tpl_mvs_mem_size; - // TODO(jingning): This can be combined with sign_bias later. 
- int8_t ref_frame_side[REF_FRAMES]; - - int is_annexb; - - int frame_refs_short_signaling; - int temporal_layer_id; - int spatial_layer_id; - unsigned int number_temporal_layers; - unsigned int number_spatial_layers; - int num_allocated_above_context_mi_col; - int num_allocated_above_contexts; - int num_allocated_above_context_planes; - -#if TXCOEFF_TIMER - int64_t cum_txcoeff_timer; - int64_t txcoeff_timer; - int txb_count; -#endif - -#if TXCOEFF_COST_TIMER - int64_t cum_txcoeff_cost_timer; - int64_t txcoeff_cost_timer; - int64_t txcoeff_cost_count; -#endif - const cfg_options_t *options; -} AV1_COMMON; - -// TODO(hkuang): Don't need to lock the whole pool after implementing atomic -// frame reference count. -static void lock_buffer_pool(BufferPool *const pool) { -#if CONFIG_MULTITHREAD - pthread_mutex_lock(&pool->pool_mutex); -#else - (void)pool; -#endif -} - -static void unlock_buffer_pool(BufferPool *const pool) { -#if CONFIG_MULTITHREAD - pthread_mutex_unlock(&pool->pool_mutex); -#else - (void)pool; -#endif -} - -static INLINE YV12_BUFFER_CONFIG *get_ref_frame(AV1_COMMON *cm, int index) { - if (index < 0 || index >= REF_FRAMES) return NULL; - if (cm->ref_frame_map[index] < 0) return NULL; - assert(cm->ref_frame_map[index] < FRAME_BUFFERS); - return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; -} - -static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer( - const AV1_COMMON *const cm) { - return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; -} - -static INLINE int get_free_fb(AV1_COMMON *cm) { - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - int i; - - lock_buffer_pool(cm->buffer_pool); - for (i = 0; i < FRAME_BUFFERS; ++i) - if (frame_bufs[i].ref_count == 0) break; - - if (i != FRAME_BUFFERS) { - if (frame_bufs[i].buf.use_external_reference_buffers) { - // If this frame buffer's y_buffer, u_buffer, and v_buffer point to the - // external reference buffers. 
Restore the buffer pointers to point to the - // internally allocated memory. - YV12_BUFFER_CONFIG *ybf = &frame_bufs[i].buf; - ybf->y_buffer = ybf->store_buf_adr[0]; - ybf->u_buffer = ybf->store_buf_adr[1]; - ybf->v_buffer = ybf->store_buf_adr[2]; - ybf->use_external_reference_buffers = 0; - } - - frame_bufs[i].ref_count = 1; - } else { - // Reset i to be INVALID_IDX to indicate no free buffer found. - i = INVALID_IDX; - } - - unlock_buffer_pool(cm->buffer_pool); - return i; -} - -static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) { - const int ref_index = *idx; - - if (ref_index >= 0 && bufs[ref_index].ref_count > 0) - bufs[ref_index].ref_count--; - - *idx = new_idx; - - bufs[new_idx].ref_count++; -} - -static INLINE int frame_is_intra_only(const AV1_COMMON *const cm) { - return cm->frame_type == KEY_FRAME || cm->intra_only; -} - -static INLINE int frame_is_sframe(const AV1_COMMON *cm) { - return cm->frame_type == S_FRAME; -} - -static INLINE RefCntBuffer *get_prev_frame(const AV1_COMMON *const cm) { - if (cm->primary_ref_frame == PRIMARY_REF_NONE || - cm->frame_refs[cm->primary_ref_frame].idx == INVALID_IDX) { - return NULL; - } else { - return &cm->buffer_pool - ->frame_bufs[cm->frame_refs[cm->primary_ref_frame].idx]; - } -} - -// Returns 1 if this frame might allow mvs from some reference frame. 
-static INLINE int frame_might_allow_ref_frame_mvs(const AV1_COMMON *cm) { - return !cm->error_resilient_mode && cm->seq_params.enable_ref_frame_mvs && - cm->seq_params.enable_order_hint && !frame_is_intra_only(cm); -} - -// Returns 1 if this frame might use warped_motion -static INLINE int frame_might_allow_warped_motion(const AV1_COMMON *cm) { - return !cm->error_resilient_mode && !frame_is_intra_only(cm) && - cm->seq_params.enable_warped_motion; -} - -static INLINE void ensure_mv_buffer(RefCntBuffer *buf, AV1_COMMON *cm) { - const int buf_rows = buf->mi_rows; - const int buf_cols = buf->mi_cols; - - if (buf->mvs == NULL || buf_rows != cm->mi_rows || buf_cols != cm->mi_cols) { - aom_free(buf->mvs); - buf->mi_rows = cm->mi_rows; - buf->mi_cols = cm->mi_cols; - CHECK_MEM_ERROR(cm, buf->mvs, - (MV_REF *)aom_calloc( - ((cm->mi_rows + 1) >> 1) * ((cm->mi_cols + 1) >> 1), - sizeof(*buf->mvs))); - aom_free(buf->seg_map); - CHECK_MEM_ERROR(cm, buf->seg_map, - (uint8_t *)aom_calloc(cm->mi_rows * cm->mi_cols, - sizeof(*buf->seg_map))); - } - - const int mem_size = - ((cm->mi_rows + MAX_MIB_SIZE) >> 1) * (cm->mi_stride >> 1); - int realloc = cm->tpl_mvs == NULL; - if (cm->tpl_mvs) realloc |= cm->tpl_mvs_mem_size < mem_size; - - if (realloc) { - aom_free(cm->tpl_mvs); - CHECK_MEM_ERROR(cm, cm->tpl_mvs, - (TPL_MV_REF *)aom_calloc(mem_size, sizeof(*cm->tpl_mvs))); - cm->tpl_mvs_mem_size = mem_size; - } -} - -void cfl_init(CFL_CTX *cfl, const SequenceHeader *seq_params); - -static INLINE int av1_num_planes(const AV1_COMMON *cm) { - return cm->seq_params.monochrome ? 
1 : MAX_MB_PLANE; -} - -static INLINE void av1_init_above_context(AV1_COMMON *cm, MACROBLOCKD *xd, - const int tile_row) { - const int num_planes = av1_num_planes(cm); - for (int i = 0; i < num_planes; ++i) { - xd->above_context[i] = cm->above_context[i][tile_row]; - } - xd->above_seg_context = cm->above_seg_context[tile_row]; - xd->above_txfm_context = cm->above_txfm_context[tile_row]; -} - -static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd, - tran_low_t *dqcoeff) { - const int num_planes = av1_num_planes(cm); - for (int i = 0; i < num_planes; ++i) { - xd->plane[i].dqcoeff = dqcoeff; - - if (xd->plane[i].plane_type == PLANE_TYPE_Y) { - memcpy(xd->plane[i].seg_dequant_QTX, cm->y_dequant_QTX, - sizeof(cm->y_dequant_QTX)); - memcpy(xd->plane[i].seg_iqmatrix, cm->y_iqmatrix, sizeof(cm->y_iqmatrix)); - - } else { - if (i == AOM_PLANE_U) { - memcpy(xd->plane[i].seg_dequant_QTX, cm->u_dequant_QTX, - sizeof(cm->u_dequant_QTX)); - memcpy(xd->plane[i].seg_iqmatrix, cm->u_iqmatrix, - sizeof(cm->u_iqmatrix)); - } else { - memcpy(xd->plane[i].seg_dequant_QTX, cm->v_dequant_QTX, - sizeof(cm->v_dequant_QTX)); - memcpy(xd->plane[i].seg_iqmatrix, cm->v_iqmatrix, - sizeof(cm->v_iqmatrix)); - } - } - } - xd->mi_stride = cm->mi_stride; - xd->error_info = &cm->error; - cfl_init(&xd->cfl, &cm->seq_params); -} - -static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col, - const int num_planes) { - int i; - int row_offset = mi_row; - int col_offset = mi_col; - for (i = 0; i < num_planes; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - // Offset the buffer pointer - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; - if (pd->subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1)) - row_offset = mi_row - 1; - if (pd->subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1)) - col_offset = mi_col - 1; - int above_idx = col_offset; - int left_idx = row_offset & MAX_MIB_MASK; - pd->above_context = &xd->above_context[i][above_idx 
>> pd->subsampling_x]; - pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y]; - } -} - -static INLINE int calc_mi_size(int len) { - // len is in mi units. Align to a multiple of SBs. - return ALIGN_POWER_OF_TWO(len, MAX_MIB_SIZE_LOG2); -} - -static INLINE void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, - const int num_planes) { - int i; - for (i = 0; i < num_planes; i++) { - xd->plane[i].width = (bw * MI_SIZE) >> xd->plane[i].subsampling_x; - xd->plane[i].height = (bh * MI_SIZE) >> xd->plane[i].subsampling_y; - - xd->plane[i].width = AOMMAX(xd->plane[i].width, 4); - xd->plane[i].height = AOMMAX(xd->plane[i].height, 4); - } -} - -static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, - int mi_row, int bh, int mi_col, int bw, - int mi_rows, int mi_cols) { - xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); - xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8; - xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); - xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; - - // Are edges available for intra prediction? 
- xd->up_available = (mi_row > tile->mi_row_start); - - const int ss_x = xd->plane[1].subsampling_x; - const int ss_y = xd->plane[1].subsampling_y; - - xd->left_available = (mi_col > tile->mi_col_start); - xd->chroma_up_available = xd->up_available; - xd->chroma_left_available = xd->left_available; - if (ss_x && bw < mi_size_wide[BLOCK_8X8]) - xd->chroma_left_available = (mi_col - 1) > tile->mi_col_start; - if (ss_y && bh < mi_size_high[BLOCK_8X8]) - xd->chroma_up_available = (mi_row - 1) > tile->mi_row_start; - if (xd->up_available) { - xd->above_mbmi = xd->mi[-xd->mi_stride]; - } else { - xd->above_mbmi = NULL; - } - - if (xd->left_available) { - xd->left_mbmi = xd->mi[-1]; - } else { - xd->left_mbmi = NULL; - } - - const int chroma_ref = ((mi_row & 0x01) || !(bh & 0x01) || !ss_y) && - ((mi_col & 0x01) || !(bw & 0x01) || !ss_x); - if (chroma_ref) { - // To help calculate the "above" and "left" chroma blocks, note that the - // current block may cover multiple luma blocks (eg, if partitioned into - // 4x4 luma blocks). - // First, find the top-left-most luma block covered by this chroma block - MB_MODE_INFO **base_mi = - &xd->mi[-(mi_row & ss_y) * xd->mi_stride - (mi_col & ss_x)]; - - // Then, we consider the luma region covered by the left or above 4x4 chroma - // prediction. We want to point to the chroma reference block in that - // region, which is the bottom-right-most mi unit. - // This leads to the following offsets: - MB_MODE_INFO *chroma_above_mi = - xd->chroma_up_available ? base_mi[-xd->mi_stride + ss_x] : NULL; - xd->chroma_above_mbmi = chroma_above_mi; - - MB_MODE_INFO *chroma_left_mi = - xd->chroma_left_available ? base_mi[ss_y * xd->mi_stride - 1] : NULL; - xd->chroma_left_mbmi = chroma_left_mi; - } - - xd->n4_h = bh; - xd->n4_w = bw; - xd->is_sec_rect = 0; - if (xd->n4_w < xd->n4_h) { - // Only mark is_sec_rect as 1 for the last block. - // For PARTITION_VERT_4, it would be (0, 0, 0, 1); - // For other partitions, it would be (0, 1). 
- if (!((mi_col + xd->n4_w) & (xd->n4_h - 1))) xd->is_sec_rect = 1; - } - - if (xd->n4_w > xd->n4_h) - if (mi_row & (xd->n4_w - 1)) xd->is_sec_rect = 1; -} - -static INLINE aom_cdf_prob *get_y_mode_cdf(FRAME_CONTEXT *tile_ctx, - const MB_MODE_INFO *above_mi, - const MB_MODE_INFO *left_mi) { - const PREDICTION_MODE above = av1_above_block_mode(above_mi); - const PREDICTION_MODE left = av1_left_block_mode(left_mi); - const int above_ctx = intra_mode_context[above]; - const int left_ctx = intra_mode_context[left]; - return tile_ctx->kf_y_cdf[above_ctx][left_ctx]; -} - -static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row, - int mi_col, BLOCK_SIZE subsize, - BLOCK_SIZE bsize) { - PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = - xd->left_seg_context + (mi_row & MAX_MIB_MASK); - - const int bw = mi_size_wide[bsize]; - const int bh = mi_size_high[bsize]; - memset(above_ctx, partition_context_lookup[subsize].above, bw); - memset(left_ctx, partition_context_lookup[subsize].left, bh); -} - -static INLINE int is_chroma_reference(int mi_row, int mi_col, BLOCK_SIZE bsize, - int subsampling_x, int subsampling_y) { - const int bw = mi_size_wide[bsize]; - const int bh = mi_size_high[bsize]; - int ref_pos = ((mi_row & 0x01) || !(bh & 0x01) || !subsampling_y) && - ((mi_col & 0x01) || !(bw & 0x01) || !subsampling_x); - return ref_pos; -} - -static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x, - int subsampling_y) { - BLOCK_SIZE bs = bsize; - switch (bsize) { - case BLOCK_4X4: - if (subsampling_x == 1 && subsampling_y == 1) - bs = BLOCK_8X8; - else if (subsampling_x == 1) - bs = BLOCK_8X4; - else if (subsampling_y == 1) - bs = BLOCK_4X8; - break; - case BLOCK_4X8: - if (subsampling_x == 1 && subsampling_y == 1) - bs = BLOCK_8X8; - else if (subsampling_x == 1) - bs = BLOCK_8X8; - else if (subsampling_y == 1) - bs = BLOCK_4X8; - break; - case BLOCK_8X4: - if (subsampling_x == 1 && 
subsampling_y == 1) - bs = BLOCK_8X8; - else if (subsampling_x == 1) - bs = BLOCK_8X4; - else if (subsampling_y == 1) - bs = BLOCK_8X8; - break; - case BLOCK_4X16: - if (subsampling_x == 1 && subsampling_y == 1) - bs = BLOCK_8X16; - else if (subsampling_x == 1) - bs = BLOCK_8X16; - else if (subsampling_y == 1) - bs = BLOCK_4X16; - break; - case BLOCK_16X4: - if (subsampling_x == 1 && subsampling_y == 1) - bs = BLOCK_16X8; - else if (subsampling_x == 1) - bs = BLOCK_16X4; - else if (subsampling_y == 1) - bs = BLOCK_16X8; - break; - default: break; - } - return bs; -} - -static INLINE aom_cdf_prob cdf_element_prob(const aom_cdf_prob *cdf, - size_t element) { - assert(cdf != NULL); - return (element > 0 ? cdf[element - 1] : CDF_PROB_TOP) - cdf[element]; -} - -static INLINE void partition_gather_horz_alike(aom_cdf_prob *out, - const aom_cdf_prob *const in, - BLOCK_SIZE bsize) { - (void)bsize; - out[0] = CDF_PROB_TOP; - out[0] -= cdf_element_prob(in, PARTITION_HORZ); - out[0] -= cdf_element_prob(in, PARTITION_SPLIT); - out[0] -= cdf_element_prob(in, PARTITION_HORZ_A); - out[0] -= cdf_element_prob(in, PARTITION_HORZ_B); - out[0] -= cdf_element_prob(in, PARTITION_VERT_A); - if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_HORZ_4); - out[0] = AOM_ICDF(out[0]); - out[1] = AOM_ICDF(CDF_PROB_TOP); -} - -static INLINE void partition_gather_vert_alike(aom_cdf_prob *out, - const aom_cdf_prob *const in, - BLOCK_SIZE bsize) { - (void)bsize; - out[0] = CDF_PROB_TOP; - out[0] -= cdf_element_prob(in, PARTITION_VERT); - out[0] -= cdf_element_prob(in, PARTITION_SPLIT); - out[0] -= cdf_element_prob(in, PARTITION_HORZ_A); - out[0] -= cdf_element_prob(in, PARTITION_VERT_A); - out[0] -= cdf_element_prob(in, PARTITION_VERT_B); - if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_VERT_4); - out[0] = AOM_ICDF(out[0]); - out[1] = AOM_ICDF(CDF_PROB_TOP); -} - -static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row, - int mi_col, 
BLOCK_SIZE subsize, - BLOCK_SIZE bsize, - PARTITION_TYPE partition) { - if (bsize >= BLOCK_8X8) { - const int hbs = mi_size_wide[bsize] / 2; - BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); - switch (partition) { - case PARTITION_SPLIT: - if (bsize != BLOCK_8X8) break; - AOM_FALLTHROUGH_INTENDED; - case PARTITION_NONE: - case PARTITION_HORZ: - case PARTITION_VERT: - case PARTITION_HORZ_4: - case PARTITION_VERT_4: - update_partition_context(xd, mi_row, mi_col, subsize, bsize); - break; - case PARTITION_HORZ_A: - update_partition_context(xd, mi_row, mi_col, bsize2, subsize); - update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize); - break; - case PARTITION_HORZ_B: - update_partition_context(xd, mi_row, mi_col, subsize, subsize); - update_partition_context(xd, mi_row + hbs, mi_col, bsize2, subsize); - break; - case PARTITION_VERT_A: - update_partition_context(xd, mi_row, mi_col, bsize2, subsize); - update_partition_context(xd, mi_row, mi_col + hbs, subsize, subsize); - break; - case PARTITION_VERT_B: - update_partition_context(xd, mi_row, mi_col, subsize, subsize); - update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize); - break; - default: assert(0 && "Invalid partition type"); - } - } -} - -static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row, - int mi_col, BLOCK_SIZE bsize) { - const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = - xd->left_seg_context + (mi_row & MAX_MIB_MASK); - // Minimum partition point is 8x8. Offset the bsl accordingly. 
- const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8]; - int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1; - - assert(mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]); - assert(bsl >= 0); - - return (left * 2 + above) + bsl * PARTITION_PLOFFSET; -} - -// Return the number of elements in the partition CDF when -// partitioning the (square) block with luma block size of bsize. -static INLINE int partition_cdf_length(BLOCK_SIZE bsize) { - if (bsize <= BLOCK_8X8) - return PARTITION_TYPES; - else if (bsize == BLOCK_128X128) - return EXT_PARTITION_TYPES - 2; - else - return EXT_PARTITION_TYPES; -} - -static INLINE int max_block_wide(const MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane) { - int max_blocks_wide = block_size_wide[bsize]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - - if (xd->mb_to_right_edge < 0) - max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x); - - // Scale the width in the transform block unit. - return max_blocks_wide >> tx_size_wide_log2[0]; -} - -static INLINE int max_block_high(const MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane) { - int max_blocks_high = block_size_high[bsize]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - - if (xd->mb_to_bottom_edge < 0) - max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y); - - // Scale the height in the transform block unit. 
- return max_blocks_high >> tx_size_high_log2[0]; -} - -static INLINE int max_intra_block_width(const MACROBLOCKD *xd, - BLOCK_SIZE plane_bsize, int plane, - TX_SIZE tx_size) { - const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane) - << tx_size_wide_log2[0]; - return ALIGN_POWER_OF_TWO(max_blocks_wide, tx_size_wide_log2[tx_size]); -} - -static INLINE int max_intra_block_height(const MACROBLOCKD *xd, - BLOCK_SIZE plane_bsize, int plane, - TX_SIZE tx_size) { - const int max_blocks_high = max_block_high(xd, plane_bsize, plane) - << tx_size_high_log2[0]; - return ALIGN_POWER_OF_TWO(max_blocks_high, tx_size_high_log2[tx_size]); -} - -static INLINE void av1_zero_above_context(AV1_COMMON *const cm, const MACROBLOCKD *xd, - int mi_col_start, int mi_col_end, const int tile_row) { - const SequenceHeader *const seq_params = &cm->seq_params; - const int num_planes = av1_num_planes(cm); - const int width = mi_col_end - mi_col_start; - const int aligned_width = - ALIGN_POWER_OF_TWO(width, seq_params->mib_size_log2); - - const int offset_y = mi_col_start; - const int width_y = aligned_width; - const int offset_uv = offset_y >> seq_params->subsampling_x; - const int width_uv = width_y >> seq_params->subsampling_x; - - av1_zero_array(cm->above_context[0][tile_row] + offset_y, width_y); - if (num_planes > 1) { - if (cm->above_context[1][tile_row] && cm->above_context[2][tile_row]) { - av1_zero_array(cm->above_context[1][tile_row] + offset_uv, width_uv); - av1_zero_array(cm->above_context[2][tile_row] + offset_uv, width_uv); - } else { - aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME, - "Invalid value of planes"); - } - } - - av1_zero_array(cm->above_seg_context[tile_row] + mi_col_start, aligned_width); - - memset(cm->above_txfm_context[tile_row] + mi_col_start, - tx_size_wide[TX_SIZES_LARGEST], - aligned_width * sizeof(TXFM_CONTEXT)); -} - -static INLINE void av1_zero_left_context(MACROBLOCKD *const xd) { - av1_zero(xd->left_context); - 
av1_zero(xd->left_seg_context); - - memset(xd->left_txfm_context_buffer, tx_size_high[TX_SIZES_LARGEST], - sizeof(xd->left_txfm_context_buffer)); -} - -// Disable array-bounds checks as the TX_SIZE enum contains values larger than -// TX_SIZES_ALL (TX_INVALID) which make extending the array as a workaround -// infeasible. The assert is enough for static analysis and this or other tools -// asan, valgrind would catch oob access at runtime. -#if defined(__GNUC__) && __GNUC__ >= 4 -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif - -#if defined(__GNUC__) && __GNUC__ >= 4 -#pragma GCC diagnostic warning "-Warray-bounds" -#endif - -static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx, uint8_t txs, int len) { - int i; - for (i = 0; i < len; ++i) txfm_ctx[i] = txs; -} - -static INLINE void set_txfm_ctxs(TX_SIZE tx_size, int n4_w, int n4_h, int skip, - const MACROBLOCKD *xd) { - uint8_t bw = tx_size_wide[tx_size]; - uint8_t bh = tx_size_high[tx_size]; - - if (skip) { - bw = n4_w * MI_SIZE; - bh = n4_h * MI_SIZE; - } - - set_txfm_ctx(xd->above_txfm_context, bw, n4_w); - set_txfm_ctx(xd->left_txfm_context, bh, n4_h); -} - -static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx, - TXFM_CONTEXT *left_ctx, - TX_SIZE tx_size, TX_SIZE txb_size) { - BLOCK_SIZE bsize = txsize_to_bsize[txb_size]; - int bh = mi_size_high[bsize]; - int bw = mi_size_wide[bsize]; - uint8_t txw = tx_size_wide[tx_size]; - uint8_t txh = tx_size_high[tx_size]; - int i; - for (i = 0; i < bh; ++i) left_ctx[i] = txh; - for (i = 0; i < bw; ++i) above_ctx[i] = txw; -} - -static INLINE TX_SIZE get_sqr_tx_size(int tx_dim) { - switch (tx_dim) { - case 128: - case 64: return TX_64X64; break; - case 32: return TX_32X32; break; - case 16: return TX_16X16; break; - case 8: return TX_8X8; break; - default: return TX_4X4; - } -} - -static INLINE TX_SIZE get_tx_size(int width, int height) { - if (width == height) { - return get_sqr_tx_size(width); - } - if (width < height) { - if (width + width == 
height) { - switch (width) { - case 4: return TX_4X8; break; - case 8: return TX_8X16; break; - case 16: return TX_16X32; break; - case 32: return TX_32X64; break; - } - } else { - switch (width) { - case 4: return TX_4X16; break; - case 8: return TX_8X32; break; - case 16: return TX_16X64; break; - } - } - } else { - if (height + height == width) { - switch (height) { - case 4: return TX_8X4; break; - case 8: return TX_16X8; break; - case 16: return TX_32X16; break; - case 32: return TX_64X32; break; - } - } else { - switch (height) { - case 4: return TX_16X4; break; - case 8: return TX_32X8; break; - case 16: return TX_64X16; break; - } - } - } - assert(0); - return TX_4X4; -} - -static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx, - TXFM_CONTEXT *left_ctx, - BLOCK_SIZE bsize, TX_SIZE tx_size) { - const uint8_t txw = tx_size_wide[tx_size]; - const uint8_t txh = tx_size_high[tx_size]; - const int above = *above_ctx < txw; - const int left = *left_ctx < txh; - int category = TXFM_PARTITION_CONTEXTS; - - // dummy return, not used by others. - if (tx_size <= TX_4X4) return 0; - - TX_SIZE max_tx_size = - get_sqr_tx_size(AOMMAX(block_size_wide[bsize], block_size_high[bsize])); - - if (max_tx_size >= TX_8X8) { - category = - (txsize_sqr_up_map[tx_size] != max_tx_size && max_tx_size > TX_8X8) + - (TX_SIZES - 1 - max_tx_size) * 2; - } - assert(category != TXFM_PARTITION_CONTEXTS); - return category * 3 + above + left; -} - -// Compute the next partition in the direction of the sb_type stored in the mi -// array, starting with bsize. 
-static INLINE PARTITION_TYPE get_partition(const AV1_COMMON *const cm, - int mi_row, int mi_col, - BLOCK_SIZE bsize) { - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return PARTITION_INVALID; - - const int offset = mi_row * cm->mi_stride + mi_col; - MB_MODE_INFO **mi = cm->mi_grid_visible + offset; - const BLOCK_SIZE subsize = mi[0]->sb_type; - - if (subsize == bsize) return PARTITION_NONE; - - const int bhigh = mi_size_high[bsize]; - const int bwide = mi_size_wide[bsize]; - const int sshigh = mi_size_high[subsize]; - const int sswide = mi_size_wide[subsize]; - - if (bsize > BLOCK_8X8 && mi_row + bwide / 2 < cm->mi_rows && - mi_col + bhigh / 2 < cm->mi_cols) { - // In this case, the block might be using an extended partition - // type. - const MB_MODE_INFO *const mbmi_right = mi[bwide / 2]; - const MB_MODE_INFO *const mbmi_below = mi[bhigh / 2 * cm->mi_stride]; - - if (sswide == bwide) { - // Smaller height but same width. Is PARTITION_HORZ_4, PARTITION_HORZ or - // PARTITION_HORZ_B. To distinguish the latter two, check if the lower - // half was split. - if (sshigh * 4 == bhigh) return PARTITION_HORZ_4; - assert(sshigh * 2 == bhigh); - - if (mbmi_below->sb_type == subsize) - return PARTITION_HORZ; - else - return PARTITION_HORZ_B; - } else if (sshigh == bhigh) { - // Smaller width but same height. Is PARTITION_VERT_4, PARTITION_VERT or - // PARTITION_VERT_B. To distinguish the latter two, check if the right - // half was split. - if (sswide * 4 == bwide) return PARTITION_VERT_4; - assert(sswide * 2 == bhigh); - - if (mbmi_right->sb_type == subsize) - return PARTITION_VERT; - else - return PARTITION_VERT_B; - } else { - // Smaller width and smaller height. Might be PARTITION_SPLIT or could be - // PARTITION_HORZ_A or PARTITION_VERT_A. If subsize isn't halved in both - // dimensions, we immediately know this is a split (which will recurse to - // get to subsize). Otherwise look down and to the right. 
With - // PARTITION_VERT_A, the right block will have height bhigh; with - // PARTITION_HORZ_A, the lower block with have width bwide. Otherwise - // it's PARTITION_SPLIT. - if (sswide * 2 != bwide || sshigh * 2 != bhigh) return PARTITION_SPLIT; - - if (mi_size_wide[mbmi_below->sb_type] == bwide) return PARTITION_HORZ_A; - if (mi_size_high[mbmi_right->sb_type] == bhigh) return PARTITION_VERT_A; - - return PARTITION_SPLIT; - } - } - const int vert_split = sswide < bwide; - const int horz_split = sshigh < bhigh; - const int split_idx = (vert_split << 1) | horz_split; - assert(split_idx != 0); - - static const PARTITION_TYPE base_partitions[4] = { - PARTITION_INVALID, PARTITION_HORZ, PARTITION_VERT, PARTITION_SPLIT - }; - - return base_partitions[split_idx]; -} - -static INLINE void set_use_reference_buffer(AV1_COMMON *const cm, int use) { - cm->seq_params.frame_id_numbers_present_flag = use; -} - -static INLINE void set_sb_size(SequenceHeader *const seq_params, - BLOCK_SIZE sb_size) { - seq_params->sb_size = sb_size; - seq_params->mib_size = mi_size_wide[seq_params->sb_size]; - seq_params->mib_size_log2 = mi_size_wide_log2[seq_params->sb_size]; -} - -// Returns true if the frame is fully lossless at the coded resolution. -// Note: If super-resolution is used, such a frame will still NOT be lossless at -// the upscaled resolution. 
-static INLINE int is_coded_lossless(const AV1_COMMON *cm, - const MACROBLOCKD *xd) { - int coded_lossless = 1; - if (cm->seg.enabled) { - for (int i = 0; i < MAX_SEGMENTS; ++i) { - if (!xd->lossless[i]) { - coded_lossless = 0; - break; - } - } - } else { - coded_lossless = xd->lossless[0]; - } - return coded_lossless; -} - -static INLINE int is_valid_seq_level_idx(uint8_t seq_level_idx) { - return seq_level_idx < 24 || seq_level_idx == 31; -} - -static INLINE uint8_t major_minor_to_seq_level_idx(BitstreamLevel bl) { - assert(bl.major >= LEVEL_MAJOR_MIN && bl.major <= LEVEL_MAJOR_MAX); - // Since bl.minor is unsigned a comparison will return a warning: - // comparison is always true due to limited range of data type - assert(LEVEL_MINOR_MIN == 0); - assert(bl.minor <= LEVEL_MINOR_MAX); - return ((bl.major - LEVEL_MAJOR_MIN) << LEVEL_MINOR_BITS) + bl.minor; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_ONYXC_INT_H_ diff --git a/third_party/aom/av1/common/ppc/cfl_ppc.c b/third_party/aom/av1/common/ppc/cfl_ppc.c deleted file mode 100644 index 026a07809..000000000 --- a/third_party/aom/av1/common/ppc/cfl_ppc.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <altivec.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/cfl.h" - -#define OFF_0 0 -#define OFF_1 16 -#define OFF_2 32 -#define OFF_3 48 -#define CFL_BUF_LINE_BYTES 64 -#define CFL_LINE_1 64 -#define CFL_LINE_2 128 -#define CFL_LINE_3 192 - -typedef vector signed char int8x16_t; // NOLINT(runtime/int) -typedef vector unsigned char uint8x16_t; // NOLINT(runtime/int) -typedef vector signed short int16x8_t; // NOLINT(runtime/int) -typedef vector unsigned short uint16x8_t; // NOLINT(runtime/int) -typedef vector signed int int32x4_t; // NOLINT(runtime/int) -typedef vector unsigned int uint32x4_t; // NOLINT(runtime/int) -typedef vector unsigned long long uint64x2_t; // NOLINT(runtime/int) - -static INLINE void subtract_average_vsx(const uint16_t *src_ptr, int16_t *dst, - int width, int height, int round_offset, - int num_pel_log2) { - // int16_t *dst = dst_ptr; - const int16_t *dst_end = dst + height * CFL_BUF_LINE; - const int16_t *sum_buf = (const int16_t *)src_ptr; - const int16_t *end = sum_buf + height * CFL_BUF_LINE; - const uint32x4_t div_shift = vec_splats((uint32_t)num_pel_log2); - const uint8x16_t mask_64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 }; - const uint8x16_t mask_32 = { 0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03, - 0x1C, 0x1D, 0x1E, 0x1F, 0x08, 0x09, 0x0A, 0x0B }; - - int32x4_t sum_32x4_0 = { 0, 0, 0, round_offset }; - int32x4_t sum_32x4_1 = { 0, 0, 0, 0 }; - do { - sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_0, sum_buf), sum_32x4_0); - sum_32x4_1 = vec_sum4s(vec_vsx_ld(OFF_0 + CFL_LINE_1, sum_buf), sum_32x4_1); - if (width >= 16) { - sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_1, sum_buf), sum_32x4_0); - sum_32x4_1 = - vec_sum4s(vec_vsx_ld(OFF_1 + CFL_LINE_1, sum_buf), sum_32x4_1); - } - if (width == 32) { - sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_2, sum_buf), sum_32x4_0); - sum_32x4_1 = - vec_sum4s(vec_vsx_ld(OFF_2 + CFL_LINE_1, sum_buf), sum_32x4_1); - sum_32x4_0 = 
vec_sum4s(vec_vsx_ld(OFF_3, sum_buf), sum_32x4_0); - sum_32x4_1 = - vec_sum4s(vec_vsx_ld(OFF_3 + CFL_LINE_1, sum_buf), sum_32x4_1); - } - } while ((sum_buf += (CFL_BUF_LINE * 2)) < end); - int32x4_t sum_32x4 = vec_add(sum_32x4_0, sum_32x4_1); - - const int32x4_t perm_64 = vec_perm(sum_32x4, sum_32x4, mask_64); - sum_32x4 = vec_add(sum_32x4, perm_64); - const int32x4_t perm_32 = vec_perm(sum_32x4, sum_32x4, mask_32); - sum_32x4 = vec_add(sum_32x4, perm_32); - const int32x4_t avg = vec_sr(sum_32x4, div_shift); - const int16x8_t vec_avg = vec_pack(avg, avg); - do { - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0, dst), vec_avg), OFF_0, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_1, dst), vec_avg), - OFF_0 + CFL_BUF_LINE_BYTES, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_2, dst), vec_avg), - OFF_0 + CFL_LINE_2, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_3, dst), vec_avg), - OFF_0 + CFL_LINE_3, dst); - if (width >= 16) { - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1, dst), vec_avg), OFF_1, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_1, dst), vec_avg), - OFF_1 + CFL_LINE_1, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_2, dst), vec_avg), - OFF_1 + CFL_LINE_2, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_3, dst), vec_avg), - OFF_1 + CFL_LINE_3, dst); - } - if (width == 32) { - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2, dst), vec_avg), OFF_2, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_1, dst), vec_avg), - OFF_2 + CFL_LINE_1, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_2, dst), vec_avg), - OFF_2 + CFL_LINE_2, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_3, dst), vec_avg), - OFF_2 + CFL_LINE_3, dst); - - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3, dst), vec_avg), OFF_3, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_1, dst), vec_avg), - OFF_3 + CFL_LINE_1, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_2, dst), vec_avg), - OFF_3 + CFL_LINE_2, dst); - vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + 
CFL_LINE_3, dst), vec_avg), - OFF_3 + CFL_LINE_3, dst); - } - } while ((dst += CFL_BUF_LINE * 4) < dst_end); -} - -// Declare wrappers for VSX sizes -CFL_SUB_AVG_X(vsx, 8, 4, 16, 5) -CFL_SUB_AVG_X(vsx, 8, 8, 32, 6) -CFL_SUB_AVG_X(vsx, 8, 16, 64, 7) -CFL_SUB_AVG_X(vsx, 8, 32, 128, 8) -CFL_SUB_AVG_X(vsx, 16, 4, 32, 6) -CFL_SUB_AVG_X(vsx, 16, 8, 64, 7) -CFL_SUB_AVG_X(vsx, 16, 16, 128, 8) -CFL_SUB_AVG_X(vsx, 16, 32, 256, 9) -CFL_SUB_AVG_X(vsx, 32, 8, 128, 8) -CFL_SUB_AVG_X(vsx, 32, 16, 256, 9) -CFL_SUB_AVG_X(vsx, 32, 32, 512, 10) - -// Based on observation, for small blocks VSX does not outperform C (no 64bit -// load and store intrinsics). So we call the C code for block widths 4. -cfl_subtract_average_fn get_subtract_average_fn_vsx(TX_SIZE tx_size) { - static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = { - subtract_average_4x4_c, /* 4x4 */ - subtract_average_8x8_vsx, /* 8x8 */ - subtract_average_16x16_vsx, /* 16x16 */ - subtract_average_32x32_vsx, /* 32x32 */ - cfl_subtract_average_null, /* 64x64 (invalid CFL size) */ - subtract_average_4x8_c, /* 4x8 */ - subtract_average_8x4_vsx, /* 8x4 */ - subtract_average_8x16_vsx, /* 8x16 */ - subtract_average_16x8_vsx, /* 16x8 */ - subtract_average_16x32_vsx, /* 16x32 */ - subtract_average_32x16_vsx, /* 32x16 */ - cfl_subtract_average_null, /* 32x64 (invalid CFL size) */ - cfl_subtract_average_null, /* 64x32 (invalid CFL size) */ - subtract_average_4x16_c, /* 4x16 */ - subtract_average_16x4_vsx, /* 16x4 */ - subtract_average_8x32_vsx, /* 8x32 */ - subtract_average_32x8_vsx, /* 32x8 */ - cfl_subtract_average_null, /* 16x64 (invalid CFL size) */ - cfl_subtract_average_null, /* 64x16 (invalid CFL size) */ - }; - // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to - // index the function pointer array out of bounds. 
- return sub_avg[tx_size % TX_SIZES_ALL]; -} diff --git a/third_party/aom/av1/common/pred_common.c b/third_party/aom/av1/common/pred_common.c deleted file mode 100644 index 5952441d1..000000000 --- a/third_party/aom/av1/common/pred_common.c +++ /dev/null @@ -1,501 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "av1/common/common.h" -#include "av1/common/pred_common.h" -#include "av1/common/reconinter.h" -#include "av1/common/reconintra.h" -#include "av1/common/seg_common.h" - -// Returns a context number for the given MB prediction signal -static InterpFilter get_ref_filter_type(const MB_MODE_INFO *ref_mbmi, - const MACROBLOCKD *xd, int dir, - MV_REFERENCE_FRAME ref_frame) { - (void)xd; - - return ((ref_mbmi->ref_frame[0] == ref_frame || - ref_mbmi->ref_frame[1] == ref_frame) - ? av1_extract_interp_filter(ref_mbmi->interp_filters, dir & 0x01) - : SWITCHABLE_FILTERS); -} - -int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) { - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const int ctx_offset = - (mbmi->ref_frame[1] > INTRA_FRAME) * INTER_FILTER_COMP_OFFSET; - assert(dir == 0 || dir == 1); - const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0]; - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. 
- int filter_type_ctx = ctx_offset + (dir & 0x01) * INTER_FILTER_DIR_OFFSET; - int left_type = SWITCHABLE_FILTERS; - int above_type = SWITCHABLE_FILTERS; - - if (xd->left_available) - left_type = get_ref_filter_type(xd->mi[-1], xd, dir, ref_frame); - - if (xd->up_available) - above_type = - get_ref_filter_type(xd->mi[-xd->mi_stride], xd, dir, ref_frame); - - if (left_type == above_type) { - filter_type_ctx += left_type; - } else if (left_type == SWITCHABLE_FILTERS) { - assert(above_type != SWITCHABLE_FILTERS); - filter_type_ctx += above_type; - } else if (above_type == SWITCHABLE_FILTERS) { - assert(left_type != SWITCHABLE_FILTERS); - filter_type_ctx += left_type; - } else { - filter_type_ctx += SWITCHABLE_FILTERS; - } - - return filter_type_ctx; -} - -static void palette_add_to_cache(uint16_t *cache, int *n, uint16_t val) { - // Do not add an already existing value - if (*n > 0 && val == cache[*n - 1]) return; - - cache[(*n)++] = val; -} - -int av1_get_palette_cache(const MACROBLOCKD *const xd, int plane, - uint16_t *cache) { - const int row = -xd->mb_to_top_edge >> 3; - // Do not refer to above SB row when on SB boundary. - const MB_MODE_INFO *const above_mi = - (row % (1 << MIN_SB_SIZE_LOG2)) ? xd->above_mbmi : NULL; - const MB_MODE_INFO *const left_mi = xd->left_mbmi; - int above_n = 0, left_n = 0; - if (above_mi) above_n = above_mi->palette_mode_info.palette_size[plane != 0]; - if (left_mi) left_n = left_mi->palette_mode_info.palette_size[plane != 0]; - if (above_n == 0 && left_n == 0) return 0; - int above_idx = plane * PALETTE_MAX_SIZE; - int left_idx = plane * PALETTE_MAX_SIZE; - int n = 0; - const uint16_t *above_colors = - above_mi ? above_mi->palette_mode_info.palette_colors : NULL; - const uint16_t *left_colors = - left_mi ? left_mi->palette_mode_info.palette_colors : NULL; - // Merge the sorted lists of base colors from above and left to get - // combined sorted color cache. 
- while (above_n > 0 && left_n > 0) { - uint16_t v_above = above_colors[above_idx]; - uint16_t v_left = left_colors[left_idx]; - if (v_left < v_above) { - palette_add_to_cache(cache, &n, v_left); - ++left_idx, --left_n; - } else { - palette_add_to_cache(cache, &n, v_above); - ++above_idx, --above_n; - if (v_left == v_above) ++left_idx, --left_n; - } - } - while (above_n-- > 0) { - uint16_t val = above_colors[above_idx++]; - palette_add_to_cache(cache, &n, val); - } - while (left_n-- > 0) { - uint16_t val = left_colors[left_idx++]; - palette_add_to_cache(cache, &n, val); - } - assert(n <= 2 * PALETTE_MAX_SIZE); - return n; -} - -// The mode info data structure has a one element border above and to the -// left of the entries corresponding to real macroblocks. -// The prediction flags in these dummy entries are initialized to 0. -// 0 - inter/inter, inter/--, --/inter, --/-- -// 1 - intra/inter, inter/intra -// 2 - intra/--, --/intra -// 3 - intra/intra -int av1_get_intra_inter_context(const MACROBLOCKD *xd) { - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; - - if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); - return left_intra && above_intra ? 3 : left_intra || above_intra; - } else if (has_above || has_left) { // one edge available - return 2 * !is_inter_block(has_above ? 
above_mbmi : left_mbmi); - } else { - return 0; - } -} - -#define CHECK_BACKWARD_REFS(ref_frame) \ - (((ref_frame) >= BWDREF_FRAME) && ((ref_frame) <= ALTREF_FRAME)) -#define IS_BACKWARD_REF_FRAME(ref_frame) CHECK_BACKWARD_REFS(ref_frame) - -int av1_get_reference_mode_context(const MACROBLOCKD *xd) { - int ctx; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; - - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - if (has_above && has_left) { // both edges available - if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi)) - // neither edge uses comp pred (0/1) - ctx = IS_BACKWARD_REF_FRAME(above_mbmi->ref_frame[0]) ^ - IS_BACKWARD_REF_FRAME(left_mbmi->ref_frame[0]); - else if (!has_second_ref(above_mbmi)) - // one of two edges uses comp pred (2/3) - ctx = 2 + (IS_BACKWARD_REF_FRAME(above_mbmi->ref_frame[0]) || - !is_inter_block(above_mbmi)); - else if (!has_second_ref(left_mbmi)) - // one of two edges uses comp pred (2/3) - ctx = 2 + (IS_BACKWARD_REF_FRAME(left_mbmi->ref_frame[0]) || - !is_inter_block(left_mbmi)); - else // both edges use comp pred (4) - ctx = 4; - } else if (has_above || has_left) { // one edge available - const MB_MODE_INFO *edge_mbmi = has_above ? 
above_mbmi : left_mbmi; - - if (!has_second_ref(edge_mbmi)) - // edge does not use comp pred (0/1) - ctx = IS_BACKWARD_REF_FRAME(edge_mbmi->ref_frame[0]); - else - // edge uses comp pred (3) - ctx = 3; - } else { // no edges available (1) - ctx = 1; - } - assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS); - return ctx; -} - -int av1_get_comp_reference_type_context(const MACROBLOCKD *xd) { - int pred_context; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int above_in_image = xd->up_available; - const int left_in_image = xd->left_available; - - if (above_in_image && left_in_image) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); - - if (above_intra && left_intra) { // intra/intra - pred_context = 2; - } else if (above_intra || left_intra) { // intra/inter - const MB_MODE_INFO *inter_mbmi = above_intra ? left_mbmi : above_mbmi; - - if (!has_second_ref(inter_mbmi)) // single pred - pred_context = 2; - else // comp pred - pred_context = 1 + 2 * has_uni_comp_refs(inter_mbmi); - } else { // inter/inter - const int a_sg = !has_second_ref(above_mbmi); - const int l_sg = !has_second_ref(left_mbmi); - const MV_REFERENCE_FRAME frfa = above_mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME frfl = left_mbmi->ref_frame[0]; - - if (a_sg && l_sg) { // single/single - pred_context = 1 + 2 * (!(IS_BACKWARD_REF_FRAME(frfa) ^ - IS_BACKWARD_REF_FRAME(frfl))); - } else if (l_sg || a_sg) { // single/comp - const int uni_rfc = - a_sg ? 
has_uni_comp_refs(left_mbmi) : has_uni_comp_refs(above_mbmi); - - if (!uni_rfc) // comp bidir - pred_context = 1; - else // comp unidir - pred_context = 3 + (!(IS_BACKWARD_REF_FRAME(frfa) ^ - IS_BACKWARD_REF_FRAME(frfl))); - } else { // comp/comp - const int a_uni_rfc = has_uni_comp_refs(above_mbmi); - const int l_uni_rfc = has_uni_comp_refs(left_mbmi); - - if (!a_uni_rfc && !l_uni_rfc) // bidir/bidir - pred_context = 0; - else if (!a_uni_rfc || !l_uni_rfc) // unidir/bidir - pred_context = 2; - else // unidir/unidir - pred_context = - 3 + (!((frfa == BWDREF_FRAME) ^ (frfl == BWDREF_FRAME))); - } - } - } else if (above_in_image || left_in_image) { // one edge available - const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi; - - if (!is_inter_block(edge_mbmi)) { // intra - pred_context = 2; - } else { // inter - if (!has_second_ref(edge_mbmi)) // single pred - pred_context = 2; - else // comp pred - pred_context = 4 * has_uni_comp_refs(edge_mbmi); - } - } else { // no edges available - pred_context = 2; - } - - assert(pred_context >= 0 && pred_context < COMP_REF_TYPE_CONTEXTS); - return pred_context; -} - -// Returns a context number for the given MB prediction signal -// -// Signal the uni-directional compound reference frame pair as either -// (BWDREF, ALTREF), or (LAST, LAST2) / (LAST, LAST3) / (LAST, GOLDEN), -// conditioning on the pair is known as uni-directional. -// -// 3 contexts: Voting is used to compare the count of forward references with -// that of backward references from the spatial neighbors. 
-int av1_get_pred_context_uni_comp_ref_p(const MACROBLOCKD *xd) { - const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0]; - - // Count of forward references (L, L2, L3, or G) - const int frf_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME] + - ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME]; - // Count of backward references (B or A) - const int brf_count = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME] + - ref_counts[ALTREF_FRAME]; - - const int pred_context = - (frf_count == brf_count) ? 1 : ((frf_count < brf_count) ? 0 : 2); - - assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS); - return pred_context; -} - -// Returns a context number for the given MB prediction signal -// -// Signal the uni-directional compound reference frame pair as -// either (LAST, LAST2), or (LAST, LAST3) / (LAST, GOLDEN), -// conditioning on the pair is known as one of the above three. -// -// 3 contexts: Voting is used to compare the count of LAST2_FRAME with the -// total count of LAST3/GOLDEN from the spatial neighbors. -int av1_get_pred_context_uni_comp_ref_p1(const MACROBLOCKD *xd) { - const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0]; - - // Count of LAST2 - const int last2_count = ref_counts[LAST2_FRAME]; - // Count of LAST3 or GOLDEN - const int last3_or_gld_count = - ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME]; - - const int pred_context = (last2_count == last3_or_gld_count) - ? 1 - : ((last2_count < last3_or_gld_count) ? 0 : 2); - - assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS); - return pred_context; -} - -// Returns a context number for the given MB prediction signal -// -// Signal the uni-directional compound reference frame pair as -// either (LAST, LAST3) or (LAST, GOLDEN), -// conditioning on the pair is known as one of the above two. -// -// 3 contexts: Voting is used to compare the count of LAST3_FRAME with the -// total count of GOLDEN_FRAME from the spatial neighbors. 
-int av1_get_pred_context_uni_comp_ref_p2(const MACROBLOCKD *xd) { - const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0]; - - // Count of LAST3 - const int last3_count = ref_counts[LAST3_FRAME]; - // Count of GOLDEN - const int gld_count = ref_counts[GOLDEN_FRAME]; - - const int pred_context = - (last3_count == gld_count) ? 1 : ((last3_count < gld_count) ? 0 : 2); - - assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS); - return pred_context; -} - -// == Common context functions for both comp and single ref == -// -// Obtain contexts to signal a reference frame to be either LAST/LAST2 or -// LAST3/GOLDEN. -static int get_pred_context_ll2_or_l3gld(const MACROBLOCKD *xd) { - const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0]; - - // Count of LAST + LAST2 - const int last_last2_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME]; - // Count of LAST3 + GOLDEN - const int last3_gld_count = - ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME]; - - const int pred_context = (last_last2_count == last3_gld_count) - ? 1 - : ((last_last2_count < last3_gld_count) ? 0 : 2); - - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} - -// Obtain contexts to signal a reference frame to be either LAST or LAST2. -static int get_pred_context_last_or_last2(const MACROBLOCKD *xd) { - const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0]; - - // Count of LAST - const int last_count = ref_counts[LAST_FRAME]; - // Count of LAST2 - const int last2_count = ref_counts[LAST2_FRAME]; - - const int pred_context = - (last_count == last2_count) ? 1 : ((last_count < last2_count) ? 0 : 2); - - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} - -// Obtain contexts to signal a reference frame to be either LAST3 or GOLDEN. 
-static int get_pred_context_last3_or_gld(const MACROBLOCKD *xd) { - const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0]; - - // Count of LAST3 - const int last3_count = ref_counts[LAST3_FRAME]; - // Count of GOLDEN - const int gld_count = ref_counts[GOLDEN_FRAME]; - - const int pred_context = - (last3_count == gld_count) ? 1 : ((last3_count < gld_count) ? 0 : 2); - - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} - -// Obtain contexts to signal a reference frame be either BWDREF/ALTREF2, or -// ALTREF. -static int get_pred_context_brfarf2_or_arf(const MACROBLOCKD *xd) { - const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0]; - - // Counts of BWDREF, ALTREF2, or ALTREF frames (B, A2, or A) - const int brfarf2_count = - ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME]; - const int arf_count = ref_counts[ALTREF_FRAME]; - - const int pred_context = - (brfarf2_count == arf_count) ? 1 : ((brfarf2_count < arf_count) ? 0 : 2); - - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} - -// Obtain contexts to signal a reference frame be either BWDREF or ALTREF2. -static int get_pred_context_brf_or_arf2(const MACROBLOCKD *xd) { - const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0]; - - // Count of BWDREF frames (B) - const int brf_count = ref_counts[BWDREF_FRAME]; - // Count of ALTREF2 frames (A2) - const int arf2_count = ref_counts[ALTREF2_FRAME]; - - const int pred_context = - (brf_count == arf2_count) ? 1 : ((brf_count < arf2_count) ? 0 : 2); - - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} - -// == Context functions for comp ref == -// -// Returns a context number for the given MB prediction signal -// Signal the first reference frame for a compound mode be either -// GOLDEN/LAST3, or LAST/LAST2. 
-int av1_get_pred_context_comp_ref_p(const MACROBLOCKD *xd) { - return get_pred_context_ll2_or_l3gld(xd); -} - -// Returns a context number for the given MB prediction signal -// Signal the first reference frame for a compound mode be LAST, -// conditioning on that it is known either LAST/LAST2. -int av1_get_pred_context_comp_ref_p1(const MACROBLOCKD *xd) { - return get_pred_context_last_or_last2(xd); -} - -// Returns a context number for the given MB prediction signal -// Signal the first reference frame for a compound mode be GOLDEN, -// conditioning on that it is known either GOLDEN or LAST3. -int av1_get_pred_context_comp_ref_p2(const MACROBLOCKD *xd) { - return get_pred_context_last3_or_gld(xd); -} - -// Signal the 2nd reference frame for a compound mode be either -// ALTREF, or ALTREF2/BWDREF. -int av1_get_pred_context_comp_bwdref_p(const MACROBLOCKD *xd) { - return get_pred_context_brfarf2_or_arf(xd); -} - -// Signal the 2nd reference frame for a compound mode be either -// ALTREF2 or BWDREF. -int av1_get_pred_context_comp_bwdref_p1(const MACROBLOCKD *xd) { - return get_pred_context_brf_or_arf2(xd); -} - -// == Context functions for single ref == -// -// For the bit to signal whether the single reference is a forward reference -// frame or a backward reference frame. -int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { - const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0]; - - // Count of forward reference frames - const int fwd_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME] + - ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME]; - // Count of backward reference frames - const int bwd_count = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME] + - ref_counts[ALTREF_FRAME]; - - const int pred_context = - (fwd_count == bwd_count) ? 1 : ((fwd_count < bwd_count) ? 
0 : 2); - - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} - -// For the bit to signal whether the single reference is ALTREF_FRAME or -// non-ALTREF backward reference frame, knowing that it shall be either of -// these 2 choices. -int av1_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { - return get_pred_context_brfarf2_or_arf(xd); -} - -// For the bit to signal whether the single reference is LAST3/GOLDEN or -// LAST2/LAST, knowing that it shall be either of these 2 choices. -int av1_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) { - return get_pred_context_ll2_or_l3gld(xd); -} - -// For the bit to signal whether the single reference is LAST2_FRAME or -// LAST_FRAME, knowing that it shall be either of these 2 choices. -int av1_get_pred_context_single_ref_p4(const MACROBLOCKD *xd) { - return get_pred_context_last_or_last2(xd); -} - -// For the bit to signal whether the single reference is GOLDEN_FRAME or -// LAST3_FRAME, knowing that it shall be either of these 2 choices. -int av1_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) { - return get_pred_context_last3_or_gld(xd); -} - -// For the bit to signal whether the single reference is ALTREF2_FRAME or -// BWDREF_FRAME, knowing that it shall be either of these 2 choices. -int av1_get_pred_context_single_ref_p6(const MACROBLOCKD *xd) { - return get_pred_context_brf_or_arf2(xd); -} diff --git a/third_party/aom/av1/common/pred_common.h b/third_party/aom/av1/common/pred_common.h deleted file mode 100644 index 6dba2322d..000000000 --- a/third_party/aom/av1/common/pred_common.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_PRED_COMMON_H_ -#define AOM_AV1_COMMON_PRED_COMMON_H_ - -#include "av1/common/blockd.h" -#include "av1/common/mvref_common.h" -#include "av1/common/onyxc_int.h" -#include "aom_dsp/aom_dsp_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE int get_segment_id(const AV1_COMMON *const cm, - const uint8_t *segment_ids, BLOCK_SIZE bsize, - int mi_row, int mi_col) { - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = mi_size_wide[bsize]; - const int bh = mi_size_high[bsize]; - const int xmis = AOMMIN(cm->mi_cols - mi_col, bw); - const int ymis = AOMMIN(cm->mi_rows - mi_row, bh); - int x, y, segment_id = MAX_SEGMENTS; - - for (y = 0; y < ymis; ++y) - for (x = 0; x < xmis; ++x) - segment_id = - AOMMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); - - assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); - return segment_id; -} - -static INLINE int av1_get_spatial_seg_pred(const AV1_COMMON *const cm, - const MACROBLOCKD *const xd, - int mi_row, int mi_col, - int *cdf_index) { - int prev_ul = -1; // top left segment_id - int prev_l = -1; // left segment_id - int prev_u = -1; // top segment_id - if ((xd->up_available) && (xd->left_available)) { - prev_ul = get_segment_id(cm, cm->current_frame_seg_map, BLOCK_4X4, - mi_row - 1, mi_col - 1); - } - if (xd->up_available) { - prev_u = get_segment_id(cm, cm->current_frame_seg_map, BLOCK_4X4, - mi_row - 1, mi_col - 0); - } - if (xd->left_available) { - prev_l = get_segment_id(cm, cm->current_frame_seg_map, BLOCK_4X4, - mi_row - 0, mi_col - 1); - } - - // Pick CDF index based on number of matching/out-of-bounds segment IDs. 
- if (prev_ul < 0 || prev_u < 0 || prev_l < 0) /* Edge case */ - *cdf_index = 0; - else if ((prev_ul == prev_u) && (prev_ul == prev_l)) - *cdf_index = 2; - else if ((prev_ul == prev_u) || (prev_ul == prev_l) || (prev_u == prev_l)) - *cdf_index = 1; - else - *cdf_index = 0; - - // If 2 or more are identical returns that as predictor, otherwise prev_l. - if (prev_u == -1) // edge case - return prev_l == -1 ? 0 : prev_l; - if (prev_l == -1) // edge case - return prev_u; - return (prev_ul == prev_u) ? prev_u : prev_l; -} - -static INLINE int av1_get_pred_context_seg_id(const MACROBLOCKD *xd) { - const MB_MODE_INFO *const above_mi = xd->above_mbmi; - const MB_MODE_INFO *const left_mi = xd->left_mbmi; - const int above_sip = (above_mi != NULL) ? above_mi->seg_id_predicted : 0; - const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0; - - return above_sip + left_sip; -} - -static INLINE int get_comp_index_context(const AV1_COMMON *cm, - const MACROBLOCKD *xd) { - MB_MODE_INFO *mbmi = xd->mi[0]; - int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx; - int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx; - int bck_frame_index = 0, fwd_frame_index = 0; - int cur_frame_index = cm->cur_frame->cur_frame_offset; - - if (bck_idx >= 0) - bck_frame_index = cm->buffer_pool->frame_bufs[bck_idx].cur_frame_offset; - - if (fwd_idx >= 0) - fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset; - int fwd = abs(get_relative_dist(cm, fwd_frame_index, cur_frame_index)); - int bck = abs(get_relative_dist(cm, cur_frame_index, bck_frame_index)); - - const MB_MODE_INFO *const above_mi = xd->above_mbmi; - const MB_MODE_INFO *const left_mi = xd->left_mbmi; - - int above_ctx = 0, left_ctx = 0; - const int offset = (fwd == bck); - - if (above_mi) { - if (has_second_ref(above_mi)) - above_ctx = above_mi->compound_idx; - else if (above_mi->ref_frame[0] == ALTREF_FRAME) - above_ctx = 1; - } - - if (left_mi) { - if (has_second_ref(left_mi)) - 
left_ctx = left_mi->compound_idx; - else if (left_mi->ref_frame[0] == ALTREF_FRAME) - left_ctx = 1; - } - - return above_ctx + left_ctx + 3 * offset; -} - -static INLINE int get_comp_group_idx_context(const MACROBLOCKD *xd) { - const MB_MODE_INFO *const above_mi = xd->above_mbmi; - const MB_MODE_INFO *const left_mi = xd->left_mbmi; - int above_ctx = 0, left_ctx = 0; - - if (above_mi) { - if (has_second_ref(above_mi)) - above_ctx = above_mi->comp_group_idx; - else if (above_mi->ref_frame[0] == ALTREF_FRAME) - above_ctx = 3; - } - if (left_mi) { - if (has_second_ref(left_mi)) - left_ctx = left_mi->comp_group_idx; - else if (left_mi->ref_frame[0] == ALTREF_FRAME) - left_ctx = 3; - } - - return AOMMIN(5, above_ctx + left_ctx); -} - -static INLINE aom_cdf_prob *av1_get_pred_cdf_seg_id( - struct segmentation_probs *segp, const MACROBLOCKD *xd) { - return segp->pred_cdf[av1_get_pred_context_seg_id(xd)]; -} - -static INLINE int av1_get_skip_mode_context(const MACROBLOCKD *xd) { - const MB_MODE_INFO *const above_mi = xd->above_mbmi; - const MB_MODE_INFO *const left_mi = xd->left_mbmi; - const int above_skip_mode = above_mi ? above_mi->skip_mode : 0; - const int left_skip_mode = left_mi ? left_mi->skip_mode : 0; - return above_skip_mode + left_skip_mode; -} - -static INLINE int av1_get_skip_context(const MACROBLOCKD *xd) { - const MB_MODE_INFO *const above_mi = xd->above_mbmi; - const MB_MODE_INFO *const left_mi = xd->left_mbmi; - const int above_skip = above_mi ? above_mi->skip : 0; - const int left_skip = left_mi ? left_mi->skip : 0; - return above_skip + left_skip; -} - -int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir); - -// Get a list of palette base colors that are used in the above and left blocks, -// referred to as "color cache". The return value is the number of colors in the -// cache (<= 2 * PALETTE_MAX_SIZE). The color values are stored in "cache" -// in ascending order. 
-int av1_get_palette_cache(const MACROBLOCKD *const xd, int plane, - uint16_t *cache); - -static INLINE int av1_get_palette_bsize_ctx(BLOCK_SIZE bsize) { - return num_pels_log2_lookup[bsize] - num_pels_log2_lookup[BLOCK_8X8]; -} - -static INLINE int av1_get_palette_mode_ctx(const MACROBLOCKD *xd) { - const MB_MODE_INFO *const above_mi = xd->above_mbmi; - const MB_MODE_INFO *const left_mi = xd->left_mbmi; - int ctx = 0; - if (above_mi) ctx += (above_mi->palette_mode_info.palette_size[0] > 0); - if (left_mi) ctx += (left_mi->palette_mode_info.palette_size[0] > 0); - return ctx; -} - -int av1_get_intra_inter_context(const MACROBLOCKD *xd); - -int av1_get_reference_mode_context(const MACROBLOCKD *xd); - -static INLINE aom_cdf_prob *av1_get_reference_mode_cdf(const MACROBLOCKD *xd) { - return xd->tile_ctx->comp_inter_cdf[av1_get_reference_mode_context(xd)]; -} - -int av1_get_comp_reference_type_context(const MACROBLOCKD *xd); - -// == Uni-directional contexts == - -int av1_get_pred_context_uni_comp_ref_p(const MACROBLOCKD *xd); - -int av1_get_pred_context_uni_comp_ref_p1(const MACROBLOCKD *xd); - -int av1_get_pred_context_uni_comp_ref_p2(const MACROBLOCKD *xd); - -static INLINE aom_cdf_prob *av1_get_comp_reference_type_cdf( - const MACROBLOCKD *xd) { - const int pred_context = av1_get_comp_reference_type_context(xd); - return xd->tile_ctx->comp_ref_type_cdf[pred_context]; -} - -static INLINE aom_cdf_prob *av1_get_pred_cdf_uni_comp_ref_p( - const MACROBLOCKD *xd) { - const int pred_context = av1_get_pred_context_uni_comp_ref_p(xd); - return xd->tile_ctx->uni_comp_ref_cdf[pred_context][0]; -} - -static INLINE aom_cdf_prob *av1_get_pred_cdf_uni_comp_ref_p1( - const MACROBLOCKD *xd) { - const int pred_context = av1_get_pred_context_uni_comp_ref_p1(xd); - return xd->tile_ctx->uni_comp_ref_cdf[pred_context][1]; -} - -static INLINE aom_cdf_prob *av1_get_pred_cdf_uni_comp_ref_p2( - const MACROBLOCKD *xd) { - const int pred_context = av1_get_pred_context_uni_comp_ref_p2(xd); - 
return xd->tile_ctx->uni_comp_ref_cdf[pred_context][2]; -} - -// == Bi-directional contexts == - -int av1_get_pred_context_comp_ref_p(const MACROBLOCKD *xd); - -int av1_get_pred_context_comp_ref_p1(const MACROBLOCKD *xd); - -int av1_get_pred_context_comp_ref_p2(const MACROBLOCKD *xd); - -int av1_get_pred_context_comp_bwdref_p(const MACROBLOCKD *xd); - -int av1_get_pred_context_comp_bwdref_p1(const MACROBLOCKD *xd); - -static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p(const MACROBLOCKD *xd) { - const int pred_context = av1_get_pred_context_comp_ref_p(xd); - return xd->tile_ctx->comp_ref_cdf[pred_context][0]; -} - -static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p1( - const MACROBLOCKD *xd) { - const int pred_context = av1_get_pred_context_comp_ref_p1(xd); - return xd->tile_ctx->comp_ref_cdf[pred_context][1]; -} - -static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p2( - const MACROBLOCKD *xd) { - const int pred_context = av1_get_pred_context_comp_ref_p2(xd); - return xd->tile_ctx->comp_ref_cdf[pred_context][2]; -} - -static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_bwdref_p( - const MACROBLOCKD *xd) { - const int pred_context = av1_get_pred_context_comp_bwdref_p(xd); - return xd->tile_ctx->comp_bwdref_cdf[pred_context][0]; -} - -static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_bwdref_p1( - const MACROBLOCKD *xd) { - const int pred_context = av1_get_pred_context_comp_bwdref_p1(xd); - return xd->tile_ctx->comp_bwdref_cdf[pred_context][1]; -} - -// == Single contexts == - -int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd); - -int av1_get_pred_context_single_ref_p2(const MACROBLOCKD *xd); - -int av1_get_pred_context_single_ref_p3(const MACROBLOCKD *xd); - -int av1_get_pred_context_single_ref_p4(const MACROBLOCKD *xd); - -int av1_get_pred_context_single_ref_p5(const MACROBLOCKD *xd); - -int av1_get_pred_context_single_ref_p6(const MACROBLOCKD *xd); - -static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p1( - const MACROBLOCKD *xd) { - 
return xd->tile_ctx - ->single_ref_cdf[av1_get_pred_context_single_ref_p1(xd)][0]; -} -static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p2( - const MACROBLOCKD *xd) { - return xd->tile_ctx - ->single_ref_cdf[av1_get_pred_context_single_ref_p2(xd)][1]; -} -static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p3( - const MACROBLOCKD *xd) { - return xd->tile_ctx - ->single_ref_cdf[av1_get_pred_context_single_ref_p3(xd)][2]; -} -static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p4( - const MACROBLOCKD *xd) { - return xd->tile_ctx - ->single_ref_cdf[av1_get_pred_context_single_ref_p4(xd)][3]; -} -static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p5( - const MACROBLOCKD *xd) { - return xd->tile_ctx - ->single_ref_cdf[av1_get_pred_context_single_ref_p5(xd)][4]; -} -static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p6( - const MACROBLOCKD *xd) { - return xd->tile_ctx - ->single_ref_cdf[av1_get_pred_context_single_ref_p6(xd)][5]; -} - -// Returns a context number for the given MB prediction signal -// The mode info data structure has a one element border above and to the -// left of the entries corresponding to real blocks. -// The prediction flags in these dummy entries are initialized to 0. 
-static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { - const MB_MODE_INFO *mbmi = xd->mi[0]; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const TX_SIZE max_tx_size = max_txsize_rect_lookup[mbmi->sb_type]; - const int max_tx_wide = tx_size_wide[max_tx_size]; - const int max_tx_high = tx_size_high[max_tx_size]; - const int has_above = xd->up_available; - const int has_left = xd->left_available; - - int above = xd->above_txfm_context[0] >= max_tx_wide; - int left = xd->left_txfm_context[0] >= max_tx_high; - - if (has_above) - if (is_inter_block(above_mbmi)) - above = block_size_wide[above_mbmi->sb_type] >= max_tx_wide; - - if (has_left) - if (is_inter_block(left_mbmi)) - left = block_size_high[left_mbmi->sb_type] >= max_tx_high; - - if (has_above && has_left) - return (above + left); - else if (has_above) - return above; - else if (has_left) - return left; - else - return 0; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_PRED_COMMON_H_ diff --git a/third_party/aom/av1/common/quant_common.c b/third_party/aom/av1/common/quant_common.c deleted file mode 100644 index 0e14da7a3..000000000 --- a/third_party/aom/av1/common/quant_common.c +++ /dev/null @@ -1,13676 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "av1/common/common.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/entropy.h" -#include "av1/common/quant_common.h" -#include "av1/common/seg_common.h" -#include "av1/common/blockd.h" - -static const int16_t dc_qlookup_Q3[QINDEX_RANGE] = { - 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, - 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, - 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, - 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53, - 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, - 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76, - 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88, - 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110, - 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134, - 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164, - 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202, - 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247, - 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300, - 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364, - 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441, - 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549, - 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, - 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, - 1184, 1232, 1282, 1336, -}; - -static const int16_t dc_qlookup_10_Q3[QINDEX_RANGE] = { - 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, - 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, - 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132, - 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182, - 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230, - 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, - 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321, - 324, 327, 
331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387, - 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466, - 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567, - 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, - 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, - 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001, - 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202, - 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436, - 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, - 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, - 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, - 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823, - 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, -}; - -static const int16_t dc_qlookup_12_Q3[QINDEX_RANGE] = { - 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, - 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237, - 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405, - 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580, - 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752, - 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919, - 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, - 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, - 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, - 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692, - 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957, - 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334, - 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746, - 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, - 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, - 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, - 
4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153, - 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984, - 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966, - 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, - 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, - 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, - 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949, - 19718, 20521, 21387, -}; - -static const int16_t ac_qlookup_Q3[QINDEX_RANGE] = { - 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, - 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, - 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, - 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, - 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, - 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179, - 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223, - 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280, - 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353, - 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448, - 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571, - 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729, - 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933, - 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196, - 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537, - 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, -}; - -static const int16_t ac_qlookup_10_Q3[QINDEX_RANGE] = { - 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40, - 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 
88, 92, - 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149, - 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208, - 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267, - 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324, - 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379, - 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466, - 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571, - 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713, - 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889, - 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118, - 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411, - 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791, - 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283, - 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, - 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731, - 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784, - 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148, - 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, -}; - -static const int16_t ac_qlookup_12_Q3[QINDEX_RANGE] = { - 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99, - 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263, - 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456, - 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660, - 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865, - 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067, - 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264, - 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, - 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693, - 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052, - 2085, 2118, 2150, 2183, 
2216, 2248, 2281, 2313, 2346, 2378, 2411, - 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943, - 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555, - 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310, - 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256, - 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, - 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867, - 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660, - 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885, - 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637, - 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062, - 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334, - 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599, - 28143, 28687, 29247, -}; - -// Coefficient scaling and quantization with AV1 TX are tailored to -// the AV1 TX transforms. Regardless of the bit-depth of the input, -// the transform stages scale the coefficient values up by a factor of -// 8 (3 bits) over the scale of the pixel values. Thus, for 8-bit -// input, the coefficients have effectively 11 bits of scale depth -// (8+3), 10-bit input pixels result in 13-bit coefficient depth -// (10+3) and 12-bit pixels yield 15-bit (12+3) coefficient depth. -// All quantizers are built using this invariant of x8, 3-bit scaling, -// thus the Q3 suffix. - -// A partial exception to this rule is large transforms; to avoid -// overflow, TX blocks with > 256 pels (>16x16) are scaled only -// 4-times unity (2 bits) over the pixel depth, and TX blocks with -// over 1024 pixels (>32x32) are scaled up only 2x unity (1 bit). -// This descaling is found via av1_tx_get_scale(). Thus, 16x32, 32x16 -// and 32x32 transforms actually return Q2 coefficients, and 32x64, -// 64x32 and 64x64 transforms return Q1 coefficients. 
However, the -// quantizers are de-scaled down on-the-fly by the same amount -// (av1_tx_get_scale()) during quantization, and as such the -// dequantized/decoded coefficients, even for large TX blocks, are always -// effectively Q3. Meanwhile, quantized/coded coefficients are Q0 -// because Qn quantizers are applied to Qn tx coefficients. - -// Note that encoder decision making (which uses the quantizer to -// generate several bespoke lamdas for RDO and other heuristics) -// expects quantizers to be larger for higher-bitdepth input. In -// addition, the minimum allowable quantizer is 4; smaller values will -// underflow to 0 in the actual quantization routines. - -int16_t av1_dc_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) { - switch (bit_depth) { - case AOM_BITS_8: return dc_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)]; - case AOM_BITS_10: return dc_qlookup_10_Q3[clamp(qindex + delta, 0, MAXQ)]; - case AOM_BITS_12: return dc_qlookup_12_Q3[clamp(qindex + delta, 0, MAXQ)]; - default: - assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); - return -1; - } -} - -int16_t av1_ac_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) { - switch (bit_depth) { - case AOM_BITS_8: return ac_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)]; - case AOM_BITS_10: return ac_qlookup_10_Q3[clamp(qindex + delta, 0, MAXQ)]; - case AOM_BITS_12: return ac_qlookup_12_Q3[clamp(qindex + delta, 0, MAXQ)]; - default: - assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); - return -1; - } -} - -// In AV1 TX, the coefficients are always scaled up a factor of 8 (3 -// bits), so QTX == Q3. 
- -int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) { - return av1_dc_quant_Q3(qindex, delta, bit_depth); -} - -int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) { - return av1_ac_quant_Q3(qindex, delta, bit_depth); -} - -int av1_get_qindex(const struct segmentation *seg, int segment_id, - int base_qindex) { - if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { - const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); - const int seg_qindex = base_qindex + data; - return clamp(seg_qindex, 0, MAXQ); - } else { - return base_qindex; - } -} - -const qm_val_t *av1_iqmatrix(AV1_COMMON *cm, int qmlevel, int plane, - TX_SIZE tx_size) { - return &cm->giqmatrix[qmlevel][plane][tx_size][0]; -} -const qm_val_t *av1_qmatrix(AV1_COMMON *cm, int qmlevel, int plane, - TX_SIZE tx_size) { - return &cm->gqmatrix[qmlevel][plane][tx_size][0]; -} - -#define QM_TOTAL_SIZE 3344 -static const qm_val_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE]; -static const qm_val_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE]; - -void av1_qm_init(AV1_COMMON *cm) { - const int num_planes = av1_num_planes(cm); - int q, c, t; - int current; - for (q = 0; q < NUM_QM_LEVELS; ++q) { - for (c = 0; c < num_planes; ++c) { - current = 0; - for (t = 0; t < TX_SIZES_ALL; ++t) { - const int size = tx_size_2d[t]; - const int qm_tx_size = av1_get_adjusted_tx_size(t); - if (q == NUM_QM_LEVELS - 1) { - cm->gqmatrix[q][c][t] = NULL; - cm->giqmatrix[q][c][t] = NULL; - } else if (t != qm_tx_size) { // Reuse matrices for 'qm_tx_size' - cm->gqmatrix[q][c][t] = cm->gqmatrix[q][c][qm_tx_size]; - cm->giqmatrix[q][c][t] = cm->giqmatrix[q][c][qm_tx_size]; - } else { - assert(current + size <= QM_TOTAL_SIZE); - cm->gqmatrix[q][c][t] = &wt_matrix_ref[q][c >= 1][current]; - cm->giqmatrix[q][c][t] = &iwt_matrix_ref[q][c >= 1][current]; - current += size; - } - } - } - } -} - -/* Provide 16 sets of quantization matrices for chroma and luma - and each TX size. 
Matrices for different TX sizes are in fact - sub-sampled from the 32x32 and 16x16 sizes, but explicitly - defined here for convenience. Intra and inter matrix sets are the - same but changing DEFAULT_QM_INTER_OFFSET from zero allows - for different matrices for inter and intra blocks in the same - frame. - Matrices for different QM levels have been rescaled in the - frequency domain according to different nominal viewing - distances. - */ -static const qm_val_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = { - { - { /* Luma */ - /* Size 4x4 */ - 32, 43, 73, 97, 43, 67, 94, 110, 73, 94, 137, 150, 97, 110, 150, 200, - /* Size 8x8 */ - 32, 32, 38, 51, 68, 84, 95, 109, 32, 35, 40, 49, 63, 76, 89, 102, 38, - 40, 54, 65, 78, 91, 98, 106, 51, 49, 65, 82, 97, 111, 113, 121, 68, 63, - 78, 97, 117, 134, 138, 142, 84, 76, 91, 111, 134, 152, 159, 168, 95, 89, - 98, 113, 138, 159, 183, 199, 109, 102, 106, 121, 142, 168, 199, 220, - /* Size 16x16 */ - 32, 31, 31, 34, 36, 44, 48, 59, 65, 80, 83, 91, 97, 104, 111, 119, 31, - 32, 32, 33, 34, 41, 44, 54, 59, 72, 75, 83, 90, 97, 104, 112, 31, 32, - 33, 35, 36, 42, 45, 54, 59, 71, 74, 81, 86, 93, 100, 107, 34, 33, 35, - 39, 42, 47, 51, 58, 63, 74, 76, 81, 84, 90, 97, 105, 36, 34, 36, 42, 48, - 54, 57, 64, 68, 79, 81, 88, 91, 96, 102, 105, 44, 41, 42, 47, 54, 63, - 67, 75, 79, 90, 92, 95, 100, 102, 109, 112, 48, 44, 45, 51, 57, 67, 71, - 80, 85, 96, 99, 107, 108, 111, 117, 120, 59, 54, 54, 58, 64, 75, 80, 92, - 98, 110, 113, 115, 116, 122, 125, 130, 65, 59, 59, 63, 68, 79, 85, 98, - 105, 118, 121, 127, 130, 134, 135, 140, 80, 72, 71, 74, 79, 90, 96, 110, - 118, 134, 137, 140, 143, 144, 146, 152, 83, 75, 74, 76, 81, 92, 99, 113, - 121, 137, 140, 151, 152, 155, 158, 165, 91, 83, 81, 81, 88, 95, 107, - 115, 127, 140, 151, 159, 166, 169, 173, 179, 97, 90, 86, 84, 91, 100, - 108, 116, 130, 143, 152, 166, 174, 182, 189, 193, 104, 97, 93, 90, 96, - 102, 111, 122, 134, 144, 155, 169, 182, 191, 200, 210, 111, 104, 100, - 97, 102, 109, 117, 
125, 135, 146, 158, 173, 189, 200, 210, 220, 119, - 112, 107, 105, 105, 112, 120, 130, 140, 152, 165, 179, 193, 210, 220, - 231, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 32, 34, 35, 36, 39, 44, 46, 48, 54, 59, 62, 65, 71, - 80, 81, 83, 88, 91, 94, 97, 101, 104, 107, 111, 115, 119, 123, 31, 32, - 32, 32, 32, 32, 34, 34, 35, 38, 42, 44, 46, 51, 56, 59, 62, 68, 76, 77, - 78, 84, 86, 89, 92, 95, 99, 102, 105, 109, 113, 116, 31, 32, 32, 32, 32, - 32, 33, 34, 34, 37, 41, 42, 44, 49, 54, 56, 59, 65, 72, 73, 75, 80, 83, - 86, 90, 93, 97, 101, 104, 108, 112, 116, 31, 32, 32, 32, 33, 33, 34, 35, - 35, 38, 41, 43, 45, 49, 54, 56, 59, 64, 72, 73, 74, 79, 82, 85, 88, 91, - 94, 97, 101, 104, 107, 111, 31, 32, 32, 33, 33, 34, 35, 36, 36, 39, 42, - 44, 45, 50, 54, 56, 59, 64, 71, 72, 74, 78, 81, 84, 86, 89, 93, 96, 100, - 104, 107, 111, 32, 32, 32, 33, 34, 35, 37, 37, 38, 40, 42, 44, 46, 49, - 53, 55, 58, 63, 69, 70, 72, 76, 79, 82, 85, 89, 93, 96, 99, 102, 106, - 109, 34, 34, 33, 34, 35, 37, 39, 41, 42, 45, 47, 49, 51, 54, 58, 60, 63, - 68, 74, 75, 76, 80, 81, 82, 84, 87, 90, 93, 97, 101, 105, 110, 35, 34, - 34, 35, 36, 37, 41, 43, 45, 47, 50, 52, 53, 57, 61, 63, 65, 70, 76, 77, - 79, 82, 84, 86, 89, 91, 92, 93, 96, 100, 103, 107, 36, 35, 34, 35, 36, - 38, 42, 45, 48, 50, 54, 55, 57, 60, 64, 66, 68, 73, 79, 80, 81, 85, 88, - 90, 91, 93, 96, 99, 102, 103, 105, 107, 39, 38, 37, 38, 39, 40, 45, 47, - 50, 54, 58, 59, 61, 65, 69, 71, 73, 78, 84, 85, 86, 91, 92, 92, 95, 98, - 100, 101, 103, 106, 110, 114, 44, 42, 41, 41, 42, 42, 47, 50, 54, 58, - 63, 65, 67, 71, 75, 77, 79, 84, 90, 91, 92, 95, 95, 97, 100, 101, 102, - 105, 109, 111, 112, 114, 46, 44, 42, 43, 44, 44, 49, 52, 55, 59, 65, 67, - 69, 74, 78, 80, 82, 87, 93, 94, 95, 98, 100, 103, 102, 105, 108, 110, - 111, 113, 117, 121, 48, 46, 44, 45, 45, 46, 51, 53, 57, 61, 67, 69, 71, - 76, 80, 83, 85, 90, 96, 97, 99, 103, 107, 105, 108, 111, 111, 113, 117, - 119, 120, 122, 54, 51, 49, 49, 50, 49, 54, 57, 60, 65, 71, 74, 76, 82, - 87, 
89, 92, 97, 104, 105, 106, 111, 110, 111, 114, 113, 116, 120, 120, - 121, 125, 130, 59, 56, 54, 54, 54, 53, 58, 61, 64, 69, 75, 78, 80, 87, - 92, 95, 98, 103, 110, 111, 113, 115, 115, 119, 116, 120, 122, 122, 125, - 129, 130, 130, 62, 59, 56, 56, 56, 55, 60, 63, 66, 71, 77, 80, 83, 89, - 95, 98, 101, 107, 114, 115, 117, 119, 123, 121, 125, 126, 125, 129, 131, - 131, 135, 140, 65, 62, 59, 59, 59, 58, 63, 65, 68, 73, 79, 82, 85, 92, - 98, 101, 105, 111, 118, 119, 121, 126, 127, 128, 130, 130, 134, 133, - 135, 140, 140, 140, 71, 68, 65, 64, 64, 63, 68, 70, 73, 78, 84, 87, 90, - 97, 103, 107, 111, 117, 125, 126, 128, 134, 132, 136, 133, 138, 137, - 140, 143, 142, 145, 150, 80, 76, 72, 72, 71, 69, 74, 76, 79, 84, 90, 93, - 96, 104, 110, 114, 118, 125, 134, 135, 137, 139, 140, 139, 143, 142, - 144, 146, 146, 151, 152, 151, 81, 77, 73, 73, 72, 70, 75, 77, 80, 85, - 91, 94, 97, 105, 111, 115, 119, 126, 135, 137, 138, 144, 147, 146, 148, - 149, 151, 150, 156, 155, 157, 163, 83, 78, 75, 74, 74, 72, 76, 79, 81, - 86, 92, 95, 99, 106, 113, 117, 121, 128, 137, 138, 140, 147, 151, 156, - 152, 157, 155, 161, 158, 162, 165, 164, 88, 84, 80, 79, 78, 76, 80, 82, - 85, 91, 95, 98, 103, 111, 115, 119, 126, 134, 139, 144, 147, 152, 154, - 158, 163, 159, 165, 163, 168, 168, 169, 176, 91, 86, 83, 82, 81, 79, 81, - 84, 88, 92, 95, 100, 107, 110, 115, 123, 127, 132, 140, 147, 151, 154, - 159, 161, 166, 171, 169, 173, 173, 176, 179, 177, 94, 89, 86, 85, 84, - 82, 82, 86, 90, 92, 97, 103, 105, 111, 119, 121, 128, 136, 139, 146, - 156, 158, 161, 166, 168, 174, 179, 178, 180, 183, 183, 190, 97, 92, 90, - 88, 86, 85, 84, 89, 91, 95, 100, 102, 108, 114, 116, 125, 130, 133, 143, - 148, 152, 163, 166, 168, 174, 176, 182, 187, 189, 188, 193, 191, 101, - 95, 93, 91, 89, 89, 87, 91, 93, 98, 101, 105, 111, 113, 120, 126, 130, - 138, 142, 149, 157, 159, 171, 174, 176, 183, 184, 191, 195, 199, 197, - 204, 104, 99, 97, 94, 93, 93, 90, 92, 96, 100, 102, 108, 111, 116, 122, - 125, 134, 137, 144, 151, 155, 
165, 169, 179, 182, 184, 191, 193, 200, - 204, 210, 206, 107, 102, 101, 97, 96, 96, 93, 93, 99, 101, 105, 110, - 113, 120, 122, 129, 133, 140, 146, 150, 161, 163, 173, 178, 187, 191, - 193, 200, 202, 210, 214, 222, 111, 105, 104, 101, 100, 99, 97, 96, 102, - 103, 109, 111, 117, 120, 125, 131, 135, 143, 146, 156, 158, 168, 173, - 180, 189, 195, 200, 202, 210, 212, 220, 224, 115, 109, 108, 104, 104, - 102, 101, 100, 103, 106, 111, 113, 119, 121, 129, 131, 140, 142, 151, - 155, 162, 168, 176, 183, 188, 199, 204, 210, 212, 220, 222, 230, 119, - 113, 112, 107, 107, 106, 105, 103, 105, 110, 112, 117, 120, 125, 130, - 135, 140, 145, 152, 157, 165, 169, 179, 183, 193, 197, 210, 214, 220, - 222, 231, 232, 123, 116, 116, 111, 111, 109, 110, 107, 107, 114, 114, - 121, 122, 130, 130, 140, 140, 150, 151, 163, 164, 176, 177, 190, 191, - 204, 206, 222, 224, 230, 232, 242, - /* Size 4x8 */ - 32, 42, 75, 91, 33, 42, 69, 86, 37, 58, 84, 91, 49, 71, 103, 110, 65, - 84, 125, 128, 80, 97, 142, 152, 91, 100, 145, 178, 104, 112, 146, 190, - /* Size 8x4 */ - 32, 33, 37, 49, 65, 80, 91, 104, 42, 42, 58, 71, 84, 97, 100, 112, 75, - 69, 84, 103, 125, 142, 145, 146, 91, 86, 91, 110, 128, 152, 178, 190, - /* Size 8x16 */ - 32, 32, 36, 53, 65, 87, 93, 99, 31, 33, 34, 49, 59, 78, 86, 93, 32, 34, - 36, 50, 59, 77, 82, 89, 34, 37, 42, 54, 63, 79, 80, 88, 36, 38, 48, 60, - 68, 84, 86, 90, 44, 43, 53, 71, 79, 95, 94, 97, 48, 46, 56, 76, 85, 102, - 105, 105, 58, 54, 63, 87, 98, 116, 112, 115, 65, 58, 68, 92, 105, 124, - 122, 124, 79, 70, 79, 104, 118, 141, 135, 135, 82, 72, 81, 106, 121, - 144, 149, 146, 91, 80, 88, 106, 130, 148, 162, 159, 97, 86, 94, 107, - 128, 157, 167, 171, 103, 93, 98, 114, 131, 150, 174, 186, 110, 100, 101, - 117, 138, 161, 183, 193, 118, 107, 105, 118, 136, 157, 182, 203, - /* Size 16x8 */ - 32, 31, 32, 34, 36, 44, 48, 58, 65, 79, 82, 91, 97, 103, 110, 118, 32, - 33, 34, 37, 38, 43, 46, 54, 58, 70, 72, 80, 86, 93, 100, 107, 36, 34, - 36, 42, 48, 53, 56, 63, 68, 79, 81, 88, 
94, 98, 101, 105, 53, 49, 50, - 54, 60, 71, 76, 87, 92, 104, 106, 106, 107, 114, 117, 118, 65, 59, 59, - 63, 68, 79, 85, 98, 105, 118, 121, 130, 128, 131, 138, 136, 87, 78, 77, - 79, 84, 95, 102, 116, 124, 141, 144, 148, 157, 150, 161, 157, 93, 86, - 82, 80, 86, 94, 105, 112, 122, 135, 149, 162, 167, 174, 183, 182, 99, - 93, 89, 88, 90, 97, 105, 115, 124, 135, 146, 159, 171, 186, 193, 203, - /* Size 16x32 */ - 32, 31, 32, 34, 36, 44, 53, 59, 65, 79, 87, 90, 93, 96, 99, 102, 31, 32, - 32, 34, 35, 42, 51, 56, 62, 75, 82, 85, 88, 91, 94, 97, 31, 32, 33, 33, - 34, 41, 49, 54, 59, 72, 78, 82, 86, 90, 93, 97, 31, 32, 33, 34, 35, 41, - 49, 54, 59, 71, 78, 81, 84, 87, 90, 93, 32, 32, 34, 35, 36, 42, 50, 54, - 59, 71, 77, 80, 82, 86, 89, 93, 32, 33, 35, 37, 38, 42, 49, 53, 58, 69, - 75, 78, 82, 86, 89, 92, 34, 34, 37, 39, 42, 48, 54, 58, 63, 73, 79, 78, - 80, 83, 88, 92, 35, 34, 37, 41, 45, 50, 57, 61, 65, 76, 82, 83, 84, 84, - 87, 90, 36, 34, 38, 43, 48, 54, 60, 64, 68, 78, 84, 87, 86, 89, 90, 90, - 39, 37, 40, 45, 50, 58, 65, 69, 73, 84, 89, 89, 91, 91, 93, 96, 44, 41, - 43, 48, 53, 63, 71, 75, 79, 90, 95, 93, 94, 95, 97, 97, 46, 43, 44, 49, - 55, 65, 73, 78, 82, 93, 98, 100, 98, 100, 99, 103, 48, 45, 46, 51, 56, - 67, 76, 80, 85, 96, 102, 102, 105, 102, 105, 104, 53, 49, 50, 54, 60, - 71, 82, 87, 92, 103, 109, 107, 107, 110, 107, 111, 58, 54, 54, 58, 63, - 75, 87, 92, 98, 110, 116, 115, 112, 111, 115, 112, 61, 57, 56, 60, 66, - 77, 89, 95, 101, 114, 120, 118, 119, 118, 116, 120, 65, 60, 58, 63, 68, - 79, 92, 98, 105, 118, 124, 123, 122, 123, 124, 121, 71, 65, 63, 68, 73, - 84, 97, 103, 111, 125, 132, 132, 130, 128, 127, 130, 79, 72, 70, 74, 79, - 90, 104, 110, 118, 133, 141, 136, 135, 135, 135, 131, 81, 74, 71, 75, - 80, 91, 105, 112, 119, 135, 142, 140, 140, 138, 139, 142, 82, 75, 72, - 76, 81, 92, 106, 113, 121, 136, 144, 151, 149, 149, 146, 143, 88, 80, - 77, 80, 85, 97, 108, 115, 126, 142, 149, 153, 153, 152, 152, 154, 91, - 83, 80, 81, 88, 100, 106, 114, 130, 142, 
148, 155, 162, 160, 159, 155, - 94, 85, 83, 82, 91, 100, 105, 118, 131, 137, 153, 160, 165, 167, 166, - 168, 97, 88, 86, 85, 94, 100, 107, 123, 128, 140, 157, 161, 167, 173, - 171, 169, 100, 91, 89, 87, 97, 100, 111, 121, 127, 145, 152, 164, 173, - 178, 182, 181, 103, 94, 93, 90, 98, 101, 114, 120, 131, 144, 150, 170, - 174, 180, 186, 183, 107, 97, 96, 93, 100, 104, 117, 119, 136, 142, 155, - 168, 177, 187, 191, 198, 110, 101, 100, 97, 101, 108, 117, 123, 138, - 141, 161, 165, 183, 188, 193, 200, 114, 104, 104, 100, 103, 112, 117, - 127, 137, 146, 159, 167, 185, 190, 201, 206, 118, 108, 107, 103, 105, - 115, 118, 131, 136, 151, 157, 172, 182, 197, 203, 208, 122, 111, 111, - 107, 107, 119, 119, 136, 136, 156, 156, 178, 179, 203, 204, 217, - /* Size 32x16 */ - 32, 31, 31, 31, 32, 32, 34, 35, 36, 39, 44, 46, 48, 53, 58, 61, 65, 71, - 79, 81, 82, 88, 91, 94, 97, 100, 103, 107, 110, 114, 118, 122, 31, 32, - 32, 32, 32, 33, 34, 34, 34, 37, 41, 43, 45, 49, 54, 57, 60, 65, 72, 74, - 75, 80, 83, 85, 88, 91, 94, 97, 101, 104, 108, 111, 32, 32, 33, 33, 34, - 35, 37, 37, 38, 40, 43, 44, 46, 50, 54, 56, 58, 63, 70, 71, 72, 77, 80, - 83, 86, 89, 93, 96, 100, 104, 107, 111, 34, 34, 33, 34, 35, 37, 39, 41, - 43, 45, 48, 49, 51, 54, 58, 60, 63, 68, 74, 75, 76, 80, 81, 82, 85, 87, - 90, 93, 97, 100, 103, 107, 36, 35, 34, 35, 36, 38, 42, 45, 48, 50, 53, - 55, 56, 60, 63, 66, 68, 73, 79, 80, 81, 85, 88, 91, 94, 97, 98, 100, - 101, 103, 105, 107, 44, 42, 41, 41, 42, 42, 48, 50, 54, 58, 63, 65, 67, - 71, 75, 77, 79, 84, 90, 91, 92, 97, 100, 100, 100, 100, 101, 104, 108, - 112, 115, 119, 53, 51, 49, 49, 50, 49, 54, 57, 60, 65, 71, 73, 76, 82, - 87, 89, 92, 97, 104, 105, 106, 108, 106, 105, 107, 111, 114, 117, 117, - 117, 118, 119, 59, 56, 54, 54, 54, 53, 58, 61, 64, 69, 75, 78, 80, 87, - 92, 95, 98, 103, 110, 112, 113, 115, 114, 118, 123, 121, 120, 119, 123, - 127, 131, 136, 65, 62, 59, 59, 59, 58, 63, 65, 68, 73, 79, 82, 85, 92, - 98, 101, 105, 111, 118, 119, 121, 126, 130, 131, 128, 
127, 131, 136, - 138, 137, 136, 136, 79, 75, 72, 71, 71, 69, 73, 76, 78, 84, 90, 93, 96, - 103, 110, 114, 118, 125, 133, 135, 136, 142, 142, 137, 140, 145, 144, - 142, 141, 146, 151, 156, 87, 82, 78, 78, 77, 75, 79, 82, 84, 89, 95, 98, - 102, 109, 116, 120, 124, 132, 141, 142, 144, 149, 148, 153, 157, 152, - 150, 155, 161, 159, 157, 156, 90, 85, 82, 81, 80, 78, 78, 83, 87, 89, - 93, 100, 102, 107, 115, 118, 123, 132, 136, 140, 151, 153, 155, 160, - 161, 164, 170, 168, 165, 167, 172, 178, 93, 88, 86, 84, 82, 82, 80, 84, - 86, 91, 94, 98, 105, 107, 112, 119, 122, 130, 135, 140, 149, 153, 162, - 165, 167, 173, 174, 177, 183, 185, 182, 179, 96, 91, 90, 87, 86, 86, 83, - 84, 89, 91, 95, 100, 102, 110, 111, 118, 123, 128, 135, 138, 149, 152, - 160, 167, 173, 178, 180, 187, 188, 190, 197, 203, 99, 94, 93, 90, 89, - 89, 88, 87, 90, 93, 97, 99, 105, 107, 115, 116, 124, 127, 135, 139, 146, - 152, 159, 166, 171, 182, 186, 191, 193, 201, 203, 204, 102, 97, 97, 93, - 93, 92, 92, 90, 90, 96, 97, 103, 104, 111, 112, 120, 121, 130, 131, 142, - 143, 154, 155, 168, 169, 181, 183, 198, 200, 206, 208, 217, - /* Size 4x16 */ - 31, 44, 79, 96, 32, 41, 72, 90, 32, 42, 71, 86, 34, 48, 73, 83, 34, 54, - 78, 89, 41, 63, 90, 95, 45, 67, 96, 102, 54, 75, 110, 111, 60, 79, 118, - 123, 72, 90, 133, 135, 75, 92, 136, 149, 83, 100, 142, 160, 88, 100, - 140, 173, 94, 101, 144, 180, 101, 108, 141, 188, 108, 115, 151, 197, - /* Size 16x4 */ - 31, 32, 32, 34, 34, 41, 45, 54, 60, 72, 75, 83, 88, 94, 101, 108, 44, - 41, 42, 48, 54, 63, 67, 75, 79, 90, 92, 100, 100, 101, 108, 115, 79, 72, - 71, 73, 78, 90, 96, 110, 118, 133, 136, 142, 140, 144, 141, 151, 96, 90, - 86, 83, 89, 95, 102, 111, 123, 135, 149, 160, 173, 180, 188, 197, - /* Size 8x32 */ - 32, 32, 36, 53, 65, 87, 93, 99, 31, 32, 35, 51, 62, 82, 88, 94, 31, 33, - 34, 49, 59, 78, 86, 93, 31, 33, 35, 49, 59, 78, 84, 90, 32, 34, 36, 50, - 59, 77, 82, 89, 32, 35, 38, 49, 58, 75, 82, 89, 34, 37, 42, 54, 63, 79, - 80, 88, 35, 37, 45, 57, 65, 82, 84, 
87, 36, 38, 48, 60, 68, 84, 86, 90, - 39, 40, 50, 65, 73, 89, 91, 93, 44, 43, 53, 71, 79, 95, 94, 97, 46, 44, - 55, 73, 82, 98, 98, 99, 48, 46, 56, 76, 85, 102, 105, 105, 53, 50, 60, - 82, 92, 109, 107, 107, 58, 54, 63, 87, 98, 116, 112, 115, 61, 56, 66, - 89, 101, 120, 119, 116, 65, 58, 68, 92, 105, 124, 122, 124, 71, 63, 73, - 97, 111, 132, 130, 127, 79, 70, 79, 104, 118, 141, 135, 135, 81, 71, 80, - 105, 119, 142, 140, 139, 82, 72, 81, 106, 121, 144, 149, 146, 88, 77, - 85, 108, 126, 149, 153, 152, 91, 80, 88, 106, 130, 148, 162, 159, 94, - 83, 91, 105, 131, 153, 165, 166, 97, 86, 94, 107, 128, 157, 167, 171, - 100, 89, 97, 111, 127, 152, 173, 182, 103, 93, 98, 114, 131, 150, 174, - 186, 107, 96, 100, 117, 136, 155, 177, 191, 110, 100, 101, 117, 138, - 161, 183, 193, 114, 104, 103, 117, 137, 159, 185, 201, 118, 107, 105, - 118, 136, 157, 182, 203, 122, 111, 107, 119, 136, 156, 179, 204, - /* Size 32x8 */ - 32, 31, 31, 31, 32, 32, 34, 35, 36, 39, 44, 46, 48, 53, 58, 61, 65, 71, - 79, 81, 82, 88, 91, 94, 97, 100, 103, 107, 110, 114, 118, 122, 32, 32, - 33, 33, 34, 35, 37, 37, 38, 40, 43, 44, 46, 50, 54, 56, 58, 63, 70, 71, - 72, 77, 80, 83, 86, 89, 93, 96, 100, 104, 107, 111, 36, 35, 34, 35, 36, - 38, 42, 45, 48, 50, 53, 55, 56, 60, 63, 66, 68, 73, 79, 80, 81, 85, 88, - 91, 94, 97, 98, 100, 101, 103, 105, 107, 53, 51, 49, 49, 50, 49, 54, 57, - 60, 65, 71, 73, 76, 82, 87, 89, 92, 97, 104, 105, 106, 108, 106, 105, - 107, 111, 114, 117, 117, 117, 118, 119, 65, 62, 59, 59, 59, 58, 63, 65, - 68, 73, 79, 82, 85, 92, 98, 101, 105, 111, 118, 119, 121, 126, 130, 131, - 128, 127, 131, 136, 138, 137, 136, 136, 87, 82, 78, 78, 77, 75, 79, 82, - 84, 89, 95, 98, 102, 109, 116, 120, 124, 132, 141, 142, 144, 149, 148, - 153, 157, 152, 150, 155, 161, 159, 157, 156, 93, 88, 86, 84, 82, 82, 80, - 84, 86, 91, 94, 98, 105, 107, 112, 119, 122, 130, 135, 140, 149, 153, - 162, 165, 167, 173, 174, 177, 183, 185, 182, 179, 99, 94, 93, 90, 89, - 89, 88, 87, 90, 93, 97, 99, 105, 107, 115, 
116, 124, 127, 135, 139, 146, - 152, 159, 166, 171, 182, 186, 191, 193, 201, 203, 204 }, - { /* Chroma */ - /* Size 4x4 */ - 35, 46, 57, 66, 46, 60, 69, 71, 57, 69, 90, 90, 66, 71, 90, 109, - /* Size 8x8 */ - 31, 38, 47, 50, 57, 63, 67, 71, 38, 47, 46, 47, 52, 57, 62, 67, 47, 46, - 54, 57, 61, 66, 67, 68, 50, 47, 57, 66, 72, 77, 75, 75, 57, 52, 61, 72, - 82, 88, 86, 84, 63, 57, 66, 77, 88, 96, 95, 95, 67, 62, 67, 75, 86, 95, - 104, 107, 71, 67, 68, 75, 84, 95, 107, 113, - /* Size 16x16 */ - 32, 30, 33, 41, 49, 49, 50, 54, 57, 63, 65, 68, 70, 72, 74, 76, 30, 32, - 35, 42, 46, 45, 46, 49, 52, 57, 58, 62, 64, 67, 70, 72, 33, 35, 39, 45, - 47, 45, 46, 49, 51, 56, 57, 60, 62, 64, 66, 69, 41, 42, 45, 48, 50, 49, - 50, 52, 53, 57, 58, 59, 60, 61, 64, 67, 49, 46, 47, 50, 53, 53, 54, 55, - 56, 60, 61, 64, 64, 65, 66, 66, 49, 45, 45, 49, 53, 58, 60, 62, 63, 67, - 68, 67, 69, 68, 70, 70, 50, 46, 46, 50, 54, 60, 61, 65, 67, 71, 71, 74, - 73, 73, 74, 74, 54, 49, 49, 52, 55, 62, 65, 71, 73, 78, 79, 78, 77, 78, - 78, 78, 57, 52, 51, 53, 56, 63, 67, 73, 76, 82, 83, 84, 84, 84, 82, 83, - 63, 57, 56, 57, 60, 67, 71, 78, 82, 89, 90, 90, 89, 88, 87, 88, 65, 58, - 57, 58, 61, 68, 71, 79, 83, 90, 91, 94, 93, 93, 92, 93, 68, 62, 60, 59, - 64, 67, 74, 78, 84, 90, 94, 98, 99, 98, 98, 98, 70, 64, 62, 60, 64, 69, - 73, 77, 84, 89, 93, 99, 102, 103, 104, 104, 72, 67, 64, 61, 65, 68, 73, - 78, 84, 88, 93, 98, 103, 106, 108, 109, 74, 70, 66, 64, 66, 70, 74, 78, - 82, 87, 92, 98, 104, 108, 111, 112, 76, 72, 69, 67, 66, 70, 74, 78, 83, - 88, 93, 98, 104, 109, 112, 116, - /* Size 32x32 */ - 32, 31, 30, 32, 33, 36, 41, 45, 49, 48, 49, 50, 50, 52, 54, 56, 57, 60, - 63, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 78, 31, 31, 31, 33, - 34, 38, 42, 45, 47, 47, 47, 47, 48, 50, 52, 53, 54, 57, 60, 61, 61, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 74, 30, 31, 32, 33, 35, 40, 42, 44, - 46, 45, 45, 45, 46, 47, 49, 51, 52, 54, 57, 58, 58, 61, 62, 63, 64, 66, - 67, 68, 70, 71, 72, 74, 32, 33, 33, 35, 37, 
41, 43, 45, 47, 46, 45, 46, - 46, 47, 49, 50, 51, 54, 57, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, - 69, 70, 33, 34, 35, 37, 39, 43, 45, 46, 47, 46, 45, 46, 46, 47, 49, 50, - 51, 53, 56, 57, 57, 59, 60, 61, 62, 63, 64, 65, 66, 68, 69, 70, 36, 38, - 40, 41, 43, 47, 47, 47, 48, 46, 45, 46, 46, 47, 48, 49, 50, 52, 54, 55, - 55, 57, 58, 59, 61, 62, 64, 65, 66, 67, 68, 69, 41, 42, 42, 43, 45, 47, - 48, 49, 50, 49, 49, 49, 50, 50, 52, 52, 53, 55, 57, 58, 58, 60, 59, 59, - 60, 61, 61, 63, 64, 66, 67, 69, 45, 45, 44, 45, 46, 47, 49, 50, 51, 51, - 51, 51, 52, 52, 53, 54, 55, 57, 59, 59, 60, 61, 61, 62, 63, 63, 63, 63, - 63, 64, 65, 66, 49, 47, 46, 47, 47, 48, 50, 51, 53, 53, 53, 54, 54, 54, - 55, 56, 56, 58, 60, 61, 61, 63, 64, 64, 64, 64, 65, 66, 66, 66, 66, 66, - 48, 47, 45, 46, 46, 46, 49, 51, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, - 63, 64, 64, 66, 66, 65, 66, 67, 67, 67, 67, 68, 69, 70, 49, 47, 45, 45, - 45, 45, 49, 51, 53, 55, 58, 59, 60, 61, 62, 63, 63, 65, 67, 67, 68, 69, - 67, 68, 69, 68, 68, 69, 70, 70, 70, 70, 50, 47, 45, 46, 46, 46, 49, 51, - 54, 56, 59, 60, 60, 62, 64, 64, 65, 67, 69, 69, 70, 70, 71, 71, 70, 70, - 71, 71, 71, 71, 72, 74, 50, 48, 46, 46, 46, 46, 50, 52, 54, 56, 60, 60, - 61, 63, 65, 66, 67, 68, 71, 71, 71, 73, 74, 72, 73, 74, 73, 73, 74, 74, - 74, 74, 52, 50, 47, 47, 47, 47, 50, 52, 54, 57, 61, 62, 63, 66, 68, 69, - 70, 72, 75, 75, 75, 77, 75, 75, 76, 75, 75, 76, 75, 75, 76, 77, 54, 52, - 49, 49, 49, 48, 52, 53, 55, 58, 62, 64, 65, 68, 71, 72, 73, 75, 78, 78, - 79, 79, 78, 79, 77, 78, 78, 77, 78, 79, 78, 78, 56, 53, 51, 50, 50, 49, - 52, 54, 56, 59, 63, 64, 66, 69, 72, 73, 75, 77, 80, 80, 81, 81, 82, 80, - 81, 81, 79, 81, 80, 79, 81, 82, 57, 54, 52, 51, 51, 50, 53, 55, 56, 60, - 63, 65, 67, 70, 73, 75, 76, 79, 82, 82, 83, 85, 84, 83, 84, 83, 84, 82, - 82, 84, 83, 82, 60, 57, 54, 54, 53, 52, 55, 57, 58, 61, 65, 67, 68, 72, - 75, 77, 79, 82, 85, 85, 86, 88, 86, 87, 85, 86, 85, 85, 86, 84, 85, 86, - 63, 60, 57, 57, 56, 54, 57, 59, 60, 63, 67, 
69, 71, 75, 78, 80, 82, 85, - 89, 89, 90, 90, 90, 89, 89, 88, 88, 88, 87, 88, 88, 87, 64, 61, 58, 57, - 57, 55, 58, 59, 61, 64, 67, 69, 71, 75, 78, 80, 82, 85, 89, 90, 91, 92, - 93, 92, 92, 91, 91, 90, 91, 90, 90, 92, 65, 61, 58, 58, 57, 55, 58, 60, - 61, 64, 68, 70, 71, 75, 79, 81, 83, 86, 90, 91, 91, 94, 94, 96, 93, 94, - 93, 94, 92, 93, 93, 92, 67, 63, 61, 60, 59, 57, 60, 61, 63, 66, 69, 70, - 73, 77, 79, 81, 85, 88, 90, 92, 94, 96, 96, 97, 98, 95, 97, 95, 96, 95, - 95, 96, 68, 64, 62, 61, 60, 58, 59, 61, 64, 66, 67, 71, 74, 75, 78, 82, - 84, 86, 90, 93, 94, 96, 98, 98, 99, 100, 98, 99, 98, 98, 98, 97, 69, 65, - 63, 62, 61, 59, 59, 62, 64, 65, 68, 71, 72, 75, 79, 80, 83, 87, 89, 92, - 96, 97, 98, 100, 100, 101, 102, 101, 101, 101, 100, 102, 70, 66, 64, 63, - 62, 61, 60, 63, 64, 66, 69, 70, 73, 76, 77, 81, 84, 85, 89, 92, 93, 98, - 99, 100, 102, 102, 103, 104, 104, 103, 104, 102, 71, 67, 66, 64, 63, 62, - 61, 63, 64, 67, 68, 70, 74, 75, 78, 81, 83, 86, 88, 91, 94, 95, 100, - 101, 102, 104, 104, 105, 106, 107, 105, 107, 72, 68, 67, 65, 64, 64, 61, - 63, 65, 67, 68, 71, 73, 75, 78, 79, 84, 85, 88, 91, 93, 97, 98, 102, - 103, 104, 106, 106, 108, 108, 109, 107, 73, 69, 68, 66, 65, 65, 63, 63, - 66, 67, 69, 71, 73, 76, 77, 81, 82, 85, 88, 90, 94, 95, 99, 101, 104, - 105, 106, 109, 108, 110, 111, 112, 74, 70, 70, 67, 66, 66, 64, 63, 66, - 67, 70, 71, 74, 75, 78, 80, 82, 86, 87, 91, 92, 96, 98, 101, 104, 106, - 108, 108, 111, 111, 112, 113, 75, 71, 71, 68, 68, 67, 66, 64, 66, 68, - 70, 71, 74, 75, 79, 79, 84, 84, 88, 90, 93, 95, 98, 101, 103, 107, 108, - 110, 111, 113, 113, 115, 76, 72, 72, 69, 69, 68, 67, 65, 66, 69, 70, 72, - 74, 76, 78, 81, 83, 85, 88, 90, 93, 95, 98, 100, 104, 105, 109, 111, - 112, 113, 116, 115, 78, 74, 74, 70, 70, 69, 69, 66, 66, 70, 70, 74, 74, - 77, 78, 82, 82, 86, 87, 92, 92, 96, 97, 102, 102, 107, 107, 112, 113, - 115, 115, 118, - /* Size 4x8 */ - 31, 47, 60, 66, 40, 45, 54, 61, 46, 56, 64, 64, 48, 61, 75, 73, 54, 65, - 85, 82, 61, 69, 92, 92, 
64, 68, 90, 102, 68, 71, 87, 105, - /* Size 8x4 */ - 31, 40, 46, 48, 54, 61, 64, 68, 47, 45, 56, 61, 65, 69, 68, 71, 60, 54, - 64, 75, 85, 92, 90, 87, 66, 61, 64, 73, 82, 92, 102, 105, - /* Size 8x16 */ - 32, 37, 48, 52, 57, 66, 68, 71, 30, 40, 46, 48, 52, 60, 63, 66, 33, 43, - 47, 47, 51, 59, 60, 63, 42, 47, 50, 50, 53, 60, 59, 62, 49, 48, 53, 54, - 57, 62, 62, 62, 49, 46, 53, 61, 64, 69, 66, 66, 50, 46, 54, 64, 67, 73, - 72, 70, 54, 49, 55, 68, 73, 80, 76, 75, 57, 50, 56, 70, 76, 84, 80, 79, - 63, 55, 60, 75, 82, 92, 87, 84, 64, 56, 61, 75, 83, 93, 93, 89, 68, 59, - 64, 74, 86, 94, 98, 94, 70, 62, 66, 73, 83, 96, 99, 98, 72, 64, 66, 75, - 83, 92, 101, 104, 74, 67, 66, 74, 84, 94, 103, 106, 76, 69, 67, 73, 82, - 91, 101, 109, - /* Size 16x8 */ - 32, 30, 33, 42, 49, 49, 50, 54, 57, 63, 64, 68, 70, 72, 74, 76, 37, 40, - 43, 47, 48, 46, 46, 49, 50, 55, 56, 59, 62, 64, 67, 69, 48, 46, 47, 50, - 53, 53, 54, 55, 56, 60, 61, 64, 66, 66, 66, 67, 52, 48, 47, 50, 54, 61, - 64, 68, 70, 75, 75, 74, 73, 75, 74, 73, 57, 52, 51, 53, 57, 64, 67, 73, - 76, 82, 83, 86, 83, 83, 84, 82, 66, 60, 59, 60, 62, 69, 73, 80, 84, 92, - 93, 94, 96, 92, 94, 91, 68, 63, 60, 59, 62, 66, 72, 76, 80, 87, 93, 98, - 99, 101, 103, 101, 71, 66, 63, 62, 62, 66, 70, 75, 79, 84, 89, 94, 98, - 104, 106, 109, - /* Size 16x32 */ - 32, 31, 37, 42, 48, 49, 52, 54, 57, 63, 66, 67, 68, 69, 71, 72, 31, 31, - 38, 42, 47, 47, 50, 52, 54, 60, 63, 64, 65, 66, 67, 68, 30, 32, 40, 42, - 46, 45, 48, 50, 52, 57, 60, 62, 63, 65, 66, 68, 32, 34, 41, 44, 46, 45, - 48, 49, 51, 57, 59, 61, 62, 63, 64, 65, 33, 36, 43, 45, 47, 46, 47, 49, - 51, 56, 59, 60, 60, 62, 63, 65, 37, 40, 47, 47, 47, 45, 47, 48, 50, 54, - 57, 58, 60, 61, 62, 63, 42, 43, 47, 48, 50, 49, 50, 52, 53, 57, 60, 58, - 59, 60, 62, 63, 45, 44, 47, 49, 51, 51, 52, 54, 55, 59, 61, 61, 61, 60, - 61, 61, 49, 46, 48, 50, 53, 53, 54, 55, 57, 60, 62, 63, 62, 63, 62, 62, - 48, 46, 47, 50, 53, 56, 57, 59, 60, 64, 66, 65, 65, 64, 64, 65, 49, 45, - 46, 49, 53, 58, 61, 62, 
64, 67, 69, 67, 66, 66, 66, 65, 49, 46, 46, 49, - 53, 59, 62, 64, 65, 69, 71, 70, 68, 68, 67, 68, 50, 46, 46, 50, 54, 59, - 64, 65, 67, 71, 73, 72, 72, 70, 70, 69, 52, 48, 47, 50, 54, 61, 66, 68, - 71, 75, 77, 74, 73, 73, 71, 72, 54, 50, 49, 52, 55, 62, 68, 71, 73, 78, - 80, 78, 76, 74, 75, 73, 55, 51, 49, 52, 56, 63, 69, 72, 75, 80, 82, 80, - 79, 78, 76, 77, 57, 52, 50, 53, 56, 64, 70, 73, 76, 82, 84, 82, 80, 80, - 79, 77, 60, 54, 52, 55, 58, 65, 72, 75, 79, 85, 88, 86, 84, 82, 81, 81, - 63, 57, 55, 58, 60, 67, 75, 78, 82, 89, 92, 88, 87, 85, 84, 81, 64, 58, - 55, 58, 61, 68, 75, 78, 82, 89, 92, 90, 89, 87, 86, 86, 64, 59, 56, 58, - 61, 68, 75, 79, 83, 90, 93, 95, 93, 91, 89, 87, 67, 61, 58, 60, 63, 69, - 76, 79, 85, 92, 95, 96, 94, 92, 91, 91, 68, 62, 59, 60, 64, 71, 74, 78, - 86, 91, 94, 96, 98, 96, 94, 91, 69, 62, 60, 60, 65, 70, 72, 79, 85, 88, - 95, 98, 99, 98, 97, 96, 70, 63, 62, 60, 66, 69, 73, 81, 83, 89, 96, 97, - 99, 101, 98, 97, 71, 64, 63, 61, 67, 68, 74, 79, 82, 90, 93, 98, 102, - 102, 102, 101, 72, 65, 64, 62, 66, 68, 75, 78, 83, 89, 92, 100, 101, - 103, 104, 102, 73, 66, 65, 63, 66, 69, 75, 76, 84, 87, 93, 98, 102, 105, - 106, 107, 74, 67, 67, 64, 66, 70, 74, 77, 84, 86, 94, 96, 103, 105, 106, - 107, 75, 68, 68, 65, 66, 71, 74, 78, 83, 87, 93, 96, 103, 105, 109, 109, - 76, 69, 69, 66, 67, 72, 73, 80, 82, 88, 91, 97, 101, 107, 109, 110, 77, - 70, 70, 67, 67, 73, 73, 81, 81, 90, 90, 99, 99, 108, 108, 113, - /* Size 32x16 */ - 32, 31, 30, 32, 33, 37, 42, 45, 49, 48, 49, 49, 50, 52, 54, 55, 57, 60, - 63, 64, 64, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 31, 31, 32, 34, - 36, 40, 43, 44, 46, 46, 45, 46, 46, 48, 50, 51, 52, 54, 57, 58, 59, 61, - 62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 37, 38, 40, 41, 43, 47, 47, 47, - 48, 47, 46, 46, 46, 47, 49, 49, 50, 52, 55, 55, 56, 58, 59, 60, 62, 63, - 64, 65, 67, 68, 69, 70, 42, 42, 42, 44, 45, 47, 48, 49, 50, 50, 49, 49, - 50, 50, 52, 52, 53, 55, 58, 58, 58, 60, 60, 60, 60, 61, 62, 63, 64, 65, - 66, 67, 48, 47, 
46, 46, 47, 47, 50, 51, 53, 53, 53, 53, 54, 54, 55, 56, - 56, 58, 60, 61, 61, 63, 64, 65, 66, 67, 66, 66, 66, 66, 67, 67, 49, 47, - 45, 45, 46, 45, 49, 51, 53, 56, 58, 59, 59, 61, 62, 63, 64, 65, 67, 68, - 68, 69, 71, 70, 69, 68, 68, 69, 70, 71, 72, 73, 52, 50, 48, 48, 47, 47, - 50, 52, 54, 57, 61, 62, 64, 66, 68, 69, 70, 72, 75, 75, 75, 76, 74, 72, - 73, 74, 75, 75, 74, 74, 73, 73, 54, 52, 50, 49, 49, 48, 52, 54, 55, 59, - 62, 64, 65, 68, 71, 72, 73, 75, 78, 78, 79, 79, 78, 79, 81, 79, 78, 76, - 77, 78, 80, 81, 57, 54, 52, 51, 51, 50, 53, 55, 57, 60, 64, 65, 67, 71, - 73, 75, 76, 79, 82, 82, 83, 85, 86, 85, 83, 82, 83, 84, 84, 83, 82, 81, - 63, 60, 57, 57, 56, 54, 57, 59, 60, 64, 67, 69, 71, 75, 78, 80, 82, 85, - 89, 89, 90, 92, 91, 88, 89, 90, 89, 87, 86, 87, 88, 90, 66, 63, 60, 59, - 59, 57, 60, 61, 62, 66, 69, 71, 73, 77, 80, 82, 84, 88, 92, 92, 93, 95, - 94, 95, 96, 93, 92, 93, 94, 93, 91, 90, 67, 64, 62, 61, 60, 58, 58, 61, - 63, 65, 67, 70, 72, 74, 78, 80, 82, 86, 88, 90, 95, 96, 96, 98, 97, 98, - 100, 98, 96, 96, 97, 99, 68, 65, 63, 62, 60, 60, 59, 61, 62, 65, 66, 68, - 72, 73, 76, 79, 80, 84, 87, 89, 93, 94, 98, 99, 99, 102, 101, 102, 103, - 103, 101, 99, 69, 66, 65, 63, 62, 61, 60, 60, 63, 64, 66, 68, 70, 73, - 74, 78, 80, 82, 85, 87, 91, 92, 96, 98, 101, 102, 103, 105, 105, 105, - 107, 108, 71, 67, 66, 64, 63, 62, 62, 61, 62, 64, 66, 67, 70, 71, 75, - 76, 79, 81, 84, 86, 89, 91, 94, 97, 98, 102, 104, 106, 106, 109, 109, - 108, 72, 68, 68, 65, 65, 63, 63, 61, 62, 65, 65, 68, 69, 72, 73, 77, 77, - 81, 81, 86, 87, 91, 91, 96, 97, 101, 102, 107, 107, 109, 110, 113, - /* Size 4x16 */ - 31, 49, 63, 69, 32, 45, 57, 65, 36, 46, 56, 62, 43, 49, 57, 60, 46, 53, - 60, 63, 45, 58, 67, 66, 46, 59, 71, 70, 50, 62, 78, 74, 52, 64, 82, 80, - 57, 67, 89, 85, 59, 68, 90, 91, 62, 71, 91, 96, 63, 69, 89, 101, 65, 68, - 89, 103, 67, 70, 86, 105, 69, 72, 88, 107, - /* Size 16x4 */ - 31, 32, 36, 43, 46, 45, 46, 50, 52, 57, 59, 62, 63, 65, 67, 69, 49, 45, - 46, 49, 53, 58, 59, 
62, 64, 67, 68, 71, 69, 68, 70, 72, 63, 57, 56, 57, - 60, 67, 71, 78, 82, 89, 90, 91, 89, 89, 86, 88, 69, 65, 62, 60, 63, 66, - 70, 74, 80, 85, 91, 96, 101, 103, 105, 107, - /* Size 8x32 */ - 32, 37, 48, 52, 57, 66, 68, 71, 31, 38, 47, 50, 54, 63, 65, 67, 30, 40, - 46, 48, 52, 60, 63, 66, 32, 41, 46, 48, 51, 59, 62, 64, 33, 43, 47, 47, - 51, 59, 60, 63, 37, 47, 47, 47, 50, 57, 60, 62, 42, 47, 50, 50, 53, 60, - 59, 62, 45, 47, 51, 52, 55, 61, 61, 61, 49, 48, 53, 54, 57, 62, 62, 62, - 48, 47, 53, 57, 60, 66, 65, 64, 49, 46, 53, 61, 64, 69, 66, 66, 49, 46, - 53, 62, 65, 71, 68, 67, 50, 46, 54, 64, 67, 73, 72, 70, 52, 47, 54, 66, - 71, 77, 73, 71, 54, 49, 55, 68, 73, 80, 76, 75, 55, 49, 56, 69, 75, 82, - 79, 76, 57, 50, 56, 70, 76, 84, 80, 79, 60, 52, 58, 72, 79, 88, 84, 81, - 63, 55, 60, 75, 82, 92, 87, 84, 64, 55, 61, 75, 82, 92, 89, 86, 64, 56, - 61, 75, 83, 93, 93, 89, 67, 58, 63, 76, 85, 95, 94, 91, 68, 59, 64, 74, - 86, 94, 98, 94, 69, 60, 65, 72, 85, 95, 99, 97, 70, 62, 66, 73, 83, 96, - 99, 98, 71, 63, 67, 74, 82, 93, 102, 102, 72, 64, 66, 75, 83, 92, 101, - 104, 73, 65, 66, 75, 84, 93, 102, 106, 74, 67, 66, 74, 84, 94, 103, 106, - 75, 68, 66, 74, 83, 93, 103, 109, 76, 69, 67, 73, 82, 91, 101, 109, 77, - 70, 67, 73, 81, 90, 99, 108, - /* Size 32x8 */ - 32, 31, 30, 32, 33, 37, 42, 45, 49, 48, 49, 49, 50, 52, 54, 55, 57, 60, - 63, 64, 64, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 37, 38, 40, 41, - 43, 47, 47, 47, 48, 47, 46, 46, 46, 47, 49, 49, 50, 52, 55, 55, 56, 58, - 59, 60, 62, 63, 64, 65, 67, 68, 69, 70, 48, 47, 46, 46, 47, 47, 50, 51, - 53, 53, 53, 53, 54, 54, 55, 56, 56, 58, 60, 61, 61, 63, 64, 65, 66, 67, - 66, 66, 66, 66, 67, 67, 52, 50, 48, 48, 47, 47, 50, 52, 54, 57, 61, 62, - 64, 66, 68, 69, 70, 72, 75, 75, 75, 76, 74, 72, 73, 74, 75, 75, 74, 74, - 73, 73, 57, 54, 52, 51, 51, 50, 53, 55, 57, 60, 64, 65, 67, 71, 73, 75, - 76, 79, 82, 82, 83, 85, 86, 85, 83, 82, 83, 84, 84, 83, 82, 81, 66, 63, - 60, 59, 59, 57, 60, 61, 62, 66, 69, 71, 73, 77, 80, 82, 
84, 88, 92, 92, - 93, 95, 94, 95, 96, 93, 92, 93, 94, 93, 91, 90, 68, 65, 63, 62, 60, 60, - 59, 61, 62, 65, 66, 68, 72, 73, 76, 79, 80, 84, 87, 89, 93, 94, 98, 99, - 99, 102, 101, 102, 103, 103, 101, 99, 71, 67, 66, 64, 63, 62, 62, 61, - 62, 64, 66, 67, 70, 71, 75, 76, 79, 81, 84, 86, 89, 91, 94, 97, 98, 102, - 104, 106, 106, 109, 109, 108 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 41, 69, 92, 41, 63, 88, 103, 69, 88, 127, 140, 92, 103, 140, 184, - /* Size 8x8 */ - 32, 32, 37, 47, 62, 78, 90, 102, 32, 35, 39, 46, 58, 72, 84, 96, 37, 39, - 51, 60, 71, 84, 93, 100, 47, 46, 60, 73, 87, 100, 106, 113, 62, 58, 71, - 87, 105, 121, 129, 132, 78, 72, 84, 100, 121, 140, 148, 155, 90, 84, 93, - 106, 129, 148, 169, 183, 102, 96, 100, 113, 132, 155, 183, 201, - /* Size 16x16 */ - 32, 31, 31, 32, 36, 39, 47, 54, 61, 71, 80, 86, 92, 98, 104, 111, 31, - 32, 32, 33, 34, 37, 44, 50, 56, 65, 73, 79, 85, 91, 98, 105, 31, 32, 33, - 34, 36, 39, 45, 50, 56, 64, 71, 77, 82, 88, 94, 100, 32, 33, 34, 36, 40, - 42, 47, 51, 57, 65, 71, 76, 80, 85, 91, 98, 36, 34, 36, 40, 48, 50, 56, - 60, 65, 73, 79, 84, 86, 90, 95, 98, 39, 37, 39, 42, 50, 54, 60, 65, 70, - 78, 84, 89, 95, 96, 102, 105, 47, 44, 45, 47, 56, 60, 69, 75, 81, 89, - 95, 100, 102, 104, 109, 112, 54, 50, 50, 51, 60, 65, 75, 82, 89, 97, - 104, 109, 110, 114, 117, 121, 61, 56, 56, 57, 65, 70, 81, 89, 97, 106, - 113, 119, 122, 126, 125, 130, 71, 65, 64, 65, 73, 78, 89, 97, 106, 117, - 125, 131, 134, 134, 136, 141, 80, 73, 71, 71, 79, 84, 95, 104, 113, 125, - 134, 140, 142, 145, 146, 152, 86, 79, 77, 76, 84, 89, 100, 109, 119, - 131, 140, 147, 154, 157, 160, 165, 92, 85, 82, 80, 86, 95, 102, 110, - 122, 134, 142, 154, 162, 168, 174, 178, 98, 91, 88, 85, 90, 96, 104, - 114, 126, 134, 145, 157, 168, 176, 184, 193, 104, 98, 94, 91, 95, 102, - 109, 117, 125, 136, 146, 160, 174, 184, 193, 201, 111, 105, 100, 98, 98, - 105, 112, 121, 130, 141, 152, 165, 178, 193, 201, 210, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 32, 32, 34, 36, 38, 
39, 44, 47, 49, 54, 59, 61, 65, - 71, 76, 80, 83, 86, 89, 92, 95, 98, 101, 104, 108, 111, 114, 31, 32, 32, - 32, 32, 32, 33, 34, 35, 37, 38, 42, 45, 47, 51, 56, 58, 62, 68, 72, 76, - 78, 82, 85, 88, 90, 93, 96, 99, 102, 105, 109, 31, 32, 32, 32, 32, 32, - 33, 33, 34, 36, 37, 41, 44, 46, 50, 54, 56, 60, 65, 70, 73, 76, 79, 82, - 85, 88, 91, 95, 98, 101, 105, 109, 31, 32, 32, 32, 32, 33, 33, 34, 35, - 36, 38, 41, 44, 45, 49, 54, 56, 59, 65, 69, 72, 75, 78, 81, 84, 86, 89, - 92, 95, 98, 101, 104, 31, 32, 32, 32, 33, 34, 34, 35, 36, 38, 39, 42, - 45, 46, 50, 54, 56, 59, 64, 68, 71, 74, 77, 79, 82, 85, 88, 91, 94, 97, - 100, 104, 32, 32, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 45, 46, 49, - 53, 55, 58, 63, 66, 69, 72, 74, 78, 81, 84, 87, 90, 93, 96, 99, 102, 32, - 33, 33, 33, 34, 36, 36, 38, 40, 41, 42, 44, 47, 48, 51, 55, 57, 60, 65, - 68, 71, 73, 76, 78, 80, 82, 85, 88, 91, 95, 98, 102, 34, 34, 33, 34, 35, - 37, 38, 39, 42, 44, 45, 47, 50, 51, 54, 58, 60, 63, 68, 71, 74, 76, 79, - 82, 85, 86, 87, 88, 90, 93, 96, 99, 36, 35, 34, 35, 36, 38, 40, 42, 48, - 50, 50, 54, 56, 57, 60, 64, 65, 68, 73, 76, 79, 81, 84, 86, 86, 88, 90, - 93, 95, 97, 98, 100, 38, 37, 36, 36, 38, 39, 41, 44, 50, 51, 52, 56, 58, - 60, 63, 67, 68, 71, 76, 79, 82, 84, 87, 87, 90, 93, 94, 95, 96, 100, - 103, 106, 39, 38, 37, 38, 39, 40, 42, 45, 50, 52, 54, 58, 60, 62, 65, - 69, 70, 73, 78, 81, 84, 86, 89, 92, 95, 95, 96, 99, 102, 104, 105, 106, - 44, 42, 41, 41, 42, 42, 44, 47, 54, 56, 58, 63, 66, 68, 71, 75, 77, 79, - 84, 88, 90, 92, 95, 97, 97, 99, 102, 103, 103, 106, 109, 113, 47, 45, - 44, 44, 45, 45, 47, 50, 56, 58, 60, 66, 69, 71, 75, 79, 81, 84, 89, 92, - 95, 97, 100, 100, 102, 105, 104, 106, 109, 111, 112, 113, 49, 47, 46, - 45, 46, 46, 48, 51, 57, 60, 62, 68, 71, 73, 77, 81, 83, 87, 92, 95, 98, - 100, 103, 105, 107, 106, 109, 112, 112, 113, 117, 120, 54, 51, 50, 49, - 50, 49, 51, 54, 60, 63, 65, 71, 75, 77, 82, 87, 89, 92, 97, 101, 104, - 106, 109, 112, 110, 113, 114, 114, 117, 121, 121, 121, 
59, 56, 54, 54, - 54, 53, 55, 58, 64, 67, 69, 75, 79, 81, 87, 92, 94, 98, 103, 107, 110, - 113, 116, 114, 117, 118, 117, 121, 122, 122, 125, 129, 61, 58, 56, 56, - 56, 55, 57, 60, 65, 68, 70, 77, 81, 83, 89, 94, 97, 101, 106, 110, 113, - 116, 119, 120, 122, 121, 126, 124, 125, 130, 130, 130, 65, 62, 60, 59, - 59, 58, 60, 63, 68, 71, 73, 79, 84, 87, 92, 98, 101, 105, 111, 115, 118, - 121, 124, 128, 125, 129, 128, 131, 133, 132, 135, 139, 71, 68, 65, 65, - 64, 63, 65, 68, 73, 76, 78, 84, 89, 92, 97, 103, 106, 111, 117, 122, - 125, 128, 131, 131, 134, 132, 134, 136, 136, 140, 141, 140, 76, 72, 70, - 69, 68, 66, 68, 71, 76, 79, 81, 88, 92, 95, 101, 107, 110, 115, 122, - 127, 130, 133, 136, 136, 138, 139, 141, 140, 145, 143, 146, 151, 80, 76, - 73, 72, 71, 69, 71, 74, 79, 82, 84, 90, 95, 98, 104, 110, 113, 118, 125, - 130, 134, 137, 140, 146, 142, 146, 145, 149, 146, 150, 152, 151, 83, 78, - 76, 75, 74, 72, 73, 76, 81, 84, 86, 92, 97, 100, 106, 113, 116, 121, - 128, 133, 137, 140, 144, 147, 152, 148, 154, 151, 156, 155, 156, 162, - 86, 82, 79, 78, 77, 74, 76, 79, 84, 87, 89, 95, 100, 103, 109, 116, 119, - 124, 131, 136, 140, 144, 147, 150, 154, 159, 157, 160, 160, 162, 165, - 162, 89, 85, 82, 81, 79, 78, 78, 82, 86, 87, 92, 97, 100, 105, 112, 114, - 120, 128, 131, 136, 146, 147, 150, 155, 156, 161, 166, 165, 167, 169, - 169, 175, 92, 88, 85, 84, 82, 81, 80, 85, 86, 90, 95, 97, 102, 107, 110, - 117, 122, 125, 134, 138, 142, 152, 154, 156, 162, 163, 168, 173, 174, - 174, 178, 176, 95, 90, 88, 86, 85, 84, 82, 86, 88, 93, 95, 99, 105, 106, - 113, 118, 121, 129, 132, 139, 146, 148, 159, 161, 163, 169, 170, 176, - 180, 183, 181, 187, 98, 93, 91, 89, 88, 87, 85, 87, 90, 94, 96, 102, - 104, 109, 114, 117, 126, 128, 134, 141, 145, 154, 157, 166, 168, 170, - 176, 178, 184, 188, 193, 188, 101, 96, 95, 92, 91, 90, 88, 88, 93, 95, - 99, 103, 106, 112, 114, 121, 124, 131, 136, 140, 149, 151, 160, 165, - 173, 176, 178, 184, 186, 192, 196, 203, 104, 99, 98, 95, 94, 93, 91, 90, - 95, 
96, 102, 103, 109, 112, 117, 122, 125, 133, 136, 145, 146, 156, 160, - 167, 174, 180, 184, 186, 193, 194, 201, 204, 108, 102, 101, 98, 97, 96, - 95, 93, 97, 100, 104, 106, 111, 113, 121, 122, 130, 132, 140, 143, 150, - 155, 162, 169, 174, 183, 188, 192, 194, 201, 202, 210, 111, 105, 105, - 101, 100, 99, 98, 96, 98, 103, 105, 109, 112, 117, 121, 125, 130, 135, - 141, 146, 152, 156, 165, 169, 178, 181, 193, 196, 201, 202, 210, 211, - 114, 109, 109, 104, 104, 102, 102, 99, 100, 106, 106, 113, 113, 120, - 121, 129, 130, 139, 140, 151, 151, 162, 162, 175, 176, 187, 188, 203, - 204, 210, 211, 219, - /* Size 4x8 */ - 32, 42, 69, 88, 33, 42, 64, 83, 36, 56, 77, 88, 46, 67, 93, 105, 60, 79, - 112, 122, 75, 92, 130, 144, 86, 95, 136, 167, 98, 105, 136, 177, - /* Size 8x4 */ - 32, 33, 36, 46, 60, 75, 86, 98, 42, 42, 56, 67, 79, 92, 95, 105, 69, 64, - 77, 93, 112, 130, 136, 136, 88, 83, 88, 105, 122, 144, 167, 177, - /* Size 8x16 */ - 32, 32, 36, 47, 65, 79, 90, 96, 31, 32, 35, 44, 60, 72, 84, 90, 32, 34, - 36, 45, 59, 71, 80, 87, 32, 35, 40, 47, 60, 71, 78, 85, 36, 37, 48, 56, - 68, 78, 83, 87, 39, 40, 50, 60, 73, 84, 91, 94, 47, 45, 56, 69, 84, 95, - 101, 101, 53, 50, 60, 75, 92, 103, 108, 110, 61, 56, 65, 81, 100, 113, - 116, 118, 71, 64, 73, 89, 111, 125, 129, 129, 79, 70, 79, 95, 118, 133, - 142, 138, 86, 76, 84, 100, 124, 140, 153, 150, 92, 82, 89, 101, 121, - 148, 157, 161, 98, 88, 93, 108, 124, 141, 163, 174, 104, 94, 95, 110, - 129, 151, 171, 181, 110, 100, 98, 111, 127, 147, 169, 188, - /* Size 16x8 */ - 32, 31, 32, 32, 36, 39, 47, 53, 61, 71, 79, 86, 92, 98, 104, 110, 32, - 32, 34, 35, 37, 40, 45, 50, 56, 64, 70, 76, 82, 88, 94, 100, 36, 35, 36, - 40, 48, 50, 56, 60, 65, 73, 79, 84, 89, 93, 95, 98, 47, 44, 45, 47, 56, - 60, 69, 75, 81, 89, 95, 100, 101, 108, 110, 111, 65, 60, 59, 60, 68, 73, - 84, 92, 100, 111, 118, 124, 121, 124, 129, 127, 79, 72, 71, 71, 78, 84, - 95, 103, 113, 125, 133, 140, 148, 141, 151, 147, 90, 84, 80, 78, 83, 91, - 101, 108, 116, 129, 142, 
153, 157, 163, 171, 169, 96, 90, 87, 85, 87, - 94, 101, 110, 118, 129, 138, 150, 161, 174, 181, 188, - /* Size 16x32 */ - 32, 31, 32, 32, 36, 44, 47, 53, 65, 73, 79, 87, 90, 93, 96, 99, 31, 32, - 32, 33, 35, 42, 45, 51, 62, 69, 75, 83, 86, 88, 91, 94, 31, 32, 32, 33, - 35, 41, 44, 49, 60, 67, 72, 80, 84, 87, 90, 94, 31, 32, 33, 33, 35, 41, - 44, 49, 59, 66, 71, 79, 82, 84, 87, 90, 32, 32, 34, 34, 36, 42, 45, 50, - 59, 65, 71, 78, 80, 83, 87, 90, 32, 33, 35, 36, 38, 42, 45, 49, 58, 64, - 69, 76, 80, 83, 86, 88, 32, 33, 35, 36, 40, 44, 47, 51, 60, 66, 71, 76, - 78, 81, 85, 89, 34, 34, 36, 38, 42, 48, 50, 54, 63, 69, 73, 80, 82, 81, - 84, 86, 36, 34, 37, 40, 48, 54, 56, 60, 68, 74, 78, 84, 83, 86, 87, 87, - 38, 36, 39, 41, 49, 56, 58, 63, 71, 77, 81, 86, 88, 88, 90, 93, 39, 37, - 40, 42, 50, 58, 60, 65, 73, 79, 84, 90, 91, 92, 94, 93, 44, 41, 42, 45, - 53, 63, 66, 71, 79, 85, 90, 96, 94, 96, 96, 99, 47, 44, 45, 47, 56, 66, - 69, 75, 84, 90, 95, 99, 101, 98, 101, 99, 49, 46, 47, 48, 57, 67, 71, - 77, 86, 93, 97, 103, 103, 105, 102, 106, 53, 49, 50, 51, 60, 71, 75, 82, - 92, 99, 103, 111, 108, 107, 110, 107, 58, 54, 54, 55, 63, 75, 79, 87, - 98, 105, 110, 114, 114, 113, 111, 115, 61, 56, 56, 57, 65, 77, 81, 89, - 100, 107, 113, 118, 116, 117, 118, 116, 65, 60, 59, 60, 68, 79, 84, 92, - 105, 112, 118, 126, 124, 122, 121, 124, 71, 65, 64, 65, 73, 84, 89, 97, - 111, 119, 125, 130, 129, 129, 129, 125, 76, 69, 68, 69, 76, 88, 92, 101, - 115, 123, 130, 134, 134, 131, 132, 135, 79, 72, 70, 71, 79, 90, 95, 104, - 118, 127, 133, 143, 142, 141, 138, 136, 82, 75, 73, 74, 81, 92, 97, 106, - 121, 130, 136, 146, 145, 144, 144, 145, 86, 78, 76, 77, 84, 95, 100, - 109, 124, 133, 140, 147, 153, 151, 150, 146, 89, 81, 79, 78, 87, 95, 99, - 112, 124, 130, 145, 152, 156, 157, 156, 158, 92, 84, 82, 80, 89, 95, - 101, 116, 121, 132, 148, 151, 157, 163, 161, 159, 95, 86, 85, 83, 92, - 95, 105, 114, 120, 136, 143, 155, 163, 167, 171, 170, 98, 89, 88, 85, - 93, 95, 108, 113, 124, 136, 141, 160, 
163, 169, 174, 171, 101, 92, 91, - 88, 94, 98, 110, 112, 128, 133, 146, 158, 166, 175, 179, 185, 104, 95, - 94, 91, 95, 101, 110, 115, 129, 132, 151, 154, 171, 175, 181, 186, 107, - 98, 97, 94, 96, 105, 110, 119, 128, 136, 149, 156, 173, 177, 188, 192, - 110, 101, 100, 97, 98, 108, 111, 123, 127, 141, 147, 161, 169, 183, 188, - 193, 114, 104, 104, 100, 100, 111, 111, 126, 127, 145, 145, 166, 166, - 189, 190, 201, - /* Size 32x16 */ - 32, 31, 31, 31, 32, 32, 32, 34, 36, 38, 39, 44, 47, 49, 53, 58, 61, 65, - 71, 76, 79, 82, 86, 89, 92, 95, 98, 101, 104, 107, 110, 114, 31, 32, 32, - 32, 32, 33, 33, 34, 34, 36, 37, 41, 44, 46, 49, 54, 56, 60, 65, 69, 72, - 75, 78, 81, 84, 86, 89, 92, 95, 98, 101, 104, 32, 32, 32, 33, 34, 35, - 35, 36, 37, 39, 40, 42, 45, 47, 50, 54, 56, 59, 64, 68, 70, 73, 76, 79, - 82, 85, 88, 91, 94, 97, 100, 104, 32, 33, 33, 33, 34, 36, 36, 38, 40, - 41, 42, 45, 47, 48, 51, 55, 57, 60, 65, 69, 71, 74, 77, 78, 80, 83, 85, - 88, 91, 94, 97, 100, 36, 35, 35, 35, 36, 38, 40, 42, 48, 49, 50, 53, 56, - 57, 60, 63, 65, 68, 73, 76, 79, 81, 84, 87, 89, 92, 93, 94, 95, 96, 98, - 100, 44, 42, 41, 41, 42, 42, 44, 48, 54, 56, 58, 63, 66, 67, 71, 75, 77, - 79, 84, 88, 90, 92, 95, 95, 95, 95, 95, 98, 101, 105, 108, 111, 47, 45, - 44, 44, 45, 45, 47, 50, 56, 58, 60, 66, 69, 71, 75, 79, 81, 84, 89, 92, - 95, 97, 100, 99, 101, 105, 108, 110, 110, 110, 111, 111, 53, 51, 49, 49, - 50, 49, 51, 54, 60, 63, 65, 71, 75, 77, 82, 87, 89, 92, 97, 101, 104, - 106, 109, 112, 116, 114, 113, 112, 115, 119, 123, 126, 65, 62, 60, 59, - 59, 58, 60, 63, 68, 71, 73, 79, 84, 86, 92, 98, 100, 105, 111, 115, 118, - 121, 124, 124, 121, 120, 124, 128, 129, 128, 127, 127, 73, 69, 67, 66, - 65, 64, 66, 69, 74, 77, 79, 85, 90, 93, 99, 105, 107, 112, 119, 123, - 127, 130, 133, 130, 132, 136, 136, 133, 132, 136, 141, 145, 79, 75, 72, - 71, 71, 69, 71, 73, 78, 81, 84, 90, 95, 97, 103, 110, 113, 118, 125, - 130, 133, 136, 140, 145, 148, 143, 141, 146, 151, 149, 147, 145, 87, 83, - 80, 79, 78, 76, 
76, 80, 84, 86, 90, 96, 99, 103, 111, 114, 118, 126, - 130, 134, 143, 146, 147, 152, 151, 155, 160, 158, 154, 156, 161, 166, - 90, 86, 84, 82, 80, 80, 78, 82, 83, 88, 91, 94, 101, 103, 108, 114, 116, - 124, 129, 134, 142, 145, 153, 156, 157, 163, 163, 166, 171, 173, 169, - 166, 93, 88, 87, 84, 83, 83, 81, 81, 86, 88, 92, 96, 98, 105, 107, 113, - 117, 122, 129, 131, 141, 144, 151, 157, 163, 167, 169, 175, 175, 177, - 183, 189, 96, 91, 90, 87, 87, 86, 85, 84, 87, 90, 94, 96, 101, 102, 110, - 111, 118, 121, 129, 132, 138, 144, 150, 156, 161, 171, 174, 179, 181, - 188, 188, 190, 99, 94, 94, 90, 90, 88, 89, 86, 87, 93, 93, 99, 99, 106, - 107, 115, 116, 124, 125, 135, 136, 145, 146, 158, 159, 170, 171, 185, - 186, 192, 193, 201, - /* Size 4x16 */ - 31, 44, 73, 93, 32, 41, 67, 87, 32, 42, 65, 83, 33, 44, 66, 81, 34, 54, - 74, 86, 37, 58, 79, 92, 44, 66, 90, 98, 49, 71, 99, 107, 56, 77, 107, - 117, 65, 84, 119, 129, 72, 90, 127, 141, 78, 95, 133, 151, 84, 95, 132, - 163, 89, 95, 136, 169, 95, 101, 132, 175, 101, 108, 141, 183, - /* Size 16x4 */ - 31, 32, 32, 33, 34, 37, 44, 49, 56, 65, 72, 78, 84, 89, 95, 101, 44, 41, - 42, 44, 54, 58, 66, 71, 77, 84, 90, 95, 95, 95, 101, 108, 73, 67, 65, - 66, 74, 79, 90, 99, 107, 119, 127, 133, 132, 136, 132, 141, 93, 87, 83, - 81, 86, 92, 98, 107, 117, 129, 141, 151, 163, 169, 175, 183, - /* Size 8x32 */ - 32, 32, 36, 47, 65, 79, 90, 96, 31, 32, 35, 45, 62, 75, 86, 91, 31, 32, - 35, 44, 60, 72, 84, 90, 31, 33, 35, 44, 59, 71, 82, 87, 32, 34, 36, 45, - 59, 71, 80, 87, 32, 35, 38, 45, 58, 69, 80, 86, 32, 35, 40, 47, 60, 71, - 78, 85, 34, 36, 42, 50, 63, 73, 82, 84, 36, 37, 48, 56, 68, 78, 83, 87, - 38, 39, 49, 58, 71, 81, 88, 90, 39, 40, 50, 60, 73, 84, 91, 94, 44, 42, - 53, 66, 79, 90, 94, 96, 47, 45, 56, 69, 84, 95, 101, 101, 49, 47, 57, - 71, 86, 97, 103, 102, 53, 50, 60, 75, 92, 103, 108, 110, 58, 54, 63, 79, - 98, 110, 114, 111, 61, 56, 65, 81, 100, 113, 116, 118, 65, 59, 68, 84, - 105, 118, 124, 121, 71, 64, 73, 89, 111, 125, 129, 
129, 76, 68, 76, 92, - 115, 130, 134, 132, 79, 70, 79, 95, 118, 133, 142, 138, 82, 73, 81, 97, - 121, 136, 145, 144, 86, 76, 84, 100, 124, 140, 153, 150, 89, 79, 87, 99, - 124, 145, 156, 156, 92, 82, 89, 101, 121, 148, 157, 161, 95, 85, 92, - 105, 120, 143, 163, 171, 98, 88, 93, 108, 124, 141, 163, 174, 101, 91, - 94, 110, 128, 146, 166, 179, 104, 94, 95, 110, 129, 151, 171, 181, 107, - 97, 96, 110, 128, 149, 173, 188, 110, 100, 98, 111, 127, 147, 169, 188, - 114, 104, 100, 111, 127, 145, 166, 190, - /* Size 32x8 */ - 32, 31, 31, 31, 32, 32, 32, 34, 36, 38, 39, 44, 47, 49, 53, 58, 61, 65, - 71, 76, 79, 82, 86, 89, 92, 95, 98, 101, 104, 107, 110, 114, 32, 32, 32, - 33, 34, 35, 35, 36, 37, 39, 40, 42, 45, 47, 50, 54, 56, 59, 64, 68, 70, - 73, 76, 79, 82, 85, 88, 91, 94, 97, 100, 104, 36, 35, 35, 35, 36, 38, - 40, 42, 48, 49, 50, 53, 56, 57, 60, 63, 65, 68, 73, 76, 79, 81, 84, 87, - 89, 92, 93, 94, 95, 96, 98, 100, 47, 45, 44, 44, 45, 45, 47, 50, 56, 58, - 60, 66, 69, 71, 75, 79, 81, 84, 89, 92, 95, 97, 100, 99, 101, 105, 108, - 110, 110, 110, 111, 111, 65, 62, 60, 59, 59, 58, 60, 63, 68, 71, 73, 79, - 84, 86, 92, 98, 100, 105, 111, 115, 118, 121, 124, 124, 121, 120, 124, - 128, 129, 128, 127, 127, 79, 75, 72, 71, 71, 69, 71, 73, 78, 81, 84, 90, - 95, 97, 103, 110, 113, 118, 125, 130, 133, 136, 140, 145, 148, 143, 141, - 146, 151, 149, 147, 145, 90, 86, 84, 82, 80, 80, 78, 82, 83, 88, 91, 94, - 101, 103, 108, 114, 116, 124, 129, 134, 142, 145, 153, 156, 157, 163, - 163, 166, 171, 173, 169, 166, 96, 91, 90, 87, 87, 86, 85, 84, 87, 90, - 94, 96, 101, 102, 110, 111, 118, 121, 129, 132, 138, 144, 150, 156, 161, - 171, 174, 179, 181, 188, 188, 190 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 45, 56, 64, 45, 58, 66, 69, 56, 66, 86, 87, 64, 69, 87, 105, - /* Size 8x8 */ - 31, 38, 47, 48, 54, 61, 66, 69, 38, 47, 47, 46, 50, 55, 61, 65, 47, 47, - 53, 55, 58, 63, 65, 66, 48, 46, 55, 62, 67, 72, 73, 73, 54, 50, 58, 67, - 76, 83, 84, 82, 61, 55, 63, 72, 83, 91, 92, 92, 66, 61, 65, 
73, 84, 92, - 101, 103, 69, 65, 66, 73, 82, 92, 103, 109, - /* Size 16x16 */ - 32, 30, 33, 38, 49, 48, 50, 52, 55, 60, 63, 66, 68, 70, 72, 74, 30, 31, - 35, 41, 46, 46, 46, 48, 51, 55, 58, 60, 63, 65, 68, 70, 33, 35, 39, 44, - 47, 46, 46, 47, 50, 53, 56, 58, 60, 62, 65, 67, 38, 41, 44, 47, 49, 48, - 47, 48, 50, 53, 55, 58, 58, 60, 62, 65, 49, 46, 47, 49, 53, 53, 54, 54, - 56, 58, 60, 62, 62, 63, 64, 64, 48, 46, 46, 48, 53, 54, 56, 57, 59, 61, - 63, 65, 67, 66, 68, 68, 50, 46, 46, 47, 54, 56, 61, 63, 65, 68, 70, 72, - 71, 71, 72, 72, 52, 48, 47, 48, 54, 57, 63, 66, 69, 72, 75, 76, 75, 76, - 76, 76, 55, 51, 50, 50, 56, 59, 65, 69, 73, 77, 79, 81, 81, 81, 80, 80, - 60, 55, 53, 53, 58, 61, 68, 72, 77, 82, 85, 87, 87, 85, 84, 85, 63, 58, - 56, 55, 60, 63, 70, 75, 79, 85, 89, 91, 91, 90, 89, 90, 66, 60, 58, 58, - 62, 65, 72, 76, 81, 87, 91, 94, 96, 95, 95, 95, 68, 63, 60, 58, 62, 67, - 71, 75, 81, 87, 91, 96, 99, 100, 100, 100, 70, 65, 62, 60, 63, 66, 71, - 76, 81, 85, 90, 95, 100, 103, 104, 105, 72, 68, 65, 62, 64, 68, 72, 76, - 80, 84, 89, 95, 100, 104, 107, 108, 74, 70, 67, 65, 64, 68, 72, 76, 80, - 85, 90, 95, 100, 105, 108, 111, - /* Size 32x32 */ - 32, 31, 30, 31, 33, 36, 38, 41, 49, 49, 48, 49, 50, 51, 52, 54, 55, 57, - 60, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 31, 31, 31, 32, - 34, 38, 40, 42, 47, 47, 47, 47, 48, 48, 50, 52, 53, 54, 57, 59, 60, 61, - 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 30, 31, 31, 32, 35, 39, 41, 42, - 46, 46, 46, 45, 46, 47, 48, 50, 51, 52, 55, 57, 58, 59, 60, 62, 63, 64, - 65, 67, 68, 69, 70, 71, 31, 32, 32, 33, 36, 40, 41, 43, 46, 46, 45, 45, - 46, 46, 47, 49, 50, 51, 54, 56, 57, 58, 59, 61, 62, 63, 63, 64, 65, 66, - 67, 68, 33, 34, 35, 36, 39, 43, 44, 45, 47, 46, 46, 45, 46, 47, 47, 49, - 50, 51, 53, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 36, 38, - 39, 40, 43, 47, 47, 47, 48, 47, 46, 45, 46, 46, 47, 48, 49, 50, 52, 53, - 54, 55, 56, 58, 59, 61, 62, 63, 64, 65, 66, 66, 38, 40, 41, 41, 44, 47, - 47, 48, 49, 48, 
48, 47, 47, 47, 48, 49, 50, 51, 53, 54, 55, 56, 58, 58, - 58, 59, 60, 61, 62, 64, 65, 66, 41, 42, 42, 43, 45, 47, 48, 48, 50, 50, - 49, 49, 50, 50, 50, 52, 52, 53, 55, 56, 57, 58, 59, 60, 61, 61, 61, 61, - 62, 63, 63, 64, 49, 47, 46, 46, 47, 48, 49, 50, 53, 53, 53, 53, 54, 54, - 54, 55, 56, 56, 58, 59, 60, 61, 62, 63, 62, 62, 63, 64, 64, 64, 64, 64, - 49, 47, 46, 46, 46, 47, 48, 50, 53, 53, 54, 55, 55, 55, 56, 57, 58, 58, - 60, 61, 62, 63, 64, 64, 64, 65, 65, 65, 65, 66, 67, 68, 48, 47, 46, 45, - 46, 46, 48, 49, 53, 54, 54, 55, 56, 56, 57, 58, 59, 60, 61, 63, 63, 64, - 65, 66, 67, 66, 66, 67, 68, 68, 68, 68, 49, 47, 45, 45, 45, 45, 47, 49, - 53, 55, 55, 58, 59, 60, 61, 62, 63, 63, 65, 66, 67, 68, 69, 69, 68, 68, - 69, 69, 69, 69, 70, 71, 50, 48, 46, 46, 46, 46, 47, 50, 54, 55, 56, 59, - 61, 61, 63, 64, 65, 66, 68, 69, 70, 71, 72, 71, 71, 72, 71, 71, 72, 72, - 72, 71, 51, 48, 47, 46, 47, 46, 47, 50, 54, 55, 56, 60, 61, 62, 64, 66, - 66, 67, 69, 70, 71, 72, 73, 73, 74, 73, 73, 74, 73, 73, 74, 75, 52, 50, - 48, 47, 47, 47, 48, 50, 54, 56, 57, 61, 63, 64, 66, 68, 69, 70, 72, 74, - 75, 75, 76, 77, 75, 76, 76, 75, 76, 77, 76, 75, 54, 52, 50, 49, 49, 48, - 49, 52, 55, 57, 58, 62, 64, 66, 68, 71, 72, 73, 75, 77, 78, 79, 80, 78, - 79, 78, 77, 78, 78, 77, 78, 79, 55, 53, 51, 50, 50, 49, 50, 52, 56, 58, - 59, 63, 65, 66, 69, 72, 73, 74, 77, 78, 79, 80, 81, 81, 81, 80, 81, 80, - 80, 81, 80, 79, 57, 54, 52, 51, 51, 50, 51, 53, 56, 58, 60, 63, 66, 67, - 70, 73, 74, 76, 79, 80, 82, 83, 84, 85, 83, 84, 83, 83, 83, 82, 82, 83, - 60, 57, 55, 54, 53, 52, 53, 55, 58, 60, 61, 65, 68, 69, 72, 75, 77, 79, - 82, 84, 85, 86, 87, 86, 87, 85, 85, 85, 84, 86, 85, 84, 62, 59, 57, 56, - 55, 53, 54, 56, 59, 61, 63, 66, 69, 70, 74, 77, 78, 80, 84, 86, 87, 88, - 90, 89, 89, 88, 88, 87, 88, 87, 87, 88, 63, 60, 58, 57, 56, 54, 55, 57, - 60, 62, 63, 67, 70, 71, 75, 78, 79, 82, 85, 87, 89, 90, 91, 93, 91, 91, - 90, 91, 89, 90, 90, 89, 65, 61, 59, 58, 57, 55, 56, 58, 61, 63, 64, 68, - 71, 72, 75, 79, 
80, 83, 86, 88, 90, 91, 93, 94, 95, 92, 94, 92, 93, 92, - 91, 93, 66, 63, 60, 59, 58, 56, 58, 59, 62, 64, 65, 69, 72, 73, 76, 80, - 81, 84, 87, 90, 91, 93, 94, 95, 96, 97, 95, 95, 95, 95, 95, 93, 67, 64, - 62, 61, 59, 58, 58, 60, 63, 64, 66, 69, 71, 73, 77, 78, 81, 85, 86, 89, - 93, 94, 95, 97, 97, 98, 99, 97, 97, 97, 96, 98, 68, 65, 63, 62, 60, 59, - 58, 61, 62, 64, 67, 68, 71, 74, 75, 79, 81, 83, 87, 89, 91, 95, 96, 97, - 99, 98, 100, 100, 100, 99, 100, 98, 69, 66, 64, 63, 61, 61, 59, 61, 62, - 65, 66, 68, 72, 73, 76, 78, 80, 84, 85, 88, 91, 92, 97, 98, 98, 101, - 100, 102, 102, 103, 101, 102, 70, 67, 65, 63, 62, 62, 60, 61, 63, 65, - 66, 69, 71, 73, 76, 77, 81, 83, 85, 88, 90, 94, 95, 99, 100, 100, 103, - 102, 104, 104, 105, 103, 71, 67, 67, 64, 63, 63, 61, 61, 64, 65, 67, 69, - 71, 74, 75, 78, 80, 83, 85, 87, 91, 92, 95, 97, 100, 102, 102, 105, 104, - 106, 106, 108, 72, 68, 68, 65, 65, 64, 62, 62, 64, 65, 68, 69, 72, 73, - 76, 78, 80, 83, 84, 88, 89, 93, 95, 97, 100, 102, 104, 104, 107, 106, - 108, 108, 73, 69, 69, 66, 66, 65, 64, 63, 64, 66, 68, 69, 72, 73, 77, - 77, 81, 82, 86, 87, 90, 92, 95, 97, 99, 103, 104, 106, 106, 109, 108, - 110, 74, 70, 70, 67, 67, 66, 65, 63, 64, 67, 68, 70, 72, 74, 76, 78, 80, - 82, 85, 87, 90, 91, 95, 96, 100, 101, 105, 106, 108, 108, 111, 110, 75, - 71, 71, 68, 68, 66, 66, 64, 64, 68, 68, 71, 71, 75, 75, 79, 79, 83, 84, - 88, 89, 93, 93, 98, 98, 102, 103, 108, 108, 110, 110, 113, - /* Size 4x8 */ - 31, 47, 57, 65, 40, 45, 52, 61, 46, 55, 61, 63, 47, 60, 70, 72, 52, 64, - 79, 81, 59, 68, 87, 90, 63, 66, 88, 99, 66, 69, 85, 102, - /* Size 8x4 */ - 31, 40, 46, 47, 52, 59, 63, 66, 47, 45, 55, 60, 64, 68, 66, 69, 57, 52, - 61, 70, 79, 87, 88, 85, 65, 61, 63, 72, 81, 90, 99, 102, - /* Size 8x16 */ - 32, 35, 48, 50, 57, 63, 68, 70, 30, 38, 46, 46, 52, 58, 63, 65, 33, 41, - 47, 46, 51, 56, 60, 63, 39, 46, 48, 47, 51, 55, 58, 61, 49, 48, 53, 54, - 57, 60, 61, 61, 48, 46, 53, 56, 60, 64, 65, 65, 50, 46, 54, 61, 66, 70, - 71, 69, 52, 47, 
54, 63, 71, 75, 75, 74, 55, 49, 56, 65, 74, 79, 79, 78, - 60, 53, 58, 68, 79, 85, 85, 82, 63, 55, 60, 70, 82, 89, 91, 87, 66, 58, - 62, 72, 84, 91, 95, 91, 68, 60, 64, 71, 81, 94, 97, 96, 70, 62, 65, 73, - 81, 89, 98, 101, 72, 65, 65, 72, 82, 92, 100, 103, 74, 67, 65, 71, 79, - 89, 98, 105, - /* Size 16x8 */ - 32, 30, 33, 39, 49, 48, 50, 52, 55, 60, 63, 66, 68, 70, 72, 74, 35, 38, - 41, 46, 48, 46, 46, 47, 49, 53, 55, 58, 60, 62, 65, 67, 48, 46, 47, 48, - 53, 53, 54, 54, 56, 58, 60, 62, 64, 65, 65, 65, 50, 46, 46, 47, 54, 56, - 61, 63, 65, 68, 70, 72, 71, 73, 72, 71, 57, 52, 51, 51, 57, 60, 66, 71, - 74, 79, 82, 84, 81, 81, 82, 79, 63, 58, 56, 55, 60, 64, 70, 75, 79, 85, - 89, 91, 94, 89, 92, 89, 68, 63, 60, 58, 61, 65, 71, 75, 79, 85, 91, 95, - 97, 98, 100, 98, 70, 65, 63, 61, 61, 65, 69, 74, 78, 82, 87, 91, 96, - 101, 103, 105, - /* Size 16x32 */ - 32, 31, 35, 38, 48, 49, 50, 52, 57, 61, 63, 67, 68, 69, 70, 71, 31, 31, - 37, 40, 47, 47, 48, 50, 54, 57, 60, 63, 64, 65, 66, 67, 30, 32, 38, 40, - 46, 45, 46, 48, 52, 55, 58, 61, 63, 64, 65, 67, 31, 33, 38, 41, 46, 45, - 46, 48, 52, 55, 57, 60, 61, 62, 63, 64, 33, 36, 41, 44, 47, 46, 46, 47, - 51, 54, 56, 59, 60, 61, 63, 64, 37, 40, 45, 47, 47, 45, 46, 47, 50, 52, - 54, 57, 59, 61, 62, 62, 39, 41, 46, 47, 48, 47, 47, 48, 51, 54, 55, 57, - 58, 59, 61, 62, 42, 43, 46, 48, 50, 49, 50, 50, 53, 56, 57, 60, 60, 59, - 60, 60, 49, 46, 48, 49, 53, 53, 54, 54, 57, 59, 60, 63, 61, 62, 61, 61, - 48, 46, 47, 48, 53, 55, 55, 56, 58, 61, 62, 64, 64, 63, 63, 64, 48, 46, - 46, 48, 53, 56, 56, 57, 60, 62, 64, 66, 65, 65, 65, 64, 49, 45, 45, 47, - 53, 58, 59, 61, 64, 66, 67, 69, 67, 67, 66, 67, 50, 46, 46, 48, 54, 59, - 61, 63, 66, 68, 70, 71, 71, 68, 69, 67, 51, 47, 47, 48, 54, 60, 61, 64, - 68, 70, 71, 73, 72, 72, 70, 71, 52, 48, 47, 48, 54, 61, 63, 66, 71, 73, - 75, 77, 75, 73, 74, 71, 54, 50, 49, 50, 55, 62, 65, 68, 73, 76, 78, 79, - 78, 76, 74, 75, 55, 51, 49, 50, 56, 63, 65, 69, 74, 77, 79, 81, 79, 78, - 78, 75, 57, 52, 50, 51, 
56, 64, 66, 70, 76, 79, 82, 85, 83, 81, 79, 79, - 60, 54, 53, 53, 58, 65, 68, 72, 79, 82, 85, 87, 85, 84, 82, 80, 62, 56, - 54, 55, 60, 66, 69, 74, 81, 84, 87, 88, 87, 85, 84, 84, 63, 57, 55, 56, - 60, 67, 70, 75, 82, 86, 89, 92, 91, 89, 87, 84, 64, 59, 56, 57, 61, 68, - 71, 75, 83, 87, 90, 93, 92, 90, 89, 89, 66, 60, 58, 58, 62, 69, 72, 76, - 84, 88, 91, 94, 95, 93, 91, 89, 67, 61, 59, 58, 63, 68, 71, 78, 83, 86, - 93, 96, 96, 96, 94, 94, 68, 62, 60, 59, 64, 67, 71, 79, 81, 86, 94, 95, - 97, 98, 96, 94, 69, 63, 61, 60, 65, 66, 72, 77, 80, 88, 91, 96, 99, 99, - 100, 98, 70, 64, 62, 60, 65, 66, 73, 76, 81, 87, 89, 97, 98, 100, 101, - 99, 71, 65, 64, 61, 65, 67, 73, 74, 82, 85, 90, 95, 99, 102, 103, 104, - 72, 65, 65, 62, 65, 68, 72, 75, 82, 83, 92, 93, 100, 102, 103, 104, 73, - 66, 66, 63, 65, 69, 72, 76, 81, 85, 90, 93, 100, 102, 105, 106, 74, 67, - 67, 64, 65, 70, 71, 77, 79, 86, 89, 94, 98, 103, 105, 106, 75, 68, 68, - 65, 65, 71, 71, 78, 78, 87, 87, 96, 96, 105, 105, 109, - /* Size 32x16 */ - 32, 31, 30, 31, 33, 37, 39, 42, 49, 48, 48, 49, 50, 51, 52, 54, 55, 57, - 60, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 31, 31, 32, 33, - 36, 40, 41, 43, 46, 46, 46, 45, 46, 47, 48, 50, 51, 52, 54, 56, 57, 59, - 60, 61, 62, 63, 64, 65, 65, 66, 67, 68, 35, 37, 38, 38, 41, 45, 46, 46, - 48, 47, 46, 45, 46, 47, 47, 49, 49, 50, 53, 54, 55, 56, 58, 59, 60, 61, - 62, 64, 65, 66, 67, 68, 38, 40, 40, 41, 44, 47, 47, 48, 49, 48, 48, 47, - 48, 48, 48, 50, 50, 51, 53, 55, 56, 57, 58, 58, 59, 60, 60, 61, 62, 63, - 64, 65, 48, 47, 46, 46, 47, 47, 48, 50, 53, 53, 53, 53, 54, 54, 54, 55, - 56, 56, 58, 60, 60, 61, 62, 63, 64, 65, 65, 65, 65, 65, 65, 65, 49, 47, - 45, 45, 46, 45, 47, 49, 53, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, - 67, 68, 69, 68, 67, 66, 66, 67, 68, 69, 70, 71, 50, 48, 46, 46, 46, 46, - 47, 50, 54, 55, 56, 59, 61, 61, 63, 65, 65, 66, 68, 69, 70, 71, 72, 71, - 71, 72, 73, 73, 72, 72, 71, 71, 52, 50, 48, 48, 47, 47, 48, 50, 54, 56, - 57, 61, 63, 64, 66, 68, 
69, 70, 72, 74, 75, 75, 76, 78, 79, 77, 76, 74, - 75, 76, 77, 78, 57, 54, 52, 52, 51, 50, 51, 53, 57, 58, 60, 64, 66, 68, - 71, 73, 74, 76, 79, 81, 82, 83, 84, 83, 81, 80, 81, 82, 82, 81, 79, 78, - 61, 57, 55, 55, 54, 52, 54, 56, 59, 61, 62, 66, 68, 70, 73, 76, 77, 79, - 82, 84, 86, 87, 88, 86, 86, 88, 87, 85, 83, 85, 86, 87, 63, 60, 58, 57, - 56, 54, 55, 57, 60, 62, 64, 67, 70, 71, 75, 78, 79, 82, 85, 87, 89, 90, - 91, 93, 94, 91, 89, 90, 92, 90, 89, 87, 67, 63, 61, 60, 59, 57, 57, 60, - 63, 64, 66, 69, 71, 73, 77, 79, 81, 85, 87, 88, 92, 93, 94, 96, 95, 96, - 97, 95, 93, 93, 94, 96, 68, 64, 63, 61, 60, 59, 58, 60, 61, 64, 65, 67, - 71, 72, 75, 78, 79, 83, 85, 87, 91, 92, 95, 96, 97, 99, 98, 99, 100, - 100, 98, 96, 69, 65, 64, 62, 61, 61, 59, 59, 62, 63, 65, 67, 68, 72, 73, - 76, 78, 81, 84, 85, 89, 90, 93, 96, 98, 99, 100, 102, 102, 102, 103, - 105, 70, 66, 65, 63, 63, 62, 61, 60, 61, 63, 65, 66, 69, 70, 74, 74, 78, - 79, 82, 84, 87, 89, 91, 94, 96, 100, 101, 103, 103, 105, 105, 105, 71, - 67, 67, 64, 64, 62, 62, 60, 61, 64, 64, 67, 67, 71, 71, 75, 75, 79, 80, - 84, 84, 89, 89, 94, 94, 98, 99, 104, 104, 106, 106, 109, - /* Size 4x16 */ - 31, 49, 61, 69, 32, 45, 55, 64, 36, 46, 54, 61, 41, 47, 54, 59, 46, 53, - 59, 62, 46, 56, 62, 65, 46, 59, 68, 68, 48, 61, 73, 73, 51, 63, 77, 78, - 54, 65, 82, 84, 57, 67, 86, 89, 60, 69, 88, 93, 62, 67, 86, 98, 64, 66, - 87, 100, 65, 68, 83, 102, 67, 70, 86, 103, - /* Size 16x4 */ - 31, 32, 36, 41, 46, 46, 46, 48, 51, 54, 57, 60, 62, 64, 65, 67, 49, 45, - 46, 47, 53, 56, 59, 61, 63, 65, 67, 69, 67, 66, 68, 70, 61, 55, 54, 54, - 59, 62, 68, 73, 77, 82, 86, 88, 86, 87, 83, 86, 69, 64, 61, 59, 62, 65, - 68, 73, 78, 84, 89, 93, 98, 100, 102, 103, - /* Size 8x32 */ - 32, 35, 48, 50, 57, 63, 68, 70, 31, 37, 47, 48, 54, 60, 64, 66, 30, 38, - 46, 46, 52, 58, 63, 65, 31, 38, 46, 46, 52, 57, 61, 63, 33, 41, 47, 46, - 51, 56, 60, 63, 37, 45, 47, 46, 50, 54, 59, 62, 39, 46, 48, 47, 51, 55, - 58, 61, 42, 46, 50, 50, 53, 57, 60, 60, 49, 48, 
53, 54, 57, 60, 61, 61, - 48, 47, 53, 55, 58, 62, 64, 63, 48, 46, 53, 56, 60, 64, 65, 65, 49, 45, - 53, 59, 64, 67, 67, 66, 50, 46, 54, 61, 66, 70, 71, 69, 51, 47, 54, 61, - 68, 71, 72, 70, 52, 47, 54, 63, 71, 75, 75, 74, 54, 49, 55, 65, 73, 78, - 78, 74, 55, 49, 56, 65, 74, 79, 79, 78, 57, 50, 56, 66, 76, 82, 83, 79, - 60, 53, 58, 68, 79, 85, 85, 82, 62, 54, 60, 69, 81, 87, 87, 84, 63, 55, - 60, 70, 82, 89, 91, 87, 64, 56, 61, 71, 83, 90, 92, 89, 66, 58, 62, 72, - 84, 91, 95, 91, 67, 59, 63, 71, 83, 93, 96, 94, 68, 60, 64, 71, 81, 94, - 97, 96, 69, 61, 65, 72, 80, 91, 99, 100, 70, 62, 65, 73, 81, 89, 98, - 101, 71, 64, 65, 73, 82, 90, 99, 103, 72, 65, 65, 72, 82, 92, 100, 103, - 73, 66, 65, 72, 81, 90, 100, 105, 74, 67, 65, 71, 79, 89, 98, 105, 75, - 68, 65, 71, 78, 87, 96, 105, - /* Size 32x8 */ - 32, 31, 30, 31, 33, 37, 39, 42, 49, 48, 48, 49, 50, 51, 52, 54, 55, 57, - 60, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 35, 37, 38, 38, - 41, 45, 46, 46, 48, 47, 46, 45, 46, 47, 47, 49, 49, 50, 53, 54, 55, 56, - 58, 59, 60, 61, 62, 64, 65, 66, 67, 68, 48, 47, 46, 46, 47, 47, 48, 50, - 53, 53, 53, 53, 54, 54, 54, 55, 56, 56, 58, 60, 60, 61, 62, 63, 64, 65, - 65, 65, 65, 65, 65, 65, 50, 48, 46, 46, 46, 46, 47, 50, 54, 55, 56, 59, - 61, 61, 63, 65, 65, 66, 68, 69, 70, 71, 72, 71, 71, 72, 73, 73, 72, 72, - 71, 71, 57, 54, 52, 52, 51, 50, 51, 53, 57, 58, 60, 64, 66, 68, 71, 73, - 74, 76, 79, 81, 82, 83, 84, 83, 81, 80, 81, 82, 82, 81, 79, 78, 63, 60, - 58, 57, 56, 54, 55, 57, 60, 62, 64, 67, 70, 71, 75, 78, 79, 82, 85, 87, - 89, 90, 91, 93, 94, 91, 89, 90, 92, 90, 89, 87, 68, 64, 63, 61, 60, 59, - 58, 60, 61, 64, 65, 67, 71, 72, 75, 78, 79, 83, 85, 87, 91, 92, 95, 96, - 97, 99, 98, 99, 100, 100, 98, 96, 70, 66, 65, 63, 63, 62, 61, 60, 61, - 63, 65, 66, 69, 70, 74, 74, 78, 79, 82, 84, 87, 89, 91, 94, 96, 100, - 101, 103, 103, 105, 105, 105 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 38, 63, 86, 38, 56, 78, 97, 63, 78, 113, 130, 86, 97, 130, 169, - /* Size 8x8 
*/ - 32, 32, 35, 46, 57, 76, 85, 96, 32, 34, 37, 45, 54, 70, 79, 90, 35, 37, - 48, 56, 64, 79, 87, 93, 46, 45, 56, 70, 80, 96, 100, 105, 57, 54, 64, - 80, 93, 111, 121, 122, 76, 70, 79, 96, 111, 134, 138, 144, 85, 79, 87, - 100, 121, 138, 156, 168, 96, 90, 93, 105, 122, 144, 168, 184, - /* Size 16x16 */ - 32, 31, 31, 32, 34, 39, 44, 49, 58, 65, 71, 81, 87, 93, 98, 104, 31, 32, - 32, 32, 34, 38, 41, 46, 54, 60, 66, 75, 81, 86, 92, 98, 31, 32, 33, 34, - 36, 39, 42, 46, 53, 59, 64, 73, 78, 83, 88, 94, 32, 32, 34, 35, 37, 40, - 42, 46, 52, 58, 63, 71, 75, 80, 86, 92, 34, 34, 36, 37, 42, 47, 50, 53, - 59, 65, 70, 77, 82, 85, 89, 92, 39, 38, 39, 40, 47, 54, 58, 62, 68, 73, - 78, 85, 90, 90, 96, 98, 44, 41, 42, 42, 50, 58, 63, 68, 74, 79, 84, 91, - 96, 98, 102, 104, 49, 46, 46, 46, 53, 62, 68, 73, 81, 87, 92, 99, 103, - 107, 109, 112, 58, 54, 53, 52, 59, 68, 74, 81, 90, 97, 102, 110, 114, - 118, 117, 121, 65, 60, 59, 58, 65, 73, 79, 87, 97, 105, 111, 120, 125, - 125, 126, 130, 71, 66, 64, 63, 70, 78, 84, 92, 102, 111, 117, 127, 133, - 134, 136, 141, 81, 75, 73, 71, 77, 85, 91, 99, 110, 120, 127, 137, 143, - 145, 148, 152, 87, 81, 78, 75, 82, 90, 96, 103, 114, 125, 133, 143, 150, - 156, 160, 163, 93, 86, 83, 80, 85, 90, 98, 107, 118, 125, 134, 145, 156, - 163, 169, 177, 98, 92, 88, 86, 89, 96, 102, 109, 117, 126, 136, 148, - 160, 169, 176, 184, 104, 98, 94, 92, 92, 98, 104, 112, 121, 130, 141, - 152, 163, 177, 184, 191, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 32, 32, 34, 34, 36, 39, 41, 44, 48, 49, 54, 58, 59, - 65, 69, 71, 80, 81, 83, 87, 90, 93, 95, 98, 101, 104, 107, 31, 32, 32, - 32, 32, 32, 32, 34, 34, 35, 38, 39, 42, 46, 47, 51, 55, 57, 62, 66, 68, - 76, 77, 78, 83, 85, 88, 90, 93, 96, 99, 101, 31, 32, 32, 32, 32, 32, 32, - 33, 34, 34, 38, 39, 41, 45, 46, 50, 54, 55, 60, 64, 66, 73, 75, 76, 81, - 83, 86, 89, 92, 95, 98, 101, 31, 32, 32, 32, 32, 32, 32, 33, 34, 34, 37, - 38, 41, 44, 45, 49, 53, 54, 59, 63, 65, 72, 74, 75, 79, 81, 84, 86, 89, - 91, 94, 97, 31, 32, 32, 
32, 33, 33, 34, 35, 36, 36, 39, 40, 42, 45, 46, - 50, 53, 54, 59, 63, 64, 71, 73, 74, 78, 80, 83, 85, 88, 91, 94, 97, 32, - 32, 32, 32, 33, 34, 34, 36, 36, 37, 40, 40, 42, 45, 46, 49, 53, 54, 58, - 62, 63, 70, 72, 73, 77, 79, 82, 85, 87, 90, 92, 95, 32, 32, 32, 32, 34, - 34, 35, 37, 37, 38, 40, 41, 42, 45, 46, 49, 52, 54, 58, 61, 63, 69, 71, - 72, 75, 78, 80, 83, 86, 89, 92, 95, 34, 34, 33, 33, 35, 36, 37, 39, 41, - 42, 45, 46, 47, 50, 51, 54, 57, 59, 63, 66, 68, 74, 75, 76, 80, 81, 82, - 83, 85, 87, 90, 93, 34, 34, 34, 34, 36, 36, 37, 41, 42, 45, 47, 48, 50, - 53, 53, 56, 59, 61, 65, 68, 70, 76, 77, 78, 82, 83, 85, 88, 89, 90, 92, - 93, 36, 35, 34, 34, 36, 37, 38, 42, 45, 48, 50, 51, 54, 56, 57, 60, 63, - 64, 68, 71, 73, 79, 80, 81, 85, 87, 89, 89, 90, 93, 96, 99, 39, 38, 38, - 37, 39, 40, 40, 45, 47, 50, 54, 55, 58, 61, 62, 65, 68, 69, 73, 76, 78, - 84, 85, 86, 90, 89, 90, 93, 96, 97, 98, 99, 41, 39, 39, 38, 40, 40, 41, - 46, 48, 51, 55, 56, 59, 62, 63, 67, 70, 71, 75, 78, 80, 86, 87, 88, 91, - 93, 96, 97, 97, 99, 102, 105, 44, 42, 41, 41, 42, 42, 42, 47, 50, 54, - 58, 59, 63, 66, 68, 71, 74, 75, 79, 83, 84, 90, 91, 92, 96, 98, 98, 99, - 102, 104, 104, 105, 48, 46, 45, 44, 45, 45, 45, 50, 53, 56, 61, 62, 66, - 70, 71, 76, 79, 80, 85, 88, 90, 96, 97, 98, 101, 100, 102, 105, 105, - 105, 109, 112, 49, 47, 46, 45, 46, 46, 46, 51, 53, 57, 62, 63, 68, 71, - 73, 77, 81, 82, 87, 90, 92, 98, 99, 100, 103, 106, 107, 106, 109, 112, - 112, 112, 54, 51, 50, 49, 50, 49, 49, 54, 56, 60, 65, 67, 71, 76, 77, - 82, 86, 87, 92, 96, 97, 104, 105, 106, 110, 110, 109, 113, 114, 113, - 116, 120, 58, 55, 54, 53, 53, 53, 52, 57, 59, 63, 68, 70, 74, 79, 81, - 86, 90, 91, 97, 100, 102, 109, 110, 111, 114, 114, 118, 116, 117, 121, - 121, 120, 59, 57, 55, 54, 54, 54, 54, 59, 61, 64, 69, 71, 75, 80, 82, - 87, 91, 93, 99, 102, 104, 111, 112, 113, 117, 121, 120, 122, 124, 122, - 125, 129, 65, 62, 60, 59, 59, 58, 58, 63, 65, 68, 73, 75, 79, 85, 87, - 92, 97, 99, 105, 109, 111, 118, 120, 121, 
125, 124, 125, 127, 126, 130, - 130, 129, 69, 66, 64, 63, 63, 62, 61, 66, 68, 71, 76, 78, 83, 88, 90, - 96, 100, 102, 109, 113, 115, 123, 125, 126, 129, 130, 131, 130, 134, - 133, 135, 139, 71, 68, 66, 65, 64, 63, 63, 68, 70, 73, 78, 80, 84, 90, - 92, 97, 102, 104, 111, 115, 117, 125, 127, 128, 133, 136, 134, 139, 136, - 139, 141, 140, 80, 76, 73, 72, 71, 70, 69, 74, 76, 79, 84, 86, 90, 96, - 98, 104, 109, 111, 118, 123, 125, 134, 136, 137, 142, 138, 143, 140, - 144, 144, 144, 149, 81, 77, 75, 74, 73, 72, 71, 75, 77, 80, 85, 87, 91, - 97, 99, 105, 110, 112, 120, 125, 127, 136, 137, 139, 143, 148, 145, 148, - 148, 150, 152, 149, 83, 78, 76, 75, 74, 73, 72, 76, 78, 81, 86, 88, 92, - 98, 100, 106, 111, 113, 121, 126, 128, 137, 139, 140, 145, 149, 153, - 153, 154, 155, 155, 161, 87, 83, 81, 79, 78, 77, 75, 80, 82, 85, 90, 91, - 96, 101, 103, 110, 114, 117, 125, 129, 133, 142, 143, 145, 150, 151, - 156, 159, 160, 160, 163, 161, 90, 85, 83, 81, 80, 79, 78, 81, 83, 87, - 89, 93, 98, 100, 106, 110, 114, 121, 124, 130, 136, 138, 148, 149, 151, - 156, 157, 162, 166, 168, 166, 172, 93, 88, 86, 84, 83, 82, 80, 82, 85, - 89, 90, 96, 98, 102, 107, 109, 118, 120, 125, 131, 134, 143, 145, 153, - 156, 157, 163, 164, 169, 172, 177, 172, 95, 90, 89, 86, 85, 85, 83, 83, - 88, 89, 93, 97, 99, 105, 106, 113, 116, 122, 127, 130, 139, 140, 148, - 153, 159, 162, 164, 169, 170, 176, 179, 185, 98, 93, 92, 89, 88, 87, 86, - 85, 89, 90, 96, 97, 102, 105, 109, 114, 117, 124, 126, 134, 136, 144, - 148, 154, 160, 166, 169, 170, 176, 177, 184, 186, 101, 96, 95, 91, 91, - 90, 89, 87, 90, 93, 97, 99, 104, 105, 112, 113, 121, 122, 130, 133, 139, - 144, 150, 155, 160, 168, 172, 176, 177, 184, 185, 191, 104, 99, 98, 94, - 94, 92, 92, 90, 92, 96, 98, 102, 104, 109, 112, 116, 121, 125, 130, 135, - 141, 144, 152, 155, 163, 166, 177, 179, 184, 185, 191, 192, 107, 101, - 101, 97, 97, 95, 95, 93, 93, 99, 99, 105, 105, 112, 112, 120, 120, 129, - 129, 139, 140, 149, 149, 161, 161, 172, 172, 185, 186, 191, 192, 
199, - /* Size 4x8 */ - 32, 38, 62, 86, 32, 40, 58, 80, 34, 51, 68, 85, 44, 61, 85, 101, 54, 69, - 98, 117, 72, 84, 118, 136, 82, 89, 129, 157, 92, 98, 127, 165, - /* Size 8x4 */ - 32, 32, 34, 44, 54, 72, 82, 92, 38, 40, 51, 61, 69, 84, 89, 98, 62, 58, - 68, 85, 98, 118, 129, 127, 86, 80, 85, 101, 117, 136, 157, 165, - /* Size 8x16 */ - 32, 32, 36, 44, 58, 79, 88, 93, 31, 32, 35, 41, 54, 73, 81, 88, 32, 33, - 36, 42, 53, 71, 78, 84, 32, 34, 38, 42, 52, 69, 76, 82, 34, 36, 44, 50, - 59, 75, 81, 84, 39, 39, 50, 58, 68, 84, 88, 90, 44, 42, 53, 63, 74, 90, - 97, 97, 49, 46, 57, 67, 81, 97, 104, 105, 57, 53, 63, 74, 90, 108, 111, - 113, 65, 59, 68, 79, 97, 118, 123, 122, 71, 64, 73, 84, 102, 125, 135, - 131, 81, 72, 80, 91, 110, 135, 145, 141, 87, 77, 85, 96, 114, 140, 148, - 151, 92, 83, 88, 102, 117, 133, 153, 163, 98, 88, 89, 103, 121, 141, - 160, 169, 103, 94, 92, 103, 119, 137, 158, 175, - /* Size 16x8 */ - 32, 31, 32, 32, 34, 39, 44, 49, 57, 65, 71, 81, 87, 92, 98, 103, 32, 32, - 33, 34, 36, 39, 42, 46, 53, 59, 64, 72, 77, 83, 88, 94, 36, 35, 36, 38, - 44, 50, 53, 57, 63, 68, 73, 80, 85, 88, 89, 92, 44, 41, 42, 42, 50, 58, - 63, 67, 74, 79, 84, 91, 96, 102, 103, 103, 58, 54, 53, 52, 59, 68, 74, - 81, 90, 97, 102, 110, 114, 117, 121, 119, 79, 73, 71, 69, 75, 84, 90, - 97, 108, 118, 125, 135, 140, 133, 141, 137, 88, 81, 78, 76, 81, 88, 97, - 104, 111, 123, 135, 145, 148, 153, 160, 158, 93, 88, 84, 82, 84, 90, 97, - 105, 113, 122, 131, 141, 151, 163, 169, 175, - /* Size 16x32 */ - 32, 31, 32, 32, 36, 39, 44, 53, 58, 65, 79, 81, 88, 90, 93, 96, 31, 32, - 32, 32, 35, 38, 42, 51, 55, 62, 75, 77, 83, 86, 88, 91, 31, 32, 32, 32, - 35, 38, 41, 50, 54, 60, 73, 75, 81, 84, 88, 91, 31, 32, 32, 33, 34, 37, - 41, 49, 53, 59, 72, 74, 79, 82, 84, 87, 32, 32, 33, 34, 36, 39, 42, 50, - 53, 59, 71, 72, 78, 81, 84, 87, 32, 32, 34, 34, 37, 40, 42, 49, 53, 58, - 70, 71, 77, 80, 83, 85, 32, 33, 34, 35, 38, 40, 42, 49, 52, 58, 69, 70, - 76, 78, 82, 86, 34, 34, 35, 37, 42, 45, 48, 54, 57, 
63, 73, 75, 79, 79, - 81, 83, 34, 34, 36, 37, 44, 47, 50, 56, 59, 65, 75, 77, 81, 83, 84, 84, - 36, 34, 37, 38, 48, 51, 54, 60, 63, 68, 78, 80, 85, 85, 86, 89, 39, 37, - 39, 40, 50, 54, 58, 65, 68, 73, 84, 85, 88, 89, 90, 89, 40, 38, 40, 41, - 51, 55, 59, 67, 70, 75, 85, 87, 91, 92, 92, 95, 44, 41, 42, 43, 53, 58, - 63, 71, 74, 79, 90, 91, 97, 94, 97, 95, 47, 44, 45, 46, 56, 61, 66, 75, - 79, 85, 95, 97, 99, 101, 98, 102, 49, 46, 46, 47, 57, 62, 67, 77, 81, - 86, 97, 99, 104, 102, 105, 102, 53, 49, 50, 50, 60, 65, 71, 82, 86, 92, - 103, 105, 109, 108, 106, 110, 57, 53, 53, 53, 63, 68, 74, 86, 90, 97, - 108, 110, 111, 112, 113, 110, 59, 54, 54, 54, 64, 69, 75, 87, 91, 98, - 111, 112, 119, 117, 115, 118, 65, 60, 59, 58, 68, 73, 79, 92, 97, 105, - 118, 119, 123, 123, 122, 119, 69, 63, 62, 62, 71, 76, 83, 96, 100, 109, - 122, 124, 127, 125, 125, 128, 71, 65, 64, 63, 73, 78, 84, 97, 102, 111, - 125, 127, 135, 134, 131, 129, 79, 72, 71, 70, 79, 84, 90, 104, 109, 118, - 133, 135, 137, 136, 136, 137, 81, 74, 72, 71, 80, 85, 91, 105, 110, 120, - 135, 137, 145, 143, 141, 138, 82, 75, 73, 72, 81, 86, 92, 106, 111, 121, - 136, 139, 147, 148, 147, 149, 87, 79, 77, 76, 85, 90, 96, 110, 114, 125, - 140, 143, 148, 154, 151, 149, 90, 82, 80, 78, 87, 89, 99, 108, 113, 129, - 135, 146, 153, 157, 160, 159, 92, 84, 83, 81, 88, 90, 102, 106, 117, - 128, 133, 150, 153, 158, 163, 160, 95, 87, 85, 83, 88, 92, 103, 105, - 120, 125, 137, 148, 155, 164, 168, 173, 98, 89, 88, 85, 89, 95, 103, - 108, 121, 124, 141, 144, 160, 164, 169, 174, 100, 92, 91, 88, 90, 98, - 103, 111, 120, 127, 139, 146, 161, 165, 175, 179, 103, 94, 94, 90, 92, - 101, 103, 114, 119, 131, 137, 150, 158, 170, 175, 180, 106, 97, 97, 93, - 93, 104, 104, 118, 118, 135, 135, 154, 155, 175, 176, 187, - /* Size 32x16 */ - 32, 31, 31, 31, 32, 32, 32, 34, 34, 36, 39, 40, 44, 47, 49, 53, 57, 59, - 65, 69, 71, 79, 81, 82, 87, 90, 92, 95, 98, 100, 103, 106, 31, 32, 32, - 32, 32, 32, 33, 34, 34, 34, 37, 38, 41, 44, 46, 49, 53, 54, 
60, 63, 65, - 72, 74, 75, 79, 82, 84, 87, 89, 92, 94, 97, 32, 32, 32, 32, 33, 34, 34, - 35, 36, 37, 39, 40, 42, 45, 46, 50, 53, 54, 59, 62, 64, 71, 72, 73, 77, - 80, 83, 85, 88, 91, 94, 97, 32, 32, 32, 33, 34, 34, 35, 37, 37, 38, 40, - 41, 43, 46, 47, 50, 53, 54, 58, 62, 63, 70, 71, 72, 76, 78, 81, 83, 85, - 88, 90, 93, 36, 35, 35, 34, 36, 37, 38, 42, 44, 48, 50, 51, 53, 56, 57, - 60, 63, 64, 68, 71, 73, 79, 80, 81, 85, 87, 88, 88, 89, 90, 92, 93, 39, - 38, 38, 37, 39, 40, 40, 45, 47, 51, 54, 55, 58, 61, 62, 65, 68, 69, 73, - 76, 78, 84, 85, 86, 90, 89, 90, 92, 95, 98, 101, 104, 44, 42, 41, 41, - 42, 42, 42, 48, 50, 54, 58, 59, 63, 66, 67, 71, 74, 75, 79, 83, 84, 90, - 91, 92, 96, 99, 102, 103, 103, 103, 103, 104, 53, 51, 50, 49, 50, 49, - 49, 54, 56, 60, 65, 67, 71, 75, 77, 82, 86, 87, 92, 96, 97, 104, 105, - 106, 110, 108, 106, 105, 108, 111, 114, 118, 58, 55, 54, 53, 53, 53, 52, - 57, 59, 63, 68, 70, 74, 79, 81, 86, 90, 91, 97, 100, 102, 109, 110, 111, - 114, 113, 117, 120, 121, 120, 119, 118, 65, 62, 60, 59, 59, 58, 58, 63, - 65, 68, 73, 75, 79, 85, 86, 92, 97, 98, 105, 109, 111, 118, 120, 121, - 125, 129, 128, 125, 124, 127, 131, 135, 79, 75, 73, 72, 71, 70, 69, 73, - 75, 78, 84, 85, 90, 95, 97, 103, 108, 111, 118, 122, 125, 133, 135, 136, - 140, 135, 133, 137, 141, 139, 137, 135, 81, 77, 75, 74, 72, 71, 70, 75, - 77, 80, 85, 87, 91, 97, 99, 105, 110, 112, 119, 124, 127, 135, 137, 139, - 143, 146, 150, 148, 144, 146, 150, 154, 88, 83, 81, 79, 78, 77, 76, 79, - 81, 85, 88, 91, 97, 99, 104, 109, 111, 119, 123, 127, 135, 137, 145, - 147, 148, 153, 153, 155, 160, 161, 158, 155, 90, 86, 84, 82, 81, 80, 78, - 79, 83, 85, 89, 92, 94, 101, 102, 108, 112, 117, 123, 125, 134, 136, - 143, 148, 154, 157, 158, 164, 164, 165, 170, 175, 93, 88, 88, 84, 84, - 83, 82, 81, 84, 86, 90, 92, 97, 98, 105, 106, 113, 115, 122, 125, 131, - 136, 141, 147, 151, 160, 163, 168, 169, 175, 175, 176, 96, 91, 91, 87, - 87, 85, 86, 83, 84, 89, 89, 95, 95, 102, 102, 110, 110, 118, 119, 128, - 
129, 137, 138, 149, 149, 159, 160, 173, 174, 179, 180, 187, - /* Size 4x16 */ - 31, 39, 65, 90, 32, 38, 60, 84, 32, 39, 59, 81, 33, 40, 58, 78, 34, 47, - 65, 83, 37, 54, 73, 89, 41, 58, 79, 94, 46, 62, 86, 102, 53, 68, 97, - 112, 60, 73, 105, 123, 65, 78, 111, 134, 74, 85, 120, 143, 79, 90, 125, - 154, 84, 90, 128, 158, 89, 95, 124, 164, 94, 101, 131, 170, - /* Size 16x4 */ - 31, 32, 32, 33, 34, 37, 41, 46, 53, 60, 65, 74, 79, 84, 89, 94, 39, 38, - 39, 40, 47, 54, 58, 62, 68, 73, 78, 85, 90, 90, 95, 101, 65, 60, 59, 58, - 65, 73, 79, 86, 97, 105, 111, 120, 125, 128, 124, 131, 90, 84, 81, 78, - 83, 89, 94, 102, 112, 123, 134, 143, 154, 158, 164, 170, - /* Size 8x32 */ - 32, 32, 36, 44, 58, 79, 88, 93, 31, 32, 35, 42, 55, 75, 83, 88, 31, 32, - 35, 41, 54, 73, 81, 88, 31, 32, 34, 41, 53, 72, 79, 84, 32, 33, 36, 42, - 53, 71, 78, 84, 32, 34, 37, 42, 53, 70, 77, 83, 32, 34, 38, 42, 52, 69, - 76, 82, 34, 35, 42, 48, 57, 73, 79, 81, 34, 36, 44, 50, 59, 75, 81, 84, - 36, 37, 48, 54, 63, 78, 85, 86, 39, 39, 50, 58, 68, 84, 88, 90, 40, 40, - 51, 59, 70, 85, 91, 92, 44, 42, 53, 63, 74, 90, 97, 97, 47, 45, 56, 66, - 79, 95, 99, 98, 49, 46, 57, 67, 81, 97, 104, 105, 53, 50, 60, 71, 86, - 103, 109, 106, 57, 53, 63, 74, 90, 108, 111, 113, 59, 54, 64, 75, 91, - 111, 119, 115, 65, 59, 68, 79, 97, 118, 123, 122, 69, 62, 71, 83, 100, - 122, 127, 125, 71, 64, 73, 84, 102, 125, 135, 131, 79, 71, 79, 90, 109, - 133, 137, 136, 81, 72, 80, 91, 110, 135, 145, 141, 82, 73, 81, 92, 111, - 136, 147, 147, 87, 77, 85, 96, 114, 140, 148, 151, 90, 80, 87, 99, 113, - 135, 153, 160, 92, 83, 88, 102, 117, 133, 153, 163, 95, 85, 88, 103, - 120, 137, 155, 168, 98, 88, 89, 103, 121, 141, 160, 169, 100, 91, 90, - 103, 120, 139, 161, 175, 103, 94, 92, 103, 119, 137, 158, 175, 106, 97, - 93, 104, 118, 135, 155, 176, - /* Size 32x8 */ - 32, 31, 31, 31, 32, 32, 32, 34, 34, 36, 39, 40, 44, 47, 49, 53, 57, 59, - 65, 69, 71, 79, 81, 82, 87, 90, 92, 95, 98, 100, 103, 106, 32, 32, 32, - 32, 33, 34, 34, 35, 36, 
37, 39, 40, 42, 45, 46, 50, 53, 54, 59, 62, 64, - 71, 72, 73, 77, 80, 83, 85, 88, 91, 94, 97, 36, 35, 35, 34, 36, 37, 38, - 42, 44, 48, 50, 51, 53, 56, 57, 60, 63, 64, 68, 71, 73, 79, 80, 81, 85, - 87, 88, 88, 89, 90, 92, 93, 44, 42, 41, 41, 42, 42, 42, 48, 50, 54, 58, - 59, 63, 66, 67, 71, 74, 75, 79, 83, 84, 90, 91, 92, 96, 99, 102, 103, - 103, 103, 103, 104, 58, 55, 54, 53, 53, 53, 52, 57, 59, 63, 68, 70, 74, - 79, 81, 86, 90, 91, 97, 100, 102, 109, 110, 111, 114, 113, 117, 120, - 121, 120, 119, 118, 79, 75, 73, 72, 71, 70, 69, 73, 75, 78, 84, 85, 90, - 95, 97, 103, 108, 111, 118, 122, 125, 133, 135, 136, 140, 135, 133, 137, - 141, 139, 137, 135, 88, 83, 81, 79, 78, 77, 76, 79, 81, 85, 88, 91, 97, - 99, 104, 109, 111, 119, 123, 127, 135, 137, 145, 147, 148, 153, 153, - 155, 160, 161, 158, 155, 93, 88, 88, 84, 84, 83, 82, 81, 84, 86, 90, 92, - 97, 98, 105, 106, 113, 115, 122, 125, 131, 136, 141, 147, 151, 160, 163, - 168, 169, 175, 175, 176 }, - { /* Chroma */ - /* Size 4x4 */ - 32, 45, 53, 63, 45, 55, 62, 67, 53, 62, 80, 84, 63, 67, 84, 101, - /* Size 8x8 */ - 31, 36, 47, 48, 52, 60, 64, 67, 36, 43, 47, 46, 49, 55, 59, 63, 47, 47, - 53, 54, 55, 60, 63, 64, 48, 46, 54, 61, 65, 70, 71, 71, 52, 49, 55, 65, - 71, 78, 81, 79, 60, 55, 60, 70, 78, 89, 89, 89, 64, 59, 63, 71, 81, 89, - 97, 99, 67, 63, 64, 71, 79, 89, 99, 104, - /* Size 16x16 */ - 32, 30, 33, 36, 44, 48, 49, 51, 54, 57, 60, 64, 67, 68, 70, 72, 30, 31, - 35, 39, 44, 46, 46, 47, 50, 53, 55, 59, 61, 64, 66, 68, 33, 35, 39, 43, - 46, 46, 45, 47, 49, 51, 53, 57, 59, 61, 63, 65, 36, 39, 43, 47, 47, 46, - 45, 46, 48, 50, 52, 55, 57, 58, 61, 63, 44, 44, 46, 47, 50, 51, 51, 51, - 53, 54, 56, 59, 61, 61, 63, 62, 48, 46, 46, 46, 51, 54, 55, 56, 58, 60, - 61, 64, 65, 64, 66, 66, 49, 46, 45, 45, 51, 55, 58, 60, 62, 63, 65, 68, - 69, 69, 69, 69, 51, 47, 47, 46, 51, 56, 60, 62, 65, 67, 69, 72, 73, 74, - 73, 73, 54, 50, 49, 48, 53, 58, 62, 65, 70, 73, 75, 78, 79, 79, 77, 77, - 57, 53, 51, 50, 54, 60, 63, 67, 73, 76, 79, 
82, 84, 83, 82, 82, 60, 55, - 53, 52, 56, 61, 65, 69, 75, 79, 82, 86, 88, 87, 86, 87, 64, 59, 57, 55, - 59, 64, 68, 72, 78, 82, 86, 90, 93, 92, 91, 92, 67, 61, 59, 57, 61, 65, - 69, 73, 79, 84, 88, 93, 95, 96, 96, 96, 68, 64, 61, 58, 61, 64, 69, 74, - 79, 83, 87, 92, 96, 99, 100, 101, 70, 66, 63, 61, 63, 66, 69, 73, 77, - 82, 86, 91, 96, 100, 103, 104, 72, 68, 65, 63, 62, 66, 69, 73, 77, 82, - 87, 92, 96, 101, 104, 106, - /* Size 32x32 */ - 32, 31, 30, 30, 33, 35, 36, 41, 44, 49, 48, 48, 49, 50, 51, 52, 54, 55, - 57, 59, 60, 63, 64, 65, 67, 68, 68, 69, 70, 71, 72, 73, 31, 31, 31, 31, - 34, 36, 38, 42, 44, 47, 47, 47, 47, 48, 48, 50, 51, 52, 54, 56, 57, 60, - 61, 61, 63, 64, 65, 66, 67, 67, 68, 69, 30, 31, 31, 31, 35, 37, 39, 42, - 44, 47, 46, 46, 46, 47, 47, 48, 50, 51, 53, 54, 55, 58, 59, 60, 61, 63, - 64, 65, 66, 67, 68, 69, 30, 31, 31, 32, 35, 37, 40, 42, 44, 46, 45, 45, - 45, 46, 46, 47, 49, 50, 52, 53, 54, 57, 58, 58, 60, 61, 62, 63, 63, 64, - 65, 66, 33, 34, 35, 35, 39, 41, 43, 45, 46, 47, 46, 46, 45, 46, 47, 47, - 49, 49, 51, 53, 53, 56, 57, 57, 59, 60, 61, 62, 63, 64, 65, 66, 35, 36, - 37, 37, 41, 43, 45, 46, 46, 47, 46, 46, 45, 46, 46, 47, 48, 49, 50, 52, - 53, 55, 56, 56, 58, 59, 60, 61, 62, 63, 64, 64, 36, 38, 39, 40, 43, 45, - 47, 47, 47, 48, 46, 46, 45, 46, 46, 47, 48, 48, 50, 51, 52, 54, 55, 55, - 57, 58, 58, 59, 61, 62, 63, 64, 41, 42, 42, 42, 45, 46, 47, 48, 49, 50, - 49, 49, 49, 50, 50, 50, 51, 52, 53, 54, 55, 57, 58, 58, 60, 60, 59, 59, - 60, 61, 61, 62, 44, 44, 44, 44, 46, 46, 47, 49, 50, 51, 51, 51, 51, 51, - 51, 52, 53, 53, 54, 56, 56, 59, 59, 59, 61, 61, 61, 62, 63, 62, 62, 62, - 49, 47, 47, 46, 47, 47, 48, 50, 51, 53, 53, 53, 53, 54, 54, 54, 55, 55, - 56, 58, 58, 60, 61, 61, 63, 63, 64, 63, 63, 64, 65, 66, 48, 47, 46, 45, - 46, 46, 46, 49, 51, 53, 54, 54, 55, 56, 56, 57, 58, 59, 60, 61, 61, 63, - 64, 64, 65, 65, 64, 65, 66, 66, 66, 66, 48, 47, 46, 45, 46, 46, 46, 49, - 51, 53, 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 63, 65, 65, 65, 66, 67, - 
68, 67, 67, 67, 68, 69, 49, 47, 46, 45, 45, 45, 45, 49, 51, 53, 55, 56, - 58, 59, 60, 61, 62, 62, 63, 65, 65, 67, 68, 68, 69, 70, 69, 69, 69, 70, - 69, 69, 50, 48, 47, 46, 46, 46, 46, 50, 51, 54, 56, 57, 59, 61, 62, 63, - 64, 65, 66, 68, 68, 70, 71, 71, 72, 71, 71, 72, 71, 71, 71, 72, 51, 48, - 47, 46, 47, 46, 46, 50, 51, 54, 56, 57, 60, 62, 62, 64, 65, 66, 67, 69, - 69, 71, 72, 72, 73, 74, 74, 72, 73, 74, 73, 73, 52, 50, 48, 47, 47, 47, - 47, 50, 52, 54, 57, 58, 61, 63, 64, 66, 68, 68, 70, 72, 72, 75, 75, 75, - 77, 76, 75, 76, 76, 74, 75, 76, 54, 51, 50, 49, 49, 48, 48, 51, 53, 55, - 58, 59, 62, 64, 65, 68, 70, 70, 73, 74, 75, 77, 78, 78, 79, 78, 79, 78, - 77, 78, 77, 77, 55, 52, 51, 50, 49, 49, 48, 52, 53, 55, 59, 60, 62, 65, - 66, 68, 70, 71, 73, 75, 76, 78, 79, 79, 80, 81, 80, 80, 81, 79, 79, 81, - 57, 54, 53, 52, 51, 50, 50, 53, 54, 56, 60, 61, 63, 66, 67, 70, 73, 73, - 76, 78, 79, 82, 82, 83, 84, 83, 83, 83, 82, 83, 82, 81, 59, 56, 54, 53, - 53, 52, 51, 54, 56, 58, 61, 62, 65, 68, 69, 72, 74, 75, 78, 80, 81, 84, - 85, 85, 86, 86, 86, 84, 85, 84, 84, 85, 60, 57, 55, 54, 53, 53, 52, 55, - 56, 58, 61, 63, 65, 68, 69, 72, 75, 76, 79, 81, 82, 85, 86, 86, 88, 88, - 87, 88, 86, 87, 87, 85, 63, 60, 58, 57, 56, 55, 54, 57, 59, 60, 63, 65, - 67, 70, 71, 75, 77, 78, 82, 84, 85, 89, 89, 90, 92, 89, 91, 89, 90, 89, - 88, 89, 64, 61, 59, 58, 57, 56, 55, 58, 59, 61, 64, 65, 68, 71, 72, 75, - 78, 79, 82, 85, 86, 89, 90, 91, 93, 94, 92, 92, 91, 91, 92, 90, 65, 61, - 60, 58, 57, 56, 55, 58, 59, 61, 64, 65, 68, 71, 72, 75, 78, 79, 83, 85, - 86, 90, 91, 91, 93, 94, 95, 94, 94, 94, 93, 94, 67, 63, 61, 60, 59, 58, - 57, 60, 61, 63, 65, 66, 69, 72, 73, 77, 79, 80, 84, 86, 88, 92, 93, 93, - 95, 95, 96, 97, 96, 95, 96, 94, 68, 64, 63, 61, 60, 59, 58, 60, 61, 63, - 65, 67, 70, 71, 74, 76, 78, 81, 83, 86, 88, 89, 94, 94, 95, 97, 97, 98, - 99, 99, 97, 99, 68, 65, 64, 62, 61, 60, 58, 59, 61, 64, 64, 68, 69, 71, - 74, 75, 79, 80, 83, 86, 87, 91, 92, 95, 96, 97, 99, 99, 100, 100, 101, - 
99, 69, 66, 65, 63, 62, 61, 59, 59, 62, 63, 65, 67, 69, 72, 72, 76, 78, - 80, 83, 84, 88, 89, 92, 94, 97, 98, 99, 101, 100, 102, 102, 104, 70, 67, - 66, 63, 63, 62, 61, 60, 63, 63, 66, 67, 69, 71, 73, 76, 77, 81, 82, 85, - 86, 90, 91, 94, 96, 99, 100, 100, 103, 102, 104, 104, 71, 67, 67, 64, - 64, 63, 62, 61, 62, 64, 66, 67, 70, 71, 74, 74, 78, 79, 83, 84, 87, 89, - 91, 94, 95, 99, 100, 102, 102, 104, 104, 106, 72, 68, 68, 65, 65, 64, - 63, 61, 62, 65, 66, 68, 69, 71, 73, 75, 77, 79, 82, 84, 87, 88, 92, 93, - 96, 97, 101, 102, 104, 104, 106, 106, 73, 69, 69, 66, 66, 64, 64, 62, - 62, 66, 66, 69, 69, 72, 73, 76, 77, 81, 81, 85, 85, 89, 90, 94, 94, 99, - 99, 104, 104, 106, 106, 108, - /* Size 4x8 */ - 31, 47, 54, 64, 38, 46, 50, 60, 46, 53, 57, 62, 46, 56, 66, 71, 50, 59, - 74, 79, 57, 64, 82, 88, 61, 65, 85, 97, 65, 67, 82, 99, - /* Size 8x4 */ - 31, 38, 46, 46, 50, 57, 61, 65, 47, 46, 53, 56, 59, 64, 65, 67, 54, 50, - 57, 66, 74, 82, 85, 82, 64, 60, 62, 71, 79, 88, 97, 99, - /* Size 8x16 */ - 32, 34, 48, 49, 54, 63, 67, 69, 31, 36, 46, 46, 50, 58, 62, 65, 33, 40, - 47, 46, 49, 56, 59, 62, 37, 44, 47, 45, 48, 54, 57, 60, 44, 46, 51, 51, - 53, 59, 60, 61, 48, 46, 53, 56, 58, 64, 64, 64, 49, 45, 53, 58, 62, 67, - 70, 68, 51, 47, 54, 60, 65, 71, 73, 72, 54, 49, 55, 62, 70, 77, 77, 76, - 57, 51, 56, 64, 73, 82, 83, 81, 60, 53, 58, 65, 75, 85, 89, 85, 64, 57, - 61, 68, 78, 89, 93, 89, 66, 59, 63, 69, 79, 91, 94, 93, 68, 61, 63, 71, - 79, 87, 96, 98, 70, 63, 63, 70, 80, 89, 97, 100, 72, 65, 63, 69, 77, 86, - 95, 102, - /* Size 16x8 */ - 32, 31, 33, 37, 44, 48, 49, 51, 54, 57, 60, 64, 66, 68, 70, 72, 34, 36, - 40, 44, 46, 46, 45, 47, 49, 51, 53, 57, 59, 61, 63, 65, 48, 46, 47, 47, - 51, 53, 53, 54, 55, 56, 58, 61, 63, 63, 63, 63, 49, 46, 46, 45, 51, 56, - 58, 60, 62, 64, 65, 68, 69, 71, 70, 69, 54, 50, 49, 48, 53, 58, 62, 65, - 70, 73, 75, 78, 79, 79, 80, 77, 63, 58, 56, 54, 59, 64, 67, 71, 77, 82, - 85, 89, 91, 87, 89, 86, 67, 62, 59, 57, 60, 64, 70, 73, 77, 83, 89, 93, - 
94, 96, 97, 95, 69, 65, 62, 60, 61, 64, 68, 72, 76, 81, 85, 89, 93, 98, - 100, 102, - /* Size 16x32 */ - 32, 31, 34, 37, 48, 48, 49, 52, 54, 57, 63, 64, 67, 68, 69, 69, 31, 31, - 35, 38, 47, 47, 47, 50, 51, 54, 60, 61, 63, 64, 65, 66, 31, 32, 36, 39, - 46, 46, 46, 48, 50, 53, 58, 59, 62, 63, 65, 66, 30, 32, 36, 40, 46, 45, - 45, 48, 49, 52, 57, 58, 60, 61, 62, 63, 33, 36, 40, 43, 47, 46, 46, 47, - 49, 51, 56, 57, 59, 60, 62, 63, 35, 38, 42, 45, 47, 46, 45, 47, 48, 50, - 55, 56, 58, 60, 61, 61, 37, 40, 44, 47, 47, 46, 45, 47, 48, 50, 54, 55, - 57, 58, 60, 61, 42, 43, 45, 47, 50, 50, 49, 50, 51, 53, 57, 58, 59, 58, - 59, 59, 44, 44, 46, 47, 51, 51, 51, 52, 53, 54, 59, 59, 60, 61, 61, 60, - 49, 46, 47, 48, 53, 53, 53, 54, 55, 57, 60, 61, 63, 62, 62, 63, 48, 46, - 46, 47, 53, 54, 56, 57, 58, 60, 64, 64, 64, 64, 64, 63, 48, 45, 46, 46, - 53, 55, 56, 58, 59, 61, 65, 65, 66, 66, 65, 66, 49, 45, 45, 46, 53, 56, - 58, 61, 62, 64, 67, 68, 70, 67, 68, 66, 50, 46, 46, 46, 54, 56, 59, 63, - 65, 66, 70, 71, 70, 71, 68, 70, 51, 47, 47, 47, 54, 57, 60, 64, 65, 68, - 71, 72, 73, 71, 72, 70, 52, 48, 47, 47, 54, 57, 61, 66, 68, 71, 75, 75, - 76, 75, 73, 73, 54, 49, 49, 48, 55, 58, 62, 68, 70, 73, 77, 78, 77, 77, - 76, 74, 54, 50, 49, 49, 55, 59, 62, 68, 70, 74, 78, 79, 81, 79, 77, 78, - 57, 52, 51, 50, 56, 60, 64, 70, 73, 76, 82, 82, 83, 82, 81, 78, 59, 54, - 52, 52, 58, 61, 65, 72, 74, 78, 84, 85, 85, 83, 82, 82, 60, 54, 53, 52, - 58, 62, 65, 72, 75, 79, 85, 86, 89, 87, 85, 82, 63, 57, 56, 55, 60, 64, - 67, 75, 77, 82, 89, 90, 90, 88, 87, 86, 64, 58, 57, 55, 61, 64, 68, 75, - 78, 82, 89, 90, 93, 91, 89, 87, 64, 59, 57, 56, 61, 65, 68, 75, 78, 83, - 90, 91, 94, 93, 92, 91, 66, 60, 59, 57, 63, 66, 69, 77, 79, 84, 91, 93, - 94, 95, 93, 91, 67, 61, 60, 58, 63, 65, 70, 75, 78, 85, 88, 93, 96, 97, - 97, 95, 68, 62, 61, 59, 63, 64, 71, 74, 79, 84, 87, 94, 96, 97, 98, 96, - 69, 63, 62, 60, 63, 65, 71, 72, 80, 82, 88, 93, 96, 99, 100, 101, 70, - 64, 63, 60, 63, 66, 70, 73, 80, 81, 89, 90, 
97, 99, 100, 101, 71, 65, - 64, 61, 63, 67, 70, 74, 78, 82, 88, 90, 97, 99, 102, 103, 72, 65, 65, - 62, 63, 68, 69, 75, 77, 83, 86, 92, 95, 100, 102, 103, 73, 66, 66, 63, - 63, 69, 69, 76, 76, 84, 84, 93, 93, 101, 101, 105, - /* Size 32x16 */ - 32, 31, 31, 30, 33, 35, 37, 42, 44, 49, 48, 48, 49, 50, 51, 52, 54, 54, - 57, 59, 60, 63, 64, 64, 66, 67, 68, 69, 70, 71, 72, 73, 31, 31, 32, 32, - 36, 38, 40, 43, 44, 46, 46, 45, 45, 46, 47, 48, 49, 50, 52, 54, 54, 57, - 58, 59, 60, 61, 62, 63, 64, 65, 65, 66, 34, 35, 36, 36, 40, 42, 44, 45, - 46, 47, 46, 46, 45, 46, 47, 47, 49, 49, 51, 52, 53, 56, 57, 57, 59, 60, - 61, 62, 63, 64, 65, 66, 37, 38, 39, 40, 43, 45, 47, 47, 47, 48, 47, 46, - 46, 46, 47, 47, 48, 49, 50, 52, 52, 55, 55, 56, 57, 58, 59, 60, 60, 61, - 62, 63, 48, 47, 46, 46, 47, 47, 47, 50, 51, 53, 53, 53, 53, 54, 54, 54, - 55, 55, 56, 58, 58, 60, 61, 61, 63, 63, 63, 63, 63, 63, 63, 63, 48, 47, - 46, 45, 46, 46, 46, 50, 51, 53, 54, 55, 56, 56, 57, 57, 58, 59, 60, 61, - 62, 64, 64, 65, 66, 65, 64, 65, 66, 67, 68, 69, 49, 47, 46, 45, 46, 45, - 45, 49, 51, 53, 56, 56, 58, 59, 60, 61, 62, 62, 64, 65, 65, 67, 68, 68, - 69, 70, 71, 71, 70, 70, 69, 69, 52, 50, 48, 48, 47, 47, 47, 50, 52, 54, - 57, 58, 61, 63, 64, 66, 68, 68, 70, 72, 72, 75, 75, 75, 77, 75, 74, 72, - 73, 74, 75, 76, 54, 51, 50, 49, 49, 48, 48, 51, 53, 55, 58, 59, 62, 65, - 65, 68, 70, 70, 73, 74, 75, 77, 78, 78, 79, 78, 79, 80, 80, 78, 77, 76, - 57, 54, 53, 52, 51, 50, 50, 53, 54, 57, 60, 61, 64, 66, 68, 71, 73, 74, - 76, 78, 79, 82, 82, 83, 84, 85, 84, 82, 81, 82, 83, 84, 63, 60, 58, 57, - 56, 55, 54, 57, 59, 60, 64, 65, 67, 70, 71, 75, 77, 78, 82, 84, 85, 89, - 89, 90, 91, 88, 87, 88, 89, 88, 86, 84, 64, 61, 59, 58, 57, 56, 55, 58, - 59, 61, 64, 65, 68, 71, 72, 75, 78, 79, 82, 85, 86, 90, 90, 91, 93, 93, - 94, 93, 90, 90, 92, 93, 67, 63, 62, 60, 59, 58, 57, 59, 60, 63, 64, 66, - 70, 70, 73, 76, 77, 81, 83, 85, 89, 90, 93, 94, 94, 96, 96, 96, 97, 97, - 95, 93, 68, 64, 63, 61, 60, 60, 58, 58, 61, 62, 64, 
66, 67, 71, 71, 75, - 77, 79, 82, 83, 87, 88, 91, 93, 95, 97, 97, 99, 99, 99, 100, 101, 69, - 65, 65, 62, 62, 61, 60, 59, 61, 62, 64, 65, 68, 68, 72, 73, 76, 77, 81, - 82, 85, 87, 89, 92, 93, 97, 98, 100, 100, 102, 102, 101, 69, 66, 66, 63, - 63, 61, 61, 59, 60, 63, 63, 66, 66, 70, 70, 73, 74, 78, 78, 82, 82, 86, - 87, 91, 91, 95, 96, 101, 101, 103, 103, 105, - /* Size 4x16 */ - 31, 48, 57, 68, 32, 46, 53, 63, 36, 46, 51, 60, 40, 46, 50, 58, 44, 51, - 54, 61, 46, 54, 60, 64, 45, 56, 64, 67, 47, 57, 68, 71, 49, 58, 73, 77, - 52, 60, 76, 82, 54, 62, 79, 87, 58, 64, 82, 91, 60, 66, 84, 95, 62, 64, - 84, 97, 64, 66, 81, 99, 65, 68, 83, 100, - /* Size 16x4 */ - 31, 32, 36, 40, 44, 46, 45, 47, 49, 52, 54, 58, 60, 62, 64, 65, 48, 46, - 46, 46, 51, 54, 56, 57, 58, 60, 62, 64, 66, 64, 66, 68, 57, 53, 51, 50, - 54, 60, 64, 68, 73, 76, 79, 82, 84, 84, 81, 83, 68, 63, 60, 58, 61, 64, - 67, 71, 77, 82, 87, 91, 95, 97, 99, 100, - /* Size 8x32 */ - 32, 34, 48, 49, 54, 63, 67, 69, 31, 35, 47, 47, 51, 60, 63, 65, 31, 36, - 46, 46, 50, 58, 62, 65, 30, 36, 46, 45, 49, 57, 60, 62, 33, 40, 47, 46, - 49, 56, 59, 62, 35, 42, 47, 45, 48, 55, 58, 61, 37, 44, 47, 45, 48, 54, - 57, 60, 42, 45, 50, 49, 51, 57, 59, 59, 44, 46, 51, 51, 53, 59, 60, 61, - 49, 47, 53, 53, 55, 60, 63, 62, 48, 46, 53, 56, 58, 64, 64, 64, 48, 46, - 53, 56, 59, 65, 66, 65, 49, 45, 53, 58, 62, 67, 70, 68, 50, 46, 54, 59, - 65, 70, 70, 68, 51, 47, 54, 60, 65, 71, 73, 72, 52, 47, 54, 61, 68, 75, - 76, 73, 54, 49, 55, 62, 70, 77, 77, 76, 54, 49, 55, 62, 70, 78, 81, 77, - 57, 51, 56, 64, 73, 82, 83, 81, 59, 52, 58, 65, 74, 84, 85, 82, 60, 53, - 58, 65, 75, 85, 89, 85, 63, 56, 60, 67, 77, 89, 90, 87, 64, 57, 61, 68, - 78, 89, 93, 89, 64, 57, 61, 68, 78, 90, 94, 92, 66, 59, 63, 69, 79, 91, - 94, 93, 67, 60, 63, 70, 78, 88, 96, 97, 68, 61, 63, 71, 79, 87, 96, 98, - 69, 62, 63, 71, 80, 88, 96, 100, 70, 63, 63, 70, 80, 89, 97, 100, 71, - 64, 63, 70, 78, 88, 97, 102, 72, 65, 63, 69, 77, 86, 95, 102, 73, 66, - 63, 69, 76, 84, 93, 
101, - /* Size 32x8 */ - 32, 31, 31, 30, 33, 35, 37, 42, 44, 49, 48, 48, 49, 50, 51, 52, 54, 54, - 57, 59, 60, 63, 64, 64, 66, 67, 68, 69, 70, 71, 72, 73, 34, 35, 36, 36, - 40, 42, 44, 45, 46, 47, 46, 46, 45, 46, 47, 47, 49, 49, 51, 52, 53, 56, - 57, 57, 59, 60, 61, 62, 63, 64, 65, 66, 48, 47, 46, 46, 47, 47, 47, 50, - 51, 53, 53, 53, 53, 54, 54, 54, 55, 55, 56, 58, 58, 60, 61, 61, 63, 63, - 63, 63, 63, 63, 63, 63, 49, 47, 46, 45, 46, 45, 45, 49, 51, 53, 56, 56, - 58, 59, 60, 61, 62, 62, 64, 65, 65, 67, 68, 68, 69, 70, 71, 71, 70, 70, - 69, 69, 54, 51, 50, 49, 49, 48, 48, 51, 53, 55, 58, 59, 62, 65, 65, 68, - 70, 70, 73, 74, 75, 77, 78, 78, 79, 78, 79, 80, 80, 78, 77, 76, 63, 60, - 58, 57, 56, 55, 54, 57, 59, 60, 64, 65, 67, 70, 71, 75, 77, 78, 82, 84, - 85, 89, 89, 90, 91, 88, 87, 88, 89, 88, 86, 84, 67, 63, 62, 60, 59, 58, - 57, 59, 60, 63, 64, 66, 70, 70, 73, 76, 77, 81, 83, 85, 89, 90, 93, 94, - 94, 96, 96, 96, 97, 97, 95, 93, 69, 65, 65, 62, 62, 61, 60, 59, 61, 62, - 64, 65, 68, 68, 72, 73, 76, 77, 81, 82, 85, 87, 89, 92, 93, 97, 98, 100, - 100, 102, 102, 101 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 37, 58, 81, 37, 54, 72, 91, 58, 72, 102, 121, 81, 91, 121, 156, - /* Size 8x8 */ - 32, 32, 35, 42, 53, 68, 78, 90, 32, 33, 36, 42, 51, 64, 74, 84, 35, 36, - 46, 52, 60, 72, 80, 87, 42, 42, 52, 63, 73, 84, 92, 98, 53, 51, 60, 73, - 86, 100, 109, 114, 68, 64, 72, 84, 100, 117, 128, 133, 78, 74, 80, 92, - 109, 128, 140, 155, 90, 84, 87, 98, 114, 133, 155, 168, - /* Size 16x16 */ - 32, 31, 31, 32, 34, 36, 41, 47, 54, 59, 65, 74, 82, 87, 92, 97, 31, 32, - 32, 32, 34, 35, 39, 45, 50, 55, 61, 69, 76, 81, 87, 92, 31, 32, 33, 33, - 35, 36, 40, 44, 49, 54, 59, 67, 73, 78, 83, 88, 32, 32, 33, 35, 37, 38, - 41, 45, 49, 53, 58, 65, 71, 75, 80, 86, 34, 34, 35, 37, 39, 42, 46, 50, - 54, 58, 63, 70, 76, 80, 84, 85, 36, 35, 36, 38, 42, 48, 52, 56, 60, 64, - 68, 75, 80, 85, 90, 91, 41, 39, 40, 41, 46, 52, 57, 62, 67, 71, 75, 83, - 88, 92, 95, 97, 47, 45, 44, 45, 50, 56, 62, 
69, 75, 79, 84, 91, 97, 100, - 102, 104, 54, 50, 49, 49, 54, 60, 67, 75, 82, 87, 92, 100, 106, 110, - 109, 112, 59, 55, 54, 53, 58, 64, 71, 79, 87, 92, 98, 106, 112, 117, - 117, 121, 65, 61, 59, 58, 63, 68, 75, 84, 92, 98, 105, 114, 120, 125, - 126, 130, 74, 69, 67, 65, 70, 75, 83, 91, 100, 106, 114, 123, 131, 135, - 137, 140, 82, 76, 73, 71, 76, 80, 88, 97, 106, 112, 120, 131, 139, 144, - 148, 150, 87, 81, 78, 75, 80, 85, 92, 100, 110, 117, 125, 135, 144, 150, - 155, 162, 92, 87, 83, 80, 84, 90, 95, 102, 109, 117, 126, 137, 148, 155, - 162, 168, 97, 92, 88, 86, 85, 91, 97, 104, 112, 121, 130, 140, 150, 162, - 168, 174, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 54, 56, - 59, 64, 65, 71, 74, 80, 82, 83, 87, 90, 92, 95, 97, 100, 31, 32, 32, 32, - 32, 32, 32, 33, 34, 35, 35, 38, 40, 42, 45, 46, 51, 53, 56, 61, 62, 68, - 71, 76, 78, 78, 83, 85, 88, 90, 92, 95, 31, 32, 32, 32, 32, 32, 32, 33, - 34, 34, 35, 38, 39, 42, 45, 45, 50, 52, 55, 60, 61, 67, 69, 74, 76, 77, - 81, 84, 87, 89, 92, 95, 31, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 37, - 38, 41, 44, 44, 49, 51, 54, 58, 59, 65, 68, 72, 74, 75, 79, 81, 84, 86, - 88, 90, 31, 32, 32, 32, 33, 33, 33, 34, 35, 36, 36, 39, 40, 42, 44, 45, - 49, 51, 54, 58, 59, 64, 67, 71, 73, 74, 78, 80, 83, 85, 88, 90, 31, 32, - 32, 32, 33, 33, 34, 34, 35, 36, 36, 39, 40, 42, 45, 45, 50, 51, 54, 58, - 59, 64, 67, 71, 73, 74, 78, 80, 82, 84, 86, 89, 32, 32, 32, 32, 33, 34, - 35, 36, 37, 38, 38, 40, 41, 42, 45, 46, 49, 51, 53, 57, 58, 63, 65, 69, - 71, 72, 75, 78, 80, 83, 86, 89, 32, 33, 33, 33, 34, 34, 36, 36, 38, 39, - 40, 42, 43, 44, 47, 47, 51, 53, 55, 59, 60, 65, 67, 71, 73, 73, 77, 78, - 80, 82, 84, 86, 34, 34, 34, 33, 35, 35, 37, 38, 39, 42, 42, 45, 46, 47, - 50, 51, 54, 56, 58, 62, 63, 68, 70, 74, 76, 76, 80, 82, 84, 85, 85, 86, - 35, 35, 34, 34, 36, 36, 38, 39, 42, 46, 47, 49, 50, 52, 55, 55, 59, 60, - 62, 66, 67, 72, 74, 78, 79, 80, 83, 84, 85, 87, 90, 92, 36, 35, 35, 34, - 36, 36, 38, 40, 
42, 47, 48, 50, 52, 54, 56, 57, 60, 61, 64, 67, 68, 73, - 75, 79, 80, 81, 85, 87, 90, 91, 91, 92, 39, 38, 38, 37, 39, 39, 40, 42, - 45, 49, 50, 54, 55, 58, 60, 61, 65, 66, 69, 72, 73, 78, 80, 84, 86, 86, - 90, 91, 91, 92, 95, 97, 41, 40, 39, 38, 40, 40, 41, 43, 46, 50, 52, 55, - 57, 60, 62, 63, 67, 69, 71, 75, 75, 80, 83, 86, 88, 89, 92, 93, 95, 97, - 97, 98, 44, 42, 42, 41, 42, 42, 42, 44, 47, 52, 54, 58, 60, 63, 66, 67, - 71, 73, 75, 79, 79, 84, 86, 90, 92, 92, 96, 98, 98, 98, 101, 104, 47, - 45, 45, 44, 44, 45, 45, 47, 50, 55, 56, 60, 62, 66, 69, 70, 75, 77, 79, - 83, 84, 89, 91, 95, 97, 97, 100, 99, 102, 105, 104, 104, 48, 46, 45, 44, - 45, 45, 46, 47, 51, 55, 57, 61, 63, 67, 70, 71, 76, 78, 80, 84, 85, 90, - 93, 96, 98, 99, 102, 106, 106, 105, 108, 111, 54, 51, 50, 49, 49, 50, - 49, 51, 54, 59, 60, 65, 67, 71, 75, 76, 82, 84, 87, 91, 92, 97, 100, - 104, 106, 106, 110, 108, 109, 112, 112, 111, 56, 53, 52, 51, 51, 51, 51, - 53, 56, 60, 61, 66, 69, 73, 77, 78, 84, 86, 89, 93, 94, 100, 102, 106, - 108, 109, 112, 113, 115, 114, 116, 119, 59, 56, 55, 54, 54, 54, 53, 55, - 58, 62, 64, 69, 71, 75, 79, 80, 87, 89, 92, 97, 98, 103, 106, 110, 112, - 113, 117, 118, 117, 121, 121, 119, 64, 61, 60, 58, 58, 58, 57, 59, 62, - 66, 67, 72, 75, 79, 83, 84, 91, 93, 97, 102, 103, 109, 112, 116, 118, - 119, 122, 121, 125, 123, 125, 128, 65, 62, 61, 59, 59, 59, 58, 60, 63, - 67, 68, 73, 75, 79, 84, 85, 92, 94, 98, 103, 105, 111, 114, 118, 120, - 121, 125, 129, 126, 129, 130, 129, 71, 68, 67, 65, 64, 64, 63, 65, 68, - 72, 73, 78, 80, 84, 89, 90, 97, 100, 103, 109, 111, 117, 120, 125, 127, - 128, 133, 130, 134, 133, 133, 137, 74, 71, 69, 68, 67, 67, 65, 67, 70, - 74, 75, 80, 83, 86, 91, 93, 100, 102, 106, 112, 114, 120, 123, 128, 131, - 131, 135, 137, 137, 138, 140, 137, 80, 76, 74, 72, 71, 71, 69, 71, 74, - 78, 79, 84, 86, 90, 95, 96, 104, 106, 110, 116, 118, 125, 128, 134, 136, - 137, 142, 141, 142, 143, 143, 147, 82, 78, 76, 74, 73, 73, 71, 73, 76, - 79, 80, 86, 88, 92, 97, 98, 
106, 108, 112, 118, 120, 127, 131, 136, 139, - 139, 144, 147, 148, 147, 150, 148, 83, 78, 77, 75, 74, 74, 72, 73, 76, - 80, 81, 86, 89, 92, 97, 99, 106, 109, 113, 119, 121, 128, 131, 137, 139, - 140, 145, 150, 152, 155, 152, 157, 87, 83, 81, 79, 78, 78, 75, 77, 80, - 83, 85, 90, 92, 96, 100, 102, 110, 112, 117, 122, 125, 133, 135, 142, - 144, 145, 150, 151, 155, 158, 162, 158, 90, 85, 84, 81, 80, 80, 78, 78, - 82, 84, 87, 91, 93, 98, 99, 106, 108, 113, 118, 121, 129, 130, 137, 141, - 147, 150, 151, 156, 156, 161, 164, 169, 92, 88, 87, 84, 83, 82, 80, 80, - 84, 85, 90, 91, 95, 98, 102, 106, 109, 115, 117, 125, 126, 134, 137, - 142, 148, 152, 155, 156, 162, 162, 168, 170, 95, 90, 89, 86, 85, 84, 83, - 82, 85, 87, 91, 92, 97, 98, 105, 105, 112, 114, 121, 123, 129, 133, 138, - 143, 147, 155, 158, 161, 162, 168, 168, 174, 97, 92, 92, 88, 88, 86, 86, - 84, 85, 90, 91, 95, 97, 101, 104, 108, 112, 116, 121, 125, 130, 133, - 140, 143, 150, 152, 162, 164, 168, 168, 174, 175, 100, 95, 95, 90, 90, - 89, 89, 86, 86, 92, 92, 97, 98, 104, 104, 111, 111, 119, 119, 128, 129, - 137, 137, 147, 148, 157, 158, 169, 170, 174, 175, 181, - /* Size 4x8 */ - 32, 35, 59, 83, 32, 36, 57, 78, 34, 47, 65, 82, 41, 53, 78, 97, 51, 61, - 92, 111, 65, 73, 108, 129, 75, 81, 117, 148, 86, 92, 119, 154, - /* Size 8x4 */ - 32, 32, 34, 41, 51, 65, 75, 86, 35, 36, 47, 53, 61, 73, 81, 92, 59, 57, - 65, 78, 92, 108, 117, 119, 83, 78, 82, 97, 111, 129, 148, 154, - /* Size 8x16 */ - 32, 31, 35, 44, 53, 65, 82, 90, 31, 32, 34, 41, 50, 61, 76, 85, 31, 33, - 35, 42, 49, 59, 73, 81, 32, 34, 37, 42, 49, 58, 71, 79, 34, 35, 41, 48, - 54, 63, 76, 81, 36, 36, 46, 54, 60, 68, 80, 87, 41, 40, 49, 60, 67, 76, - 88, 93, 47, 44, 53, 66, 75, 84, 97, 101, 53, 50, 57, 71, 82, 92, 106, - 108, 58, 54, 61, 75, 87, 98, 112, 116, 65, 59, 66, 79, 92, 105, 120, - 124, 74, 67, 73, 86, 100, 113, 131, 134, 82, 73, 79, 92, 105, 120, 139, - 142, 87, 78, 83, 96, 110, 125, 144, 153, 92, 83, 84, 97, 114, 132, 150, - 157, 97, 88, 86, 97, 
111, 128, 147, 163, - /* Size 16x8 */ - 32, 31, 31, 32, 34, 36, 41, 47, 53, 58, 65, 74, 82, 87, 92, 97, 31, 32, - 33, 34, 35, 36, 40, 44, 50, 54, 59, 67, 73, 78, 83, 88, 35, 34, 35, 37, - 41, 46, 49, 53, 57, 61, 66, 73, 79, 83, 84, 86, 44, 41, 42, 42, 48, 54, - 60, 66, 71, 75, 79, 86, 92, 96, 97, 97, 53, 50, 49, 49, 54, 60, 67, 75, - 82, 87, 92, 100, 105, 110, 114, 111, 65, 61, 59, 58, 63, 68, 76, 84, 92, - 98, 105, 113, 120, 125, 132, 128, 82, 76, 73, 71, 76, 80, 88, 97, 106, - 112, 120, 131, 139, 144, 150, 147, 90, 85, 81, 79, 81, 87, 93, 101, 108, - 116, 124, 134, 142, 153, 157, 163, - /* Size 16x32 */ - 32, 31, 31, 32, 35, 36, 44, 47, 53, 62, 65, 79, 82, 88, 90, 93, 31, 32, - 32, 32, 35, 35, 42, 45, 51, 59, 62, 75, 78, 83, 86, 88, 31, 32, 32, 32, - 34, 35, 41, 45, 50, 58, 61, 74, 76, 82, 85, 88, 31, 32, 32, 33, 34, 34, - 41, 44, 49, 57, 59, 72, 74, 79, 82, 84, 31, 32, 33, 34, 35, 36, 42, 44, - 49, 57, 59, 71, 73, 79, 81, 84, 32, 32, 33, 34, 36, 36, 42, 45, 50, 57, - 59, 71, 73, 78, 80, 82, 32, 33, 34, 35, 37, 38, 42, 45, 49, 56, 58, 69, - 71, 76, 79, 83, 32, 33, 34, 36, 39, 40, 44, 47, 51, 58, 60, 71, 73, 76, - 78, 80, 34, 34, 35, 37, 41, 42, 48, 50, 54, 61, 63, 73, 76, 81, 81, 80, - 35, 34, 36, 38, 45, 47, 52, 55, 59, 65, 67, 77, 79, 82, 83, 86, 36, 34, - 36, 38, 46, 48, 54, 56, 60, 66, 68, 78, 80, 85, 87, 86, 39, 37, 39, 40, - 48, 50, 58, 60, 65, 71, 73, 84, 86, 89, 88, 91, 41, 39, 40, 41, 49, 51, - 60, 62, 67, 74, 76, 86, 88, 91, 93, 91, 44, 41, 42, 43, 51, 53, 63, 66, - 71, 78, 79, 90, 92, 97, 94, 97, 47, 44, 44, 45, 53, 56, 66, 69, 75, 82, - 84, 95, 97, 98, 101, 98, 48, 45, 45, 46, 54, 56, 67, 70, 76, 83, 85, 96, - 98, 104, 101, 105, 53, 49, 50, 50, 57, 60, 71, 75, 82, 90, 92, 103, 106, - 107, 108, 105, 55, 51, 51, 51, 59, 61, 72, 77, 84, 92, 94, 106, 108, - 111, 110, 112, 58, 54, 54, 54, 61, 63, 75, 79, 87, 95, 98, 110, 112, - 117, 116, 113, 63, 58, 58, 57, 65, 67, 78, 83, 91, 100, 103, 116, 118, - 119, 119, 121, 65, 60, 59, 58, 66, 68, 79, 84, 92, 102, 
105, 118, 120, - 127, 124, 122, 71, 65, 64, 63, 71, 73, 84, 89, 97, 108, 111, 125, 127, - 129, 129, 130, 74, 68, 67, 66, 73, 75, 86, 91, 100, 110, 113, 128, 131, - 135, 134, 130, 79, 72, 71, 70, 77, 79, 90, 95, 104, 115, 118, 133, 136, - 140, 139, 140, 82, 75, 73, 72, 79, 81, 92, 97, 105, 117, 120, 136, 139, - 145, 142, 140, 82, 75, 74, 72, 79, 81, 92, 97, 106, 117, 121, 136, 139, - 148, 150, 149, 87, 79, 78, 76, 83, 85, 96, 100, 110, 120, 125, 141, 144, - 148, 153, 150, 89, 82, 81, 78, 83, 87, 97, 99, 113, 118, 128, 139, 145, - 153, 157, 161, 92, 84, 83, 80, 84, 89, 97, 101, 114, 116, 132, 135, 150, - 153, 157, 162, 94, 86, 85, 82, 85, 92, 97, 104, 112, 119, 130, 136, 151, - 154, 163, 166, 97, 88, 88, 85, 86, 94, 97, 107, 111, 123, 128, 140, 147, - 159, 163, 167, 99, 91, 91, 87, 87, 97, 97, 110, 110, 126, 126, 144, 144, - 163, 163, 173, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 32, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 53, 55, - 58, 63, 65, 71, 74, 79, 82, 82, 87, 89, 92, 94, 97, 99, 31, 32, 32, 32, - 32, 32, 33, 33, 34, 34, 34, 37, 39, 41, 44, 45, 49, 51, 54, 58, 60, 65, - 68, 72, 75, 75, 79, 82, 84, 86, 88, 91, 31, 32, 32, 32, 33, 33, 34, 34, - 35, 36, 36, 39, 40, 42, 44, 45, 50, 51, 54, 58, 59, 64, 67, 71, 73, 74, - 78, 81, 83, 85, 88, 91, 32, 32, 32, 33, 34, 34, 35, 36, 37, 38, 38, 40, - 41, 43, 45, 46, 50, 51, 54, 57, 58, 63, 66, 70, 72, 72, 76, 78, 80, 82, - 85, 87, 35, 35, 34, 34, 35, 36, 37, 39, 41, 45, 46, 48, 49, 51, 53, 54, - 57, 59, 61, 65, 66, 71, 73, 77, 79, 79, 83, 83, 84, 85, 86, 87, 36, 35, - 35, 34, 36, 36, 38, 40, 42, 47, 48, 50, 51, 53, 56, 56, 60, 61, 63, 67, - 68, 73, 75, 79, 81, 81, 85, 87, 89, 92, 94, 97, 44, 42, 41, 41, 42, 42, - 42, 44, 48, 52, 54, 58, 60, 63, 66, 67, 71, 72, 75, 78, 79, 84, 86, 90, - 92, 92, 96, 97, 97, 97, 97, 97, 47, 45, 45, 44, 44, 45, 45, 47, 50, 55, - 56, 60, 62, 66, 69, 70, 75, 77, 79, 83, 84, 89, 91, 95, 97, 97, 100, 99, - 101, 104, 107, 110, 53, 51, 50, 49, 49, 50, 49, 51, 54, 59, 60, 65, 67, - 71, 75, 76, 82, 
84, 87, 91, 92, 97, 100, 104, 105, 106, 110, 113, 114, - 112, 111, 110, 62, 59, 58, 57, 57, 57, 56, 58, 61, 65, 66, 71, 74, 78, - 82, 83, 90, 92, 95, 100, 102, 108, 110, 115, 117, 117, 120, 118, 116, - 119, 123, 126, 65, 62, 61, 59, 59, 59, 58, 60, 63, 67, 68, 73, 76, 79, - 84, 85, 92, 94, 98, 103, 105, 111, 113, 118, 120, 121, 125, 128, 132, - 130, 128, 126, 79, 75, 74, 72, 71, 71, 69, 71, 73, 77, 78, 84, 86, 90, - 95, 96, 103, 106, 110, 116, 118, 125, 128, 133, 136, 136, 141, 139, 135, - 136, 140, 144, 82, 78, 76, 74, 73, 73, 71, 73, 76, 79, 80, 86, 88, 92, - 97, 98, 106, 108, 112, 118, 120, 127, 131, 136, 139, 139, 144, 145, 150, - 151, 147, 144, 88, 83, 82, 79, 79, 78, 76, 76, 81, 82, 85, 89, 91, 97, - 98, 104, 107, 111, 117, 119, 127, 129, 135, 140, 145, 148, 148, 153, - 153, 154, 159, 163, 90, 86, 85, 82, 81, 80, 79, 78, 81, 83, 87, 88, 93, - 94, 101, 101, 108, 110, 116, 119, 124, 129, 134, 139, 142, 150, 153, - 157, 157, 163, 163, 163, 93, 88, 88, 84, 84, 82, 83, 80, 80, 86, 86, 91, - 91, 97, 98, 105, 105, 112, 113, 121, 122, 130, 130, 140, 140, 149, 150, - 161, 162, 166, 167, 173, - /* Size 4x16 */ - 31, 36, 62, 88, 32, 35, 58, 82, 32, 36, 57, 79, 33, 38, 56, 76, 34, 42, - 61, 81, 34, 48, 66, 85, 39, 51, 74, 91, 44, 56, 82, 98, 49, 60, 90, 107, - 54, 63, 95, 117, 60, 68, 102, 127, 68, 75, 110, 135, 75, 81, 117, 145, - 79, 85, 120, 148, 84, 89, 116, 153, 88, 94, 123, 159, - /* Size 16x4 */ - 31, 32, 32, 33, 34, 34, 39, 44, 49, 54, 60, 68, 75, 79, 84, 88, 36, 35, - 36, 38, 42, 48, 51, 56, 60, 63, 68, 75, 81, 85, 89, 94, 62, 58, 57, 56, - 61, 66, 74, 82, 90, 95, 102, 110, 117, 120, 116, 123, 88, 82, 79, 76, - 81, 85, 91, 98, 107, 117, 127, 135, 145, 148, 153, 159, - /* Size 8x32 */ - 32, 31, 35, 44, 53, 65, 82, 90, 31, 32, 35, 42, 51, 62, 78, 86, 31, 32, - 34, 41, 50, 61, 76, 85, 31, 32, 34, 41, 49, 59, 74, 82, 31, 33, 35, 42, - 49, 59, 73, 81, 32, 33, 36, 42, 50, 59, 73, 80, 32, 34, 37, 42, 49, 58, - 71, 79, 32, 34, 39, 44, 51, 60, 73, 78, 34, 35, 41, 48, 54, 
63, 76, 81, - 35, 36, 45, 52, 59, 67, 79, 83, 36, 36, 46, 54, 60, 68, 80, 87, 39, 39, - 48, 58, 65, 73, 86, 88, 41, 40, 49, 60, 67, 76, 88, 93, 44, 42, 51, 63, - 71, 79, 92, 94, 47, 44, 53, 66, 75, 84, 97, 101, 48, 45, 54, 67, 76, 85, - 98, 101, 53, 50, 57, 71, 82, 92, 106, 108, 55, 51, 59, 72, 84, 94, 108, - 110, 58, 54, 61, 75, 87, 98, 112, 116, 63, 58, 65, 78, 91, 103, 118, - 119, 65, 59, 66, 79, 92, 105, 120, 124, 71, 64, 71, 84, 97, 111, 127, - 129, 74, 67, 73, 86, 100, 113, 131, 134, 79, 71, 77, 90, 104, 118, 136, - 139, 82, 73, 79, 92, 105, 120, 139, 142, 82, 74, 79, 92, 106, 121, 139, - 150, 87, 78, 83, 96, 110, 125, 144, 153, 89, 81, 83, 97, 113, 128, 145, - 157, 92, 83, 84, 97, 114, 132, 150, 157, 94, 85, 85, 97, 112, 130, 151, - 163, 97, 88, 86, 97, 111, 128, 147, 163, 99, 91, 87, 97, 110, 126, 144, - 163, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 32, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 53, 55, - 58, 63, 65, 71, 74, 79, 82, 82, 87, 89, 92, 94, 97, 99, 31, 32, 32, 32, - 33, 33, 34, 34, 35, 36, 36, 39, 40, 42, 44, 45, 50, 51, 54, 58, 59, 64, - 67, 71, 73, 74, 78, 81, 83, 85, 88, 91, 35, 35, 34, 34, 35, 36, 37, 39, - 41, 45, 46, 48, 49, 51, 53, 54, 57, 59, 61, 65, 66, 71, 73, 77, 79, 79, - 83, 83, 84, 85, 86, 87, 44, 42, 41, 41, 42, 42, 42, 44, 48, 52, 54, 58, - 60, 63, 66, 67, 71, 72, 75, 78, 79, 84, 86, 90, 92, 92, 96, 97, 97, 97, - 97, 97, 53, 51, 50, 49, 49, 50, 49, 51, 54, 59, 60, 65, 67, 71, 75, 76, - 82, 84, 87, 91, 92, 97, 100, 104, 105, 106, 110, 113, 114, 112, 111, - 110, 65, 62, 61, 59, 59, 59, 58, 60, 63, 67, 68, 73, 76, 79, 84, 85, 92, - 94, 98, 103, 105, 111, 113, 118, 120, 121, 125, 128, 132, 130, 128, 126, - 82, 78, 76, 74, 73, 73, 71, 73, 76, 79, 80, 86, 88, 92, 97, 98, 106, - 108, 112, 118, 120, 127, 131, 136, 139, 139, 144, 145, 150, 151, 147, - 144, 90, 86, 85, 82, 81, 80, 79, 78, 81, 83, 87, 88, 93, 94, 101, 101, - 108, 110, 116, 119, 124, 129, 134, 139, 142, 150, 153, 157, 157, 163, - 163, 163 }, - { /* Chroma */ - /* Size 4x4 */ - 
32, 45, 51, 61, 45, 54, 59, 65, 51, 59, 75, 81, 61, 65, 81, 97, - /* Size 8x8 */ - 31, 34, 46, 47, 50, 57, 61, 65, 34, 39, 47, 45, 48, 53, 57, 61, 46, 47, - 52, 52, 54, 58, 61, 62, 47, 45, 52, 58, 62, 65, 68, 68, 50, 48, 54, 62, - 68, 73, 77, 76, 57, 53, 58, 65, 73, 82, 86, 86, 61, 57, 61, 68, 77, 86, - 91, 95, 65, 61, 62, 68, 76, 86, 95, 100, - /* Size 16x16 */ - 32, 31, 33, 36, 41, 49, 49, 50, 52, 54, 57, 61, 64, 67, 68, 70, 31, 31, - 34, 39, 42, 47, 46, 47, 49, 51, 53, 57, 60, 62, 64, 66, 33, 34, 37, 42, - 44, 47, 46, 46, 47, 49, 51, 55, 57, 59, 61, 63, 36, 39, 42, 47, 47, 48, - 46, 46, 47, 48, 50, 53, 55, 57, 59, 61, 41, 42, 44, 47, 48, 50, 49, 50, - 50, 52, 53, 56, 58, 60, 61, 60, 49, 47, 47, 48, 50, 53, 53, 54, 54, 55, - 56, 59, 61, 63, 64, 64, 49, 46, 46, 46, 49, 53, 55, 57, 59, 60, 61, 64, - 66, 67, 67, 67, 50, 47, 46, 46, 50, 54, 57, 61, 63, 64, 66, 69, 70, 72, - 71, 71, 52, 49, 47, 47, 50, 54, 59, 63, 66, 68, 70, 73, 75, 77, 75, 75, - 54, 51, 49, 48, 52, 55, 60, 64, 68, 71, 73, 76, 79, 80, 79, 79, 57, 53, - 51, 50, 53, 56, 61, 66, 70, 73, 76, 80, 82, 84, 83, 84, 61, 57, 55, 53, - 56, 59, 64, 69, 73, 76, 80, 84, 87, 89, 88, 88, 64, 60, 57, 55, 58, 61, - 66, 70, 75, 79, 82, 87, 91, 93, 93, 93, 67, 62, 59, 57, 60, 63, 67, 72, - 77, 80, 84, 89, 93, 95, 96, 97, 68, 64, 61, 59, 61, 64, 67, 71, 75, 79, - 83, 88, 93, 96, 99, 100, 70, 66, 63, 61, 60, 64, 67, 71, 75, 79, 84, 88, - 93, 97, 100, 102, - /* Size 32x32 */ - 32, 31, 31, 30, 33, 33, 36, 38, 41, 47, 49, 48, 49, 49, 50, 50, 52, 53, - 54, 56, 57, 60, 61, 63, 64, 65, 67, 67, 68, 69, 70, 71, 31, 31, 31, 31, - 34, 34, 38, 40, 42, 46, 47, 47, 47, 47, 48, 48, 50, 50, 52, 54, 54, 57, - 58, 60, 61, 61, 63, 64, 65, 65, 66, 67, 31, 31, 31, 31, 34, 35, 39, 40, - 42, 46, 47, 46, 46, 46, 47, 47, 49, 50, 51, 53, 53, 56, 57, 59, 60, 60, - 62, 63, 64, 65, 66, 67, 30, 31, 31, 32, 34, 35, 40, 41, 42, 45, 46, 45, - 45, 45, 46, 46, 47, 48, 49, 51, 52, 54, 55, 57, 58, 58, 60, 61, 62, 62, - 63, 64, 33, 34, 34, 34, 37, 38, 42, 
43, 44, 46, 47, 46, 46, 45, 46, 46, - 47, 48, 49, 51, 51, 53, 55, 56, 57, 57, 59, 60, 61, 62, 63, 64, 33, 34, - 35, 35, 38, 39, 43, 44, 45, 47, 47, 46, 46, 45, 46, 46, 47, 48, 49, 51, - 51, 53, 54, 56, 57, 57, 59, 60, 60, 61, 62, 62, 36, 38, 39, 40, 42, 43, - 47, 47, 47, 47, 48, 46, 46, 45, 46, 46, 47, 47, 48, 49, 50, 52, 53, 54, - 55, 55, 57, 58, 59, 60, 61, 62, 38, 40, 40, 41, 43, 44, 47, 47, 48, 48, - 49, 48, 47, 47, 47, 47, 48, 49, 49, 51, 51, 53, 54, 55, 56, 56, 58, 58, - 58, 59, 60, 60, 41, 42, 42, 42, 44, 45, 47, 48, 48, 50, 50, 49, 49, 49, - 50, 50, 50, 51, 52, 53, 53, 55, 56, 57, 58, 58, 60, 61, 61, 61, 60, 60, - 47, 46, 46, 45, 46, 47, 47, 48, 50, 52, 52, 52, 52, 52, 53, 53, 53, 54, - 55, 55, 56, 58, 58, 60, 60, 61, 62, 61, 61, 62, 63, 64, 49, 47, 47, 46, - 47, 47, 48, 49, 50, 52, 53, 53, 53, 53, 54, 54, 54, 55, 55, 56, 56, 58, - 59, 60, 61, 61, 63, 63, 64, 64, 64, 64, 48, 47, 46, 45, 46, 46, 46, 48, - 49, 52, 53, 54, 55, 55, 56, 56, 57, 58, 58, 59, 60, 61, 62, 63, 64, 64, - 66, 65, 65, 65, 66, 67, 49, 47, 46, 45, 46, 46, 46, 47, 49, 52, 53, 55, - 55, 57, 57, 58, 59, 59, 60, 61, 61, 63, 64, 65, 66, 66, 67, 67, 67, 68, - 67, 67, 49, 47, 46, 45, 45, 45, 45, 47, 49, 52, 53, 55, 57, 58, 59, 60, - 61, 62, 62, 63, 63, 65, 66, 67, 68, 68, 69, 70, 69, 68, 69, 70, 50, 48, - 47, 46, 46, 46, 46, 47, 50, 53, 54, 56, 57, 59, 61, 61, 63, 64, 64, 66, - 66, 68, 69, 70, 70, 71, 72, 70, 71, 72, 71, 70, 50, 48, 47, 46, 46, 46, - 46, 47, 50, 53, 54, 56, 58, 60, 61, 61, 63, 64, 65, 66, 67, 68, 69, 71, - 71, 71, 73, 74, 73, 72, 73, 74, 52, 50, 49, 47, 47, 47, 47, 48, 50, 53, - 54, 57, 59, 61, 63, 63, 66, 67, 68, 70, 70, 72, 73, 75, 75, 75, 77, 75, - 75, 76, 75, 74, 53, 50, 50, 48, 48, 48, 47, 49, 51, 54, 55, 58, 59, 62, - 64, 64, 67, 68, 69, 71, 71, 73, 74, 76, 77, 77, 78, 78, 78, 76, 77, 78, - 54, 52, 51, 49, 49, 49, 48, 49, 52, 55, 55, 58, 60, 62, 64, 65, 68, 69, - 71, 73, 73, 75, 76, 78, 79, 79, 80, 80, 79, 80, 79, 78, 56, 54, 53, 51, - 51, 51, 49, 51, 53, 55, 56, 59, 61, 
63, 66, 66, 70, 71, 73, 75, 76, 78, - 79, 81, 82, 82, 83, 81, 83, 81, 81, 82, 57, 54, 53, 52, 51, 51, 50, 51, - 53, 56, 56, 60, 61, 63, 66, 67, 70, 71, 73, 76, 76, 79, 80, 82, 82, 83, - 84, 85, 83, 84, 84, 82, 60, 57, 56, 54, 53, 53, 52, 53, 55, 58, 58, 61, - 63, 65, 68, 68, 72, 73, 75, 78, 79, 82, 83, 85, 86, 86, 88, 86, 87, 86, - 85, 86, 61, 58, 57, 55, 55, 54, 53, 54, 56, 58, 59, 62, 64, 66, 69, 69, - 73, 74, 76, 79, 80, 83, 84, 86, 87, 88, 89, 89, 88, 88, 88, 86, 63, 60, - 59, 57, 56, 56, 54, 55, 57, 60, 60, 63, 65, 67, 70, 71, 75, 76, 78, 81, - 82, 85, 86, 89, 90, 90, 92, 91, 91, 90, 89, 91, 64, 61, 60, 58, 57, 57, - 55, 56, 58, 60, 61, 64, 66, 68, 70, 71, 75, 77, 79, 82, 82, 86, 87, 90, - 91, 91, 93, 93, 93, 92, 93, 91, 65, 61, 60, 58, 57, 57, 55, 56, 58, 61, - 61, 64, 66, 68, 71, 71, 75, 77, 79, 82, 83, 86, 88, 90, 91, 91, 93, 94, - 95, 95, 93, 95, 67, 63, 62, 60, 59, 59, 57, 58, 60, 62, 63, 66, 67, 69, - 72, 73, 77, 78, 80, 83, 84, 88, 89, 92, 93, 93, 95, 95, 96, 96, 97, 95, - 67, 64, 63, 61, 60, 60, 58, 58, 61, 61, 63, 65, 67, 70, 70, 74, 75, 78, - 80, 81, 85, 86, 89, 91, 93, 94, 95, 97, 97, 98, 98, 100, 68, 65, 64, 62, - 61, 60, 59, 58, 61, 61, 64, 65, 67, 69, 71, 73, 75, 78, 79, 83, 83, 87, - 88, 91, 93, 95, 96, 97, 99, 98, 100, 100, 69, 65, 65, 62, 62, 61, 60, - 59, 61, 62, 64, 65, 68, 68, 72, 72, 76, 76, 80, 81, 84, 86, 88, 90, 92, - 95, 96, 98, 98, 100, 100, 101, 70, 66, 66, 63, 63, 62, 61, 60, 60, 63, - 64, 66, 67, 69, 71, 73, 75, 77, 79, 81, 84, 85, 88, 89, 93, 93, 97, 98, - 100, 100, 102, 101, 71, 67, 67, 64, 64, 62, 62, 60, 60, 64, 64, 67, 67, - 70, 70, 74, 74, 78, 78, 82, 82, 86, 86, 91, 91, 95, 95, 100, 100, 101, - 101, 104, - /* Size 4x8 */ - 31, 47, 53, 63, 36, 47, 50, 59, 46, 52, 55, 61, 45, 53, 63, 70, 49, 55, - 71, 77, 54, 58, 77, 86, 59, 61, 81, 94, 63, 65, 80, 95, - /* Size 8x4 */ - 31, 36, 46, 45, 49, 54, 59, 63, 47, 47, 52, 53, 55, 58, 61, 65, 53, 50, - 55, 63, 71, 77, 81, 80, 63, 59, 61, 70, 77, 86, 94, 95, - /* Size 8x16 */ - 32, 33, 
45, 49, 52, 57, 64, 68, 31, 34, 45, 46, 49, 53, 60, 64, 33, 37, - 46, 45, 47, 51, 57, 61, 37, 43, 47, 45, 47, 50, 55, 59, 42, 44, 49, 49, - 50, 53, 58, 60, 49, 47, 52, 53, 54, 57, 61, 63, 48, 46, 51, 57, 59, 61, - 66, 67, 50, 46, 52, 59, 63, 66, 71, 71, 52, 47, 53, 61, 66, 71, 75, 74, - 54, 49, 54, 62, 68, 73, 79, 79, 57, 51, 55, 64, 70, 76, 83, 83, 61, 55, - 58, 66, 73, 80, 87, 87, 64, 57, 60, 68, 75, 83, 91, 91, 66, 59, 61, 69, - 77, 84, 93, 95, 68, 61, 61, 68, 77, 86, 94, 97, 70, 63, 61, 67, 75, 83, - 92, 98, - /* Size 16x8 */ - 32, 31, 33, 37, 42, 49, 48, 50, 52, 54, 57, 61, 64, 66, 68, 70, 33, 34, - 37, 43, 44, 47, 46, 46, 47, 49, 51, 55, 57, 59, 61, 63, 45, 45, 46, 47, - 49, 52, 51, 52, 53, 54, 55, 58, 60, 61, 61, 61, 49, 46, 45, 45, 49, 53, - 57, 59, 61, 62, 64, 66, 68, 69, 68, 67, 52, 49, 47, 47, 50, 54, 59, 63, - 66, 68, 70, 73, 75, 77, 77, 75, 57, 53, 51, 50, 53, 57, 61, 66, 71, 73, - 76, 80, 83, 84, 86, 83, 64, 60, 57, 55, 58, 61, 66, 71, 75, 79, 83, 87, - 91, 93, 94, 92, 68, 64, 61, 59, 60, 63, 67, 71, 74, 79, 83, 87, 91, 95, - 97, 98, - /* Size 16x32 */ - 32, 31, 33, 37, 45, 48, 49, 50, 52, 56, 57, 63, 64, 67, 68, 68, 31, 31, - 34, 38, 45, 47, 47, 48, 50, 53, 54, 60, 61, 63, 64, 65, 31, 32, 34, 39, - 45, 46, 46, 47, 49, 52, 53, 59, 60, 62, 64, 65, 30, 32, 35, 40, 44, 46, - 45, 46, 48, 51, 52, 57, 58, 60, 61, 62, 33, 35, 37, 42, 46, 47, 45, 46, - 47, 50, 51, 56, 57, 60, 61, 62, 33, 36, 38, 43, 46, 47, 46, 46, 47, 50, - 51, 56, 57, 59, 60, 60, 37, 40, 43, 47, 47, 47, 45, 46, 47, 49, 50, 54, - 55, 57, 59, 61, 39, 41, 43, 47, 48, 48, 47, 47, 48, 50, 51, 55, 56, 57, - 58, 59, 42, 43, 44, 47, 49, 50, 49, 50, 50, 53, 53, 57, 58, 60, 60, 59, - 47, 46, 46, 48, 51, 52, 53, 53, 53, 55, 56, 60, 61, 61, 61, 62, 49, 46, - 47, 48, 52, 53, 53, 54, 54, 56, 57, 60, 61, 63, 63, 62, 48, 46, 46, 47, - 51, 53, 56, 56, 57, 59, 60, 64, 64, 65, 64, 65, 48, 45, 46, 46, 51, 53, - 57, 57, 59, 61, 61, 65, 66, 66, 67, 65, 49, 45, 45, 46, 51, 53, 58, 59, - 61, 63, 64, 67, 68, 70, 
67, 68, 50, 46, 46, 46, 52, 54, 59, 61, 63, 65, - 66, 70, 71, 70, 71, 68, 50, 46, 46, 46, 52, 54, 59, 61, 64, 66, 67, 71, - 71, 73, 71, 72, 52, 48, 47, 47, 53, 54, 61, 63, 66, 70, 71, 75, 75, 75, - 74, 72, 53, 49, 48, 48, 53, 55, 61, 64, 67, 71, 72, 76, 77, 77, 75, 76, - 54, 50, 49, 49, 54, 55, 62, 65, 68, 72, 73, 78, 79, 80, 79, 76, 56, 51, - 51, 50, 55, 56, 63, 66, 70, 74, 76, 81, 82, 81, 80, 80, 57, 52, 51, 50, - 55, 56, 64, 66, 70, 75, 76, 82, 83, 85, 83, 80, 60, 54, 54, 52, 57, 58, - 65, 68, 72, 77, 79, 85, 86, 86, 85, 84, 61, 56, 55, 53, 58, 59, 66, 69, - 73, 79, 80, 86, 87, 89, 87, 84, 63, 57, 56, 55, 59, 60, 67, 70, 75, 80, - 82, 89, 90, 91, 89, 89, 64, 58, 57, 56, 60, 61, 68, 71, 75, 81, 83, 90, - 91, 93, 91, 89, 64, 59, 58, 56, 60, 61, 68, 71, 75, 81, 83, 90, 91, 94, - 94, 93, 66, 60, 59, 57, 61, 63, 69, 72, 77, 82, 84, 92, 93, 94, 95, 93, - 67, 61, 60, 58, 61, 63, 69, 70, 78, 80, 85, 90, 93, 96, 97, 97, 68, 62, - 61, 59, 61, 64, 68, 71, 77, 79, 86, 88, 94, 96, 97, 98, 69, 63, 62, 59, - 61, 65, 68, 72, 76, 80, 85, 88, 94, 95, 99, 99, 70, 63, 63, 60, 61, 66, - 67, 73, 75, 81, 83, 89, 92, 97, 98, 99, 70, 64, 64, 61, 61, 67, 67, 74, - 74, 82, 82, 90, 90, 98, 98, 102, - /* Size 32x16 */ - 32, 31, 31, 30, 33, 33, 37, 39, 42, 47, 49, 48, 48, 49, 50, 50, 52, 53, - 54, 56, 57, 60, 61, 63, 64, 64, 66, 67, 68, 69, 70, 70, 31, 31, 32, 32, - 35, 36, 40, 41, 43, 46, 46, 46, 45, 45, 46, 46, 48, 49, 50, 51, 52, 54, - 56, 57, 58, 59, 60, 61, 62, 63, 63, 64, 33, 34, 34, 35, 37, 38, 43, 43, - 44, 46, 47, 46, 46, 45, 46, 46, 47, 48, 49, 51, 51, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, 64, 37, 38, 39, 40, 42, 43, 47, 47, 47, 48, 48, 47, - 46, 46, 46, 46, 47, 48, 49, 50, 50, 52, 53, 55, 56, 56, 57, 58, 59, 59, - 60, 61, 45, 45, 45, 44, 46, 46, 47, 48, 49, 51, 52, 51, 51, 51, 52, 52, - 53, 53, 54, 55, 55, 57, 58, 59, 60, 60, 61, 61, 61, 61, 61, 61, 48, 47, - 46, 46, 47, 47, 47, 48, 50, 52, 53, 53, 53, 53, 54, 54, 54, 55, 55, 56, - 56, 58, 59, 60, 61, 61, 63, 63, 64, 65, 66, 
67, 49, 47, 46, 45, 45, 46, - 45, 47, 49, 53, 53, 56, 57, 58, 59, 59, 61, 61, 62, 63, 64, 65, 66, 67, - 68, 68, 69, 69, 68, 68, 67, 67, 50, 48, 47, 46, 46, 46, 46, 47, 50, 53, - 54, 56, 57, 59, 61, 61, 63, 64, 65, 66, 66, 68, 69, 70, 71, 71, 72, 70, - 71, 72, 73, 74, 52, 50, 49, 48, 47, 47, 47, 48, 50, 53, 54, 57, 59, 61, - 63, 64, 66, 67, 68, 70, 70, 72, 73, 75, 75, 75, 77, 78, 77, 76, 75, 74, - 56, 53, 52, 51, 50, 50, 49, 50, 53, 55, 56, 59, 61, 63, 65, 66, 70, 71, - 72, 74, 75, 77, 79, 80, 81, 81, 82, 80, 79, 80, 81, 82, 57, 54, 53, 52, - 51, 51, 50, 51, 53, 56, 57, 60, 61, 64, 66, 67, 71, 72, 73, 76, 76, 79, - 80, 82, 83, 83, 84, 85, 86, 85, 83, 82, 63, 60, 59, 57, 56, 56, 54, 55, - 57, 60, 60, 64, 65, 67, 70, 71, 75, 76, 78, 81, 82, 85, 86, 89, 90, 90, - 92, 90, 88, 88, 89, 90, 64, 61, 60, 58, 57, 57, 55, 56, 58, 61, 61, 64, - 66, 68, 71, 71, 75, 77, 79, 82, 83, 86, 87, 90, 91, 91, 93, 93, 94, 94, - 92, 90, 67, 63, 62, 60, 60, 59, 57, 57, 60, 61, 63, 65, 66, 70, 70, 73, - 75, 77, 80, 81, 85, 86, 89, 91, 93, 94, 94, 96, 96, 95, 97, 98, 68, 64, - 64, 61, 61, 60, 59, 58, 60, 61, 63, 64, 67, 67, 71, 71, 74, 75, 79, 80, - 83, 85, 87, 89, 91, 94, 95, 97, 97, 99, 98, 98, 68, 65, 65, 62, 62, 60, - 61, 59, 59, 62, 62, 65, 65, 68, 68, 72, 72, 76, 76, 80, 80, 84, 84, 89, - 89, 93, 93, 97, 98, 99, 99, 102, - /* Size 4x16 */ - 31, 48, 56, 67, 32, 46, 52, 62, 35, 47, 50, 60, 40, 47, 49, 57, 43, 50, - 53, 60, 46, 53, 56, 63, 45, 53, 61, 66, 46, 54, 65, 70, 48, 54, 70, 75, - 50, 55, 72, 80, 52, 56, 75, 85, 56, 59, 79, 89, 58, 61, 81, 93, 60, 63, - 82, 94, 62, 64, 79, 96, 63, 66, 81, 97, - /* Size 16x4 */ - 31, 32, 35, 40, 43, 46, 45, 46, 48, 50, 52, 56, 58, 60, 62, 63, 48, 46, - 47, 47, 50, 53, 53, 54, 54, 55, 56, 59, 61, 63, 64, 66, 56, 52, 50, 49, - 53, 56, 61, 65, 70, 72, 75, 79, 81, 82, 79, 81, 67, 62, 60, 57, 60, 63, - 66, 70, 75, 80, 85, 89, 93, 94, 96, 97, - /* Size 8x32 */ - 32, 33, 45, 49, 52, 57, 64, 68, 31, 34, 45, 47, 50, 54, 61, 64, 31, 34, - 45, 46, 49, 53, 60, 
64, 30, 35, 44, 45, 48, 52, 58, 61, 33, 37, 46, 45, - 47, 51, 57, 61, 33, 38, 46, 46, 47, 51, 57, 60, 37, 43, 47, 45, 47, 50, - 55, 59, 39, 43, 48, 47, 48, 51, 56, 58, 42, 44, 49, 49, 50, 53, 58, 60, - 47, 46, 51, 53, 53, 56, 61, 61, 49, 47, 52, 53, 54, 57, 61, 63, 48, 46, - 51, 56, 57, 60, 64, 64, 48, 46, 51, 57, 59, 61, 66, 67, 49, 45, 51, 58, - 61, 64, 68, 67, 50, 46, 52, 59, 63, 66, 71, 71, 50, 46, 52, 59, 64, 67, - 71, 71, 52, 47, 53, 61, 66, 71, 75, 74, 53, 48, 53, 61, 67, 72, 77, 75, - 54, 49, 54, 62, 68, 73, 79, 79, 56, 51, 55, 63, 70, 76, 82, 80, 57, 51, - 55, 64, 70, 76, 83, 83, 60, 54, 57, 65, 72, 79, 86, 85, 61, 55, 58, 66, - 73, 80, 87, 87, 63, 56, 59, 67, 75, 82, 90, 89, 64, 57, 60, 68, 75, 83, - 91, 91, 64, 58, 60, 68, 75, 83, 91, 94, 66, 59, 61, 69, 77, 84, 93, 95, - 67, 60, 61, 69, 78, 85, 93, 97, 68, 61, 61, 68, 77, 86, 94, 97, 69, 62, - 61, 68, 76, 85, 94, 99, 70, 63, 61, 67, 75, 83, 92, 98, 70, 64, 61, 67, - 74, 82, 90, 98, - /* Size 32x8 */ - 32, 31, 31, 30, 33, 33, 37, 39, 42, 47, 49, 48, 48, 49, 50, 50, 52, 53, - 54, 56, 57, 60, 61, 63, 64, 64, 66, 67, 68, 69, 70, 70, 33, 34, 34, 35, - 37, 38, 43, 43, 44, 46, 47, 46, 46, 45, 46, 46, 47, 48, 49, 51, 51, 54, - 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 45, 45, 45, 44, 46, 46, 47, 48, - 49, 51, 52, 51, 51, 51, 52, 52, 53, 53, 54, 55, 55, 57, 58, 59, 60, 60, - 61, 61, 61, 61, 61, 61, 49, 47, 46, 45, 45, 46, 45, 47, 49, 53, 53, 56, - 57, 58, 59, 59, 61, 61, 62, 63, 64, 65, 66, 67, 68, 68, 69, 69, 68, 68, - 67, 67, 52, 50, 49, 48, 47, 47, 47, 48, 50, 53, 54, 57, 59, 61, 63, 64, - 66, 67, 68, 70, 70, 72, 73, 75, 75, 75, 77, 78, 77, 76, 75, 74, 57, 54, - 53, 52, 51, 51, 50, 51, 53, 56, 57, 60, 61, 64, 66, 67, 71, 72, 73, 76, - 76, 79, 80, 82, 83, 83, 84, 85, 86, 85, 83, 82, 64, 61, 60, 58, 57, 57, - 55, 56, 58, 61, 61, 64, 66, 68, 71, 71, 75, 77, 79, 82, 83, 86, 87, 90, - 91, 91, 93, 93, 94, 94, 92, 90, 68, 64, 64, 61, 61, 60, 59, 58, 60, 61, - 63, 64, 67, 67, 71, 71, 74, 75, 79, 80, 83, 85, 87, 89, 91, 
94, 95, 97, - 97, 99, 98, 98 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 34, 53, 75, 34, 49, 64, 81, 53, 64, 91, 112, 75, 81, 112, 140, - /* Size 8x8 */ - 32, 32, 34, 39, 50, 62, 76, 84, 32, 33, 35, 40, 48, 59, 71, 79, 34, 35, - 39, 46, 53, 63, 74, 81, 39, 40, 46, 56, 65, 75, 86, 92, 50, 48, 53, 65, - 78, 90, 101, 106, 62, 59, 63, 75, 90, 105, 118, 123, 76, 71, 74, 86, - 101, 118, 134, 142, 84, 79, 81, 92, 106, 123, 142, 153, - /* Size 16x16 */ - 32, 31, 31, 32, 33, 36, 39, 44, 48, 54, 59, 66, 74, 81, 86, 91, 31, 32, - 32, 32, 33, 35, 38, 42, 46, 51, 56, 63, 70, 77, 81, 86, 31, 32, 32, 33, - 34, 35, 38, 41, 45, 49, 54, 60, 67, 73, 77, 82, 32, 32, 33, 34, 36, 37, - 40, 42, 45, 49, 53, 59, 66, 71, 75, 80, 33, 33, 34, 36, 38, 42, 44, 46, - 50, 53, 57, 63, 69, 74, 78, 80, 36, 35, 35, 37, 42, 48, 50, 54, 57, 60, - 64, 69, 75, 80, 84, 85, 39, 38, 38, 40, 44, 50, 54, 58, 61, 65, 69, 74, - 80, 85, 89, 91, 44, 42, 41, 42, 46, 54, 58, 63, 67, 71, 75, 80, 86, 91, - 95, 97, 48, 46, 45, 45, 50, 57, 61, 67, 71, 76, 80, 86, 93, 98, 101, - 104, 54, 51, 49, 49, 53, 60, 65, 71, 76, 82, 87, 93, 100, 105, 109, 112, - 59, 56, 54, 53, 57, 64, 69, 75, 80, 87, 92, 99, 106, 112, 116, 120, 66, - 63, 60, 59, 63, 69, 74, 80, 86, 93, 99, 107, 115, 121, 125, 129, 74, 70, - 67, 66, 69, 75, 80, 86, 93, 100, 106, 115, 123, 130, 135, 138, 81, 77, - 73, 71, 74, 80, 85, 91, 98, 105, 112, 121, 130, 137, 142, 148, 86, 81, - 77, 75, 78, 84, 89, 95, 101, 109, 116, 125, 135, 142, 147, 153, 91, 86, - 82, 80, 80, 85, 91, 97, 104, 112, 120, 129, 138, 148, 153, 159, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 32, 32, 33, 34, 36, 36, 39, 41, 44, 46, 48, 52, - 54, 58, 59, 65, 66, 71, 74, 80, 81, 83, 86, 89, 91, 93, 31, 32, 32, 32, - 32, 32, 32, 32, 33, 34, 35, 35, 38, 39, 42, 44, 46, 50, 51, 56, 56, 62, - 63, 68, 71, 76, 77, 78, 82, 84, 86, 88, 31, 32, 32, 32, 32, 32, 32, 32, - 33, 34, 35, 35, 38, 39, 42, 44, 46, 49, 51, 55, 56, 61, 63, 67, 70, 75, - 77, 78, 81, 84, 86, 88, 31, 32, 32, 32, 32, 32, 32, 
32, 33, 33, 34, 34, - 37, 38, 41, 42, 44, 48, 49, 53, 54, 59, 60, 65, 68, 72, 74, 75, 78, 80, - 82, 84, 31, 32, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 38, 39, 41, 43, - 45, 48, 49, 53, 54, 59, 60, 65, 67, 72, 73, 74, 77, 80, 82, 84, 31, 32, - 32, 32, 33, 33, 33, 34, 35, 35, 36, 36, 39, 40, 42, 44, 45, 48, 50, 53, - 54, 59, 60, 64, 67, 71, 73, 74, 77, 79, 81, 83, 32, 32, 32, 32, 33, 33, - 34, 35, 36, 36, 37, 38, 40, 40, 42, 44, 45, 48, 49, 53, 53, 58, 59, 63, - 66, 70, 71, 72, 75, 78, 80, 83, 32, 32, 32, 32, 33, 34, 35, 35, 36, 37, - 38, 38, 40, 41, 42, 44, 46, 48, 49, 53, 53, 58, 59, 63, 65, 69, 71, 72, - 74, 77, 79, 80, 33, 33, 33, 33, 34, 35, 36, 36, 38, 39, 42, 42, 44, 45, - 46, 48, 50, 52, 53, 57, 57, 62, 63, 67, 69, 73, 74, 75, 78, 79, 80, 81, - 34, 34, 34, 33, 34, 35, 36, 37, 39, 39, 42, 43, 45, 46, 47, 49, 51, 53, - 54, 58, 58, 63, 64, 68, 70, 74, 75, 76, 79, 81, 84, 86, 36, 35, 35, 34, - 35, 36, 37, 38, 42, 42, 48, 48, 50, 51, 54, 55, 57, 59, 60, 63, 64, 68, - 69, 73, 75, 79, 80, 81, 84, 85, 85, 86, 36, 35, 35, 34, 35, 36, 38, 38, - 42, 43, 48, 49, 51, 52, 54, 55, 57, 59, 60, 64, 64, 68, 69, 73, 75, 79, - 80, 81, 84, 86, 88, 91, 39, 38, 38, 37, 38, 39, 40, 40, 44, 45, 50, 51, - 54, 55, 58, 59, 61, 64, 65, 68, 69, 73, 74, 78, 80, 84, 85, 86, 89, 91, - 91, 91, 41, 39, 39, 38, 39, 40, 40, 41, 45, 46, 51, 52, 55, 56, 59, 61, - 63, 65, 67, 70, 70, 75, 76, 80, 82, 86, 87, 88, 91, 92, 94, 96, 44, 42, - 42, 41, 41, 42, 42, 42, 46, 47, 54, 54, 58, 59, 63, 65, 67, 70, 71, 75, - 75, 79, 80, 84, 86, 90, 91, 92, 95, 97, 97, 97, 46, 44, 44, 42, 43, 44, - 44, 44, 48, 49, 55, 55, 59, 61, 65, 67, 69, 72, 74, 77, 78, 82, 83, 87, - 89, 93, 94, 95, 98, 98, 100, 103, 48, 46, 46, 44, 45, 45, 45, 46, 50, - 51, 57, 57, 61, 63, 67, 69, 71, 74, 76, 80, 80, 85, 86, 90, 93, 96, 98, - 99, 101, 104, 104, 103, 52, 50, 49, 48, 48, 48, 48, 48, 52, 53, 59, 59, - 64, 65, 70, 72, 74, 78, 80, 84, 85, 90, 91, 95, 97, 101, 103, 104, 106, - 106, 107, 110, 54, 51, 51, 49, 49, 50, 49, 49, 53, 54, 
60, 60, 65, 67, - 71, 74, 76, 80, 82, 86, 87, 92, 93, 97, 100, 104, 105, 106, 109, 112, - 112, 110, 58, 56, 55, 53, 53, 53, 53, 53, 57, 58, 63, 64, 68, 70, 75, - 77, 80, 84, 86, 91, 91, 97, 98, 103, 105, 110, 111, 112, 115, 114, 115, - 118, 59, 56, 56, 54, 54, 54, 53, 53, 57, 58, 64, 64, 69, 70, 75, 78, 80, - 85, 87, 91, 92, 98, 99, 103, 106, 110, 112, 113, 116, 119, 120, 119, 65, - 62, 61, 59, 59, 59, 58, 58, 62, 63, 68, 68, 73, 75, 79, 82, 85, 90, 92, - 97, 98, 105, 106, 111, 114, 118, 120, 121, 124, 123, 123, 126, 66, 63, - 63, 60, 60, 60, 59, 59, 63, 64, 69, 69, 74, 76, 80, 83, 86, 91, 93, 98, - 99, 106, 107, 112, 115, 119, 121, 122, 125, 128, 129, 126, 71, 68, 67, - 65, 65, 64, 63, 63, 67, 68, 73, 73, 78, 80, 84, 87, 90, 95, 97, 103, - 103, 111, 112, 117, 120, 125, 127, 128, 131, 132, 132, 135, 74, 71, 70, - 68, 67, 67, 66, 65, 69, 70, 75, 75, 80, 82, 86, 89, 93, 97, 100, 105, - 106, 114, 115, 120, 123, 128, 130, 131, 135, 135, 138, 136, 80, 76, 75, - 72, 72, 71, 70, 69, 73, 74, 79, 79, 84, 86, 90, 93, 96, 101, 104, 110, - 110, 118, 119, 125, 128, 134, 136, 137, 140, 142, 140, 144, 81, 77, 77, - 74, 73, 73, 71, 71, 74, 75, 80, 80, 85, 87, 91, 94, 98, 103, 105, 111, - 112, 120, 121, 127, 130, 136, 137, 139, 142, 145, 148, 144, 83, 78, 78, - 75, 74, 74, 72, 72, 75, 76, 81, 81, 86, 88, 92, 95, 99, 104, 106, 112, - 113, 121, 122, 128, 131, 137, 139, 140, 144, 148, 150, 155, 86, 82, 81, - 78, 77, 77, 75, 74, 78, 79, 84, 84, 89, 91, 95, 98, 101, 106, 109, 115, - 116, 124, 125, 131, 135, 140, 142, 144, 147, 149, 153, 155, 89, 84, 84, - 80, 80, 79, 78, 77, 79, 81, 85, 86, 91, 92, 97, 98, 104, 106, 112, 114, - 119, 123, 128, 132, 135, 142, 145, 148, 149, 153, 154, 159, 91, 86, 86, - 82, 82, 81, 80, 79, 80, 84, 85, 88, 91, 94, 97, 100, 104, 107, 112, 115, - 120, 123, 129, 132, 138, 140, 148, 150, 153, 154, 159, 159, 93, 88, 88, - 84, 84, 83, 83, 80, 81, 86, 86, 91, 91, 96, 97, 103, 103, 110, 110, 118, - 119, 126, 126, 135, 136, 144, 144, 155, 155, 159, 159, 164, - /* 
Size 4x8 */ - 32, 35, 51, 77, 32, 36, 50, 72, 34, 42, 54, 75, 38, 51, 67, 87, 48, 59, - 80, 103, 60, 68, 92, 119, 72, 79, 104, 135, 81, 86, 112, 144, - /* Size 8x4 */ - 32, 32, 34, 38, 48, 60, 72, 81, 35, 36, 42, 51, 59, 68, 79, 86, 51, 50, - 54, 67, 80, 92, 104, 112, 77, 72, 75, 87, 103, 119, 135, 144, - /* Size 8x16 */ - 32, 31, 33, 40, 51, 65, 79, 87, 31, 32, 33, 39, 49, 61, 74, 82, 31, 32, - 34, 38, 47, 59, 71, 79, 32, 33, 36, 40, 48, 58, 69, 77, 33, 34, 38, 44, - 52, 62, 72, 78, 36, 35, 42, 51, 58, 68, 78, 84, 39, 38, 44, 54, 63, 73, - 84, 89, 44, 41, 46, 59, 69, 79, 90, 96, 48, 45, 50, 62, 74, 85, 96, 103, - 53, 49, 53, 66, 79, 92, 103, 111, 58, 54, 57, 70, 84, 98, 110, 118, 66, - 60, 63, 75, 90, 106, 119, 126, 74, 67, 69, 81, 97, 113, 128, 134, 81, - 73, 75, 86, 102, 120, 135, 143, 86, 78, 78, 90, 106, 124, 140, 147, 91, - 82, 80, 90, 103, 119, 137, 151, - /* Size 16x8 */ - 32, 31, 31, 32, 33, 36, 39, 44, 48, 53, 58, 66, 74, 81, 86, 91, 31, 32, - 32, 33, 34, 35, 38, 41, 45, 49, 54, 60, 67, 73, 78, 82, 33, 33, 34, 36, - 38, 42, 44, 46, 50, 53, 57, 63, 69, 75, 78, 80, 40, 39, 38, 40, 44, 51, - 54, 59, 62, 66, 70, 75, 81, 86, 90, 90, 51, 49, 47, 48, 52, 58, 63, 69, - 74, 79, 84, 90, 97, 102, 106, 103, 65, 61, 59, 58, 62, 68, 73, 79, 85, - 92, 98, 106, 113, 120, 124, 119, 79, 74, 71, 69, 72, 78, 84, 90, 96, - 103, 110, 119, 128, 135, 140, 137, 87, 82, 79, 77, 78, 84, 89, 96, 103, - 111, 118, 126, 134, 143, 147, 151, - /* Size 16x32 */ - 32, 31, 31, 32, 33, 36, 40, 44, 51, 53, 65, 66, 79, 81, 87, 90, 31, 32, - 32, 32, 33, 35, 39, 42, 49, 51, 62, 63, 75, 77, 83, 85, 31, 32, 32, 32, - 33, 35, 39, 42, 49, 51, 61, 62, 74, 76, 82, 85, 31, 32, 32, 33, 33, 34, - 38, 41, 47, 49, 59, 60, 72, 74, 79, 81, 31, 32, 32, 33, 34, 35, 38, 41, - 47, 49, 59, 60, 71, 73, 79, 81, 32, 32, 33, 34, 35, 36, 39, 42, 48, 50, - 59, 60, 71, 72, 78, 80, 32, 32, 33, 35, 36, 37, 40, 42, 48, 49, 58, 59, - 69, 71, 77, 80, 32, 33, 33, 35, 36, 38, 41, 42, 48, 49, 58, 59, 69, 70, - 75, 77, 33, 33, 
34, 36, 38, 41, 44, 46, 52, 53, 62, 63, 72, 74, 78, 78, - 34, 34, 34, 37, 39, 42, 45, 48, 53, 54, 63, 64, 73, 75, 80, 83, 36, 34, - 35, 38, 42, 48, 51, 54, 58, 60, 68, 69, 78, 80, 84, 83, 36, 35, 35, 38, - 42, 48, 51, 54, 59, 60, 68, 69, 79, 80, 85, 87, 39, 37, 38, 40, 44, 50, - 54, 58, 63, 65, 73, 74, 84, 85, 89, 88, 40, 38, 39, 41, 45, 51, 56, 59, - 65, 67, 75, 76, 85, 87, 90, 93, 44, 41, 41, 43, 46, 53, 59, 63, 69, 71, - 79, 80, 90, 91, 96, 93, 46, 43, 43, 44, 48, 55, 60, 65, 72, 73, 82, 83, - 93, 94, 97, 100, 48, 45, 45, 46, 50, 56, 62, 67, 74, 76, 85, 86, 96, 98, - 103, 100, 52, 48, 48, 49, 52, 59, 65, 70, 78, 80, 90, 91, 101, 103, 105, - 107, 53, 49, 49, 50, 53, 60, 66, 71, 79, 82, 92, 93, 103, 105, 111, 107, - 58, 53, 53, 53, 57, 63, 69, 74, 83, 86, 97, 98, 109, 111, 113, 115, 58, - 54, 54, 54, 57, 63, 70, 75, 84, 87, 98, 99, 110, 112, 118, 115, 65, 60, - 59, 58, 62, 68, 74, 79, 89, 92, 105, 106, 118, 119, 122, 123, 66, 61, - 60, 59, 63, 69, 75, 80, 90, 93, 106, 107, 119, 121, 126, 123, 71, 65, - 65, 63, 67, 73, 79, 84, 94, 97, 111, 112, 125, 127, 131, 132, 74, 68, - 67, 66, 69, 75, 81, 86, 97, 100, 113, 115, 128, 130, 134, 132, 79, 72, - 72, 70, 73, 79, 85, 90, 101, 104, 118, 119, 133, 135, 141, 140, 81, 74, - 73, 71, 75, 80, 86, 91, 102, 105, 120, 121, 135, 137, 143, 140, 82, 75, - 74, 72, 75, 81, 87, 92, 103, 106, 121, 122, 136, 139, 147, 151, 86, 78, - 78, 75, 78, 84, 90, 95, 106, 109, 124, 125, 140, 142, 147, 151, 88, 81, - 80, 77, 80, 86, 90, 98, 105, 112, 122, 127, 140, 144, 152, 155, 91, 83, - 82, 79, 80, 88, 90, 100, 103, 114, 119, 130, 137, 148, 151, 155, 93, 85, - 85, 81, 81, 90, 90, 102, 103, 117, 117, 134, 134, 151, 152, 160, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 32, 32, 32, 33, 34, 36, 36, 39, 40, 44, 46, 48, 52, - 53, 58, 58, 65, 66, 71, 74, 79, 81, 82, 86, 88, 91, 93, 31, 32, 32, 32, - 32, 32, 32, 33, 33, 34, 34, 35, 37, 38, 41, 43, 45, 48, 49, 53, 54, 60, - 61, 65, 68, 72, 74, 75, 78, 81, 83, 85, 31, 32, 32, 32, 32, 33, 33, 33, - 34, 34, 
35, 35, 38, 39, 41, 43, 45, 48, 49, 53, 54, 59, 60, 65, 67, 72, - 73, 74, 78, 80, 82, 85, 32, 32, 32, 33, 33, 34, 35, 35, 36, 37, 38, 38, - 40, 41, 43, 44, 46, 49, 50, 53, 54, 58, 59, 63, 66, 70, 71, 72, 75, 77, - 79, 81, 33, 33, 33, 33, 34, 35, 36, 36, 38, 39, 42, 42, 44, 45, 46, 48, - 50, 52, 53, 57, 57, 62, 63, 67, 69, 73, 75, 75, 78, 80, 80, 81, 36, 35, - 35, 34, 35, 36, 37, 38, 41, 42, 48, 48, 50, 51, 53, 55, 56, 59, 60, 63, - 63, 68, 69, 73, 75, 79, 80, 81, 84, 86, 88, 90, 40, 39, 39, 38, 38, 39, - 40, 41, 44, 45, 51, 51, 54, 56, 59, 60, 62, 65, 66, 69, 70, 74, 75, 79, - 81, 85, 86, 87, 90, 90, 90, 90, 44, 42, 42, 41, 41, 42, 42, 42, 46, 48, - 54, 54, 58, 59, 63, 65, 67, 70, 71, 74, 75, 79, 80, 84, 86, 90, 91, 92, - 95, 98, 100, 102, 51, 49, 49, 47, 47, 48, 48, 48, 52, 53, 58, 59, 63, - 65, 69, 72, 74, 78, 79, 83, 84, 89, 90, 94, 97, 101, 102, 103, 106, 105, - 103, 103, 53, 51, 51, 49, 49, 50, 49, 49, 53, 54, 60, 60, 65, 67, 71, - 73, 76, 80, 82, 86, 87, 92, 93, 97, 100, 104, 105, 106, 109, 112, 114, - 117, 65, 62, 61, 59, 59, 59, 58, 58, 62, 63, 68, 68, 73, 75, 79, 82, 85, - 90, 92, 97, 98, 105, 106, 111, 113, 118, 120, 121, 124, 122, 119, 117, - 66, 63, 62, 60, 60, 60, 59, 59, 63, 64, 69, 69, 74, 76, 80, 83, 86, 91, - 93, 98, 99, 106, 107, 112, 115, 119, 121, 122, 125, 127, 130, 134, 79, - 75, 74, 72, 71, 71, 69, 69, 72, 73, 78, 79, 84, 85, 90, 93, 96, 101, - 103, 109, 110, 118, 119, 125, 128, 133, 135, 136, 140, 140, 137, 134, - 81, 77, 76, 74, 73, 72, 71, 70, 74, 75, 80, 80, 85, 87, 91, 94, 98, 103, - 105, 111, 112, 119, 121, 127, 130, 135, 137, 139, 142, 144, 148, 151, - 87, 83, 82, 79, 79, 78, 77, 75, 78, 80, 84, 85, 89, 90, 96, 97, 103, - 105, 111, 113, 118, 122, 126, 131, 134, 141, 143, 147, 147, 152, 151, - 152, 90, 85, 85, 81, 81, 80, 80, 77, 78, 83, 83, 87, 88, 93, 93, 100, - 100, 107, 107, 115, 115, 123, 123, 132, 132, 140, 140, 151, 151, 155, - 155, 160, - /* Size 4x16 */ - 31, 36, 53, 81, 32, 35, 51, 76, 32, 35, 49, 73, 32, 37, 49, 71, 33, 41, - 
53, 74, 34, 48, 60, 80, 37, 50, 65, 85, 41, 53, 71, 91, 45, 56, 76, 98, - 49, 60, 82, 105, 54, 63, 87, 112, 61, 69, 93, 121, 68, 75, 100, 130, 74, - 80, 105, 137, 78, 84, 109, 142, 83, 88, 114, 148, - /* Size 16x4 */ - 31, 32, 32, 32, 33, 34, 37, 41, 45, 49, 54, 61, 68, 74, 78, 83, 36, 35, - 35, 37, 41, 48, 50, 53, 56, 60, 63, 69, 75, 80, 84, 88, 53, 51, 49, 49, - 53, 60, 65, 71, 76, 82, 87, 93, 100, 105, 109, 114, 81, 76, 73, 71, 74, - 80, 85, 91, 98, 105, 112, 121, 130, 137, 142, 148, - /* Size 8x32 */ - 32, 31, 33, 40, 51, 65, 79, 87, 31, 32, 33, 39, 49, 62, 75, 83, 31, 32, - 33, 39, 49, 61, 74, 82, 31, 32, 33, 38, 47, 59, 72, 79, 31, 32, 34, 38, - 47, 59, 71, 79, 32, 33, 35, 39, 48, 59, 71, 78, 32, 33, 36, 40, 48, 58, - 69, 77, 32, 33, 36, 41, 48, 58, 69, 75, 33, 34, 38, 44, 52, 62, 72, 78, - 34, 34, 39, 45, 53, 63, 73, 80, 36, 35, 42, 51, 58, 68, 78, 84, 36, 35, - 42, 51, 59, 68, 79, 85, 39, 38, 44, 54, 63, 73, 84, 89, 40, 39, 45, 56, - 65, 75, 85, 90, 44, 41, 46, 59, 69, 79, 90, 96, 46, 43, 48, 60, 72, 82, - 93, 97, 48, 45, 50, 62, 74, 85, 96, 103, 52, 48, 52, 65, 78, 90, 101, - 105, 53, 49, 53, 66, 79, 92, 103, 111, 58, 53, 57, 69, 83, 97, 109, 113, - 58, 54, 57, 70, 84, 98, 110, 118, 65, 59, 62, 74, 89, 105, 118, 122, 66, - 60, 63, 75, 90, 106, 119, 126, 71, 65, 67, 79, 94, 111, 125, 131, 74, - 67, 69, 81, 97, 113, 128, 134, 79, 72, 73, 85, 101, 118, 133, 141, 81, - 73, 75, 86, 102, 120, 135, 143, 82, 74, 75, 87, 103, 121, 136, 147, 86, - 78, 78, 90, 106, 124, 140, 147, 88, 80, 80, 90, 105, 122, 140, 152, 91, - 82, 80, 90, 103, 119, 137, 151, 93, 85, 81, 90, 103, 117, 134, 152, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 32, 32, 32, 33, 34, 36, 36, 39, 40, 44, 46, 48, 52, - 53, 58, 58, 65, 66, 71, 74, 79, 81, 82, 86, 88, 91, 93, 31, 32, 32, 32, - 32, 33, 33, 33, 34, 34, 35, 35, 38, 39, 41, 43, 45, 48, 49, 53, 54, 59, - 60, 65, 67, 72, 73, 74, 78, 80, 82, 85, 33, 33, 33, 33, 34, 35, 36, 36, - 38, 39, 42, 42, 44, 45, 46, 48, 50, 52, 53, 57, 57, 62, 63, 67, 69, 
73, - 75, 75, 78, 80, 80, 81, 40, 39, 39, 38, 38, 39, 40, 41, 44, 45, 51, 51, - 54, 56, 59, 60, 62, 65, 66, 69, 70, 74, 75, 79, 81, 85, 86, 87, 90, 90, - 90, 90, 51, 49, 49, 47, 47, 48, 48, 48, 52, 53, 58, 59, 63, 65, 69, 72, - 74, 78, 79, 83, 84, 89, 90, 94, 97, 101, 102, 103, 106, 105, 103, 103, - 65, 62, 61, 59, 59, 59, 58, 58, 62, 63, 68, 68, 73, 75, 79, 82, 85, 90, - 92, 97, 98, 105, 106, 111, 113, 118, 120, 121, 124, 122, 119, 117, 79, - 75, 74, 72, 71, 71, 69, 69, 72, 73, 78, 79, 84, 85, 90, 93, 96, 101, - 103, 109, 110, 118, 119, 125, 128, 133, 135, 136, 140, 140, 137, 134, - 87, 83, 82, 79, 79, 78, 77, 75, 78, 80, 84, 85, 89, 90, 96, 97, 103, - 105, 111, 113, 118, 122, 126, 131, 134, 141, 143, 147, 147, 152, 151, - 152 }, - { /* Chroma */ - /* Size 4x4 */ - 32, 46, 49, 58, 46, 53, 55, 62, 49, 55, 70, 78, 58, 62, 78, 91, - /* Size 8x8 */ - 31, 34, 42, 47, 49, 54, 60, 64, 34, 39, 45, 46, 47, 51, 56, 59, 42, 45, - 48, 49, 50, 53, 57, 60, 47, 46, 49, 55, 58, 61, 65, 66, 49, 47, 50, 58, - 65, 69, 73, 74, 54, 51, 53, 61, 69, 76, 82, 83, 60, 56, 57, 65, 73, 82, - 89, 92, 64, 59, 60, 66, 74, 83, 92, 96, - /* Size 16x16 */ - 32, 31, 31, 35, 40, 49, 48, 49, 50, 52, 54, 57, 61, 64, 66, 68, 31, 31, - 32, 37, 41, 47, 47, 46, 48, 49, 51, 54, 57, 60, 62, 64, 31, 32, 34, 39, - 43, 46, 46, 45, 46, 47, 49, 52, 55, 57, 59, 61, 35, 37, 39, 44, 46, 47, - 46, 45, 46, 47, 48, 51, 53, 56, 57, 59, 40, 41, 43, 46, 48, 50, 49, 48, - 49, 49, 51, 53, 55, 57, 59, 59, 49, 47, 46, 47, 50, 53, 53, 53, 54, 54, - 55, 57, 59, 61, 62, 62, 48, 47, 46, 46, 49, 53, 54, 55, 56, 57, 58, 60, - 62, 64, 65, 65, 49, 46, 45, 45, 48, 53, 55, 58, 60, 61, 62, 64, 66, 68, - 69, 69, 50, 48, 46, 46, 49, 54, 56, 60, 61, 63, 65, 67, 69, 71, 72, 72, - 52, 49, 47, 47, 49, 54, 57, 61, 63, 66, 68, 71, 73, 75, 76, 77, 54, 51, - 49, 48, 51, 55, 58, 62, 65, 68, 71, 74, 76, 78, 80, 81, 57, 54, 52, 51, - 53, 57, 60, 64, 67, 71, 74, 77, 80, 83, 84, 85, 61, 57, 55, 53, 55, 59, - 62, 66, 69, 73, 76, 80, 84, 87, 89, 89, 
64, 60, 57, 56, 57, 61, 64, 68, - 71, 75, 78, 83, 87, 90, 92, 94, 66, 62, 59, 57, 59, 62, 65, 69, 72, 76, - 80, 84, 89, 92, 94, 96, 68, 64, 61, 59, 59, 62, 65, 69, 72, 77, 81, 85, - 89, 94, 96, 98, - /* Size 32x32 */ - 32, 31, 31, 30, 31, 33, 35, 36, 40, 41, 49, 49, 48, 48, 49, 50, 50, 52, - 52, 54, 54, 57, 57, 60, 61, 63, 64, 65, 66, 67, 68, 69, 31, 31, 31, 31, - 32, 34, 37, 38, 41, 42, 47, 47, 47, 47, 47, 47, 48, 49, 50, 52, 52, 54, - 55, 57, 58, 60, 61, 61, 63, 64, 64, 65, 31, 31, 31, 31, 32, 35, 37, 39, - 41, 42, 47, 47, 47, 46, 46, 47, 48, 49, 49, 51, 51, 54, 54, 56, 57, 59, - 60, 61, 62, 63, 64, 65, 30, 31, 31, 32, 33, 35, 38, 40, 42, 42, 46, 46, - 45, 45, 45, 45, 46, 47, 47, 49, 49, 52, 52, 54, 55, 57, 58, 58, 60, 61, - 61, 62, 31, 32, 32, 33, 34, 37, 39, 41, 43, 43, 46, 46, 46, 45, 45, 46, - 46, 47, 47, 49, 49, 51, 52, 54, 55, 57, 57, 58, 59, 60, 61, 62, 33, 34, - 35, 35, 37, 39, 41, 43, 44, 45, 47, 47, 46, 46, 45, 46, 46, 47, 47, 49, - 49, 51, 51, 53, 54, 56, 57, 57, 58, 59, 60, 61, 35, 37, 37, 38, 39, 41, - 44, 46, 46, 46, 47, 47, 46, 46, 45, 46, 46, 47, 47, 48, 48, 50, 51, 52, - 53, 55, 56, 56, 57, 58, 59, 61, 36, 38, 39, 40, 41, 43, 46, 47, 47, 47, - 48, 47, 46, 46, 45, 46, 46, 46, 47, 48, 48, 50, 50, 52, 53, 54, 55, 55, - 56, 57, 58, 58, 40, 41, 41, 42, 43, 44, 46, 47, 48, 48, 50, 49, 49, 49, - 48, 49, 49, 49, 49, 51, 51, 52, 53, 54, 55, 57, 57, 58, 59, 59, 59, 59, - 41, 42, 42, 42, 43, 45, 46, 47, 48, 48, 50, 50, 49, 49, 49, 49, 50, 50, - 50, 52, 52, 53, 53, 55, 56, 57, 58, 58, 59, 60, 61, 62, 49, 47, 47, 46, - 46, 47, 47, 48, 50, 50, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, - 57, 58, 59, 60, 61, 61, 62, 62, 62, 62, 49, 47, 47, 46, 46, 47, 47, 47, - 49, 50, 53, 53, 53, 53, 54, 54, 54, 54, 54, 55, 56, 57, 57, 59, 59, 61, - 61, 62, 63, 63, 64, 65, 48, 47, 47, 45, 46, 46, 46, 46, 49, 49, 53, 53, - 54, 54, 55, 56, 56, 57, 57, 58, 58, 60, 60, 61, 62, 63, 64, 64, 65, 66, - 65, 65, 48, 47, 46, 45, 45, 46, 46, 46, 49, 49, 53, 53, 54, 55, 56, 57, - 57, 
58, 58, 59, 60, 61, 61, 63, 63, 65, 65, 65, 66, 66, 67, 68, 49, 47, - 46, 45, 45, 45, 45, 45, 48, 49, 53, 54, 55, 56, 58, 59, 60, 61, 61, 62, - 62, 63, 64, 65, 66, 67, 68, 68, 69, 70, 69, 68, 50, 47, 47, 45, 46, 46, - 46, 46, 49, 49, 54, 54, 56, 57, 59, 60, 60, 62, 62, 63, 64, 65, 65, 67, - 68, 69, 69, 70, 70, 70, 71, 71, 50, 48, 48, 46, 46, 46, 46, 46, 49, 50, - 54, 54, 56, 57, 60, 60, 61, 63, 63, 65, 65, 67, 67, 68, 69, 71, 71, 71, - 72, 73, 72, 71, 52, 49, 49, 47, 47, 47, 47, 46, 49, 50, 54, 54, 57, 58, - 61, 62, 63, 65, 65, 67, 67, 69, 70, 71, 72, 73, 74, 74, 75, 74, 74, 75, - 52, 50, 49, 47, 47, 47, 47, 47, 49, 50, 54, 54, 57, 58, 61, 62, 63, 65, - 66, 68, 68, 70, 71, 72, 73, 75, 75, 75, 76, 77, 77, 75, 54, 52, 51, 49, - 49, 49, 48, 48, 51, 52, 55, 55, 58, 59, 62, 63, 65, 67, 68, 70, 70, 73, - 73, 75, 76, 78, 78, 78, 79, 78, 78, 79, 54, 52, 51, 49, 49, 49, 48, 48, - 51, 52, 55, 56, 58, 60, 62, 64, 65, 67, 68, 70, 71, 73, 74, 75, 76, 78, - 78, 79, 80, 81, 81, 79, 57, 54, 54, 52, 51, 51, 50, 50, 52, 53, 56, 57, - 60, 61, 63, 65, 67, 69, 70, 73, 73, 76, 77, 79, 80, 82, 82, 83, 84, 83, - 82, 83, 57, 55, 54, 52, 52, 51, 51, 50, 53, 53, 57, 57, 60, 61, 64, 65, - 67, 70, 71, 73, 74, 77, 77, 79, 80, 82, 83, 83, 84, 85, 85, 83, 60, 57, - 56, 54, 54, 53, 52, 52, 54, 55, 58, 59, 61, 63, 65, 67, 68, 71, 72, 75, - 75, 79, 79, 82, 83, 85, 86, 86, 87, 87, 86, 87, 61, 58, 57, 55, 55, 54, - 53, 53, 55, 56, 59, 59, 62, 63, 66, 68, 69, 72, 73, 76, 76, 80, 80, 83, - 84, 86, 87, 88, 89, 89, 89, 87, 63, 60, 59, 57, 57, 56, 55, 54, 57, 57, - 60, 61, 63, 65, 67, 69, 71, 73, 75, 78, 78, 82, 82, 85, 86, 89, 89, 90, - 91, 92, 90, 91, 64, 61, 60, 58, 57, 57, 56, 55, 57, 58, 61, 61, 64, 65, - 68, 69, 71, 74, 75, 78, 78, 82, 83, 86, 87, 89, 90, 91, 92, 93, 94, 91, - 65, 61, 61, 58, 58, 57, 56, 55, 58, 58, 61, 62, 64, 65, 68, 70, 71, 74, - 75, 78, 79, 83, 83, 86, 88, 90, 91, 91, 93, 94, 94, 96, 66, 63, 62, 60, - 59, 58, 57, 56, 59, 59, 62, 63, 65, 66, 69, 70, 72, 75, 76, 79, 80, 84, - 84, 
87, 89, 91, 92, 93, 94, 94, 96, 96, 67, 64, 63, 61, 60, 59, 58, 57, - 59, 60, 62, 63, 66, 66, 70, 70, 73, 74, 77, 78, 81, 83, 85, 87, 89, 92, - 93, 94, 94, 96, 96, 97, 68, 64, 64, 61, 61, 60, 59, 58, 59, 61, 62, 64, - 65, 67, 69, 71, 72, 74, 77, 78, 81, 82, 85, 86, 89, 90, 94, 94, 96, 96, - 98, 97, 69, 65, 65, 62, 62, 61, 61, 58, 59, 62, 62, 65, 65, 68, 68, 71, - 71, 75, 75, 79, 79, 83, 83, 87, 87, 91, 91, 96, 96, 97, 97, 99, - /* Size 4x8 */ - 31, 47, 50, 61, 36, 47, 47, 57, 43, 50, 50, 58, 45, 53, 58, 65, 47, 54, - 66, 74, 52, 56, 70, 82, 57, 60, 75, 90, 61, 63, 77, 93, - /* Size 8x4 */ - 31, 36, 43, 45, 47, 52, 57, 61, 47, 47, 50, 53, 54, 56, 60, 63, 50, 47, - 50, 58, 66, 70, 75, 77, 61, 57, 58, 65, 74, 82, 90, 93, - /* Size 8x16 */ - 32, 32, 40, 49, 51, 57, 63, 67, 31, 33, 41, 47, 49, 54, 59, 63, 31, 35, - 43, 46, 47, 51, 57, 60, 35, 39, 46, 46, 47, 50, 55, 58, 41, 43, 48, 49, - 49, 52, 57, 59, 49, 47, 50, 53, 54, 57, 60, 62, 48, 46, 49, 54, 57, 60, - 64, 65, 49, 45, 48, 56, 61, 64, 67, 69, 50, 46, 49, 57, 63, 67, 71, 73, - 52, 48, 50, 58, 65, 71, 75, 77, 54, 50, 51, 59, 67, 73, 78, 81, 57, 52, - 53, 61, 69, 77, 82, 85, 61, 55, 56, 63, 72, 80, 86, 88, 64, 58, 58, 65, - 73, 82, 89, 92, 66, 59, 59, 66, 75, 84, 91, 94, 68, 61, 59, 65, 72, 81, - 89, 95, - /* Size 16x8 */ - 32, 31, 31, 35, 41, 49, 48, 49, 50, 52, 54, 57, 61, 64, 66, 68, 32, 33, - 35, 39, 43, 47, 46, 45, 46, 48, 50, 52, 55, 58, 59, 61, 40, 41, 43, 46, - 48, 50, 49, 48, 49, 50, 51, 53, 56, 58, 59, 59, 49, 47, 46, 46, 49, 53, - 54, 56, 57, 58, 59, 61, 63, 65, 66, 65, 51, 49, 47, 47, 49, 54, 57, 61, - 63, 65, 67, 69, 72, 73, 75, 72, 57, 54, 51, 50, 52, 57, 60, 64, 67, 71, - 73, 77, 80, 82, 84, 81, 63, 59, 57, 55, 57, 60, 64, 67, 71, 75, 78, 82, - 86, 89, 91, 89, 67, 63, 60, 58, 59, 62, 65, 69, 73, 77, 81, 85, 88, 92, - 94, 95, - /* Size 16x32 */ - 32, 31, 32, 37, 40, 48, 49, 49, 51, 52, 57, 58, 63, 64, 67, 67, 31, 31, - 33, 38, 41, 47, 47, 47, 49, 50, 54, 55, 60, 61, 63, 64, 31, 31, 33, 38, - 41, 47, 
47, 47, 49, 49, 54, 54, 59, 60, 63, 64, 30, 32, 33, 40, 42, 46, - 45, 45, 47, 48, 52, 52, 57, 58, 60, 61, 31, 33, 35, 41, 43, 46, 46, 45, - 47, 48, 51, 52, 57, 57, 60, 61, 33, 36, 37, 43, 44, 47, 46, 46, 47, 47, - 51, 52, 56, 57, 59, 60, 35, 38, 39, 45, 46, 47, 46, 45, 47, 47, 50, 51, - 55, 56, 58, 60, 37, 40, 41, 47, 47, 47, 46, 45, 46, 47, 50, 50, 54, 55, - 57, 58, 41, 42, 43, 47, 48, 49, 49, 48, 49, 50, 52, 53, 57, 57, 59, 58, - 42, 43, 43, 47, 48, 50, 49, 49, 50, 50, 53, 54, 57, 58, 60, 61, 49, 46, - 47, 48, 50, 53, 53, 53, 54, 54, 57, 57, 60, 61, 62, 61, 49, 46, 47, 48, - 50, 53, 53, 54, 54, 55, 57, 57, 61, 61, 63, 64, 48, 46, 46, 47, 49, 53, - 54, 56, 57, 57, 60, 60, 64, 64, 65, 64, 48, 45, 46, 46, 49, 53, 55, 56, - 58, 58, 61, 61, 65, 65, 66, 67, 49, 45, 45, 46, 48, 53, 56, 58, 61, 61, - 64, 64, 67, 68, 69, 67, 49, 46, 46, 46, 49, 53, 57, 59, 62, 62, 65, 66, - 69, 69, 70, 70, 50, 46, 46, 46, 49, 54, 57, 59, 63, 64, 67, 67, 71, 71, - 73, 71, 51, 47, 47, 47, 49, 54, 58, 61, 64, 66, 69, 70, 73, 74, 74, 74, - 52, 48, 48, 47, 50, 54, 58, 61, 65, 66, 71, 71, 75, 75, 77, 74, 54, 50, - 49, 48, 51, 55, 59, 62, 67, 68, 73, 73, 77, 78, 78, 78, 54, 50, 50, 49, - 51, 55, 59, 62, 67, 68, 73, 74, 78, 78, 81, 78, 57, 52, 52, 50, 52, 56, - 60, 64, 69, 70, 76, 77, 82, 82, 83, 82, 57, 52, 52, 51, 53, 57, 61, 64, - 69, 71, 77, 77, 82, 83, 85, 82, 60, 54, 54, 52, 55, 58, 62, 65, 71, 72, - 79, 79, 85, 86, 87, 86, 61, 56, 55, 53, 56, 59, 63, 66, 72, 73, 80, 81, - 86, 87, 88, 86, 63, 57, 57, 55, 57, 60, 64, 67, 73, 75, 82, 82, 89, 90, - 92, 90, 64, 58, 58, 55, 58, 61, 65, 68, 73, 75, 82, 83, 89, 90, 92, 90, - 64, 59, 58, 56, 58, 61, 65, 68, 74, 75, 83, 83, 90, 91, 94, 95, 66, 60, - 59, 57, 59, 62, 66, 69, 75, 76, 84, 85, 91, 92, 94, 95, 67, 61, 60, 58, - 59, 63, 66, 70, 74, 77, 82, 85, 91, 93, 96, 96, 68, 62, 61, 58, 59, 64, - 65, 71, 72, 78, 81, 86, 89, 94, 95, 96, 68, 62, 62, 59, 59, 65, 65, 71, - 71, 79, 79, 87, 87, 95, 95, 98, - /* Size 32x16 */ - 32, 31, 31, 30, 31, 33, 35, 
37, 41, 42, 49, 49, 48, 48, 49, 49, 50, 51, - 52, 54, 54, 57, 57, 60, 61, 63, 64, 64, 66, 67, 68, 68, 31, 31, 31, 32, - 33, 36, 38, 40, 42, 43, 46, 46, 46, 45, 45, 46, 46, 47, 48, 50, 50, 52, - 52, 54, 56, 57, 58, 59, 60, 61, 62, 62, 32, 33, 33, 33, 35, 37, 39, 41, - 43, 43, 47, 47, 46, 46, 45, 46, 46, 47, 48, 49, 50, 52, 52, 54, 55, 57, - 58, 58, 59, 60, 61, 62, 37, 38, 38, 40, 41, 43, 45, 47, 47, 47, 48, 48, - 47, 46, 46, 46, 46, 47, 47, 48, 49, 50, 51, 52, 53, 55, 55, 56, 57, 58, - 58, 59, 40, 41, 41, 42, 43, 44, 46, 47, 48, 48, 50, 50, 49, 49, 48, 49, - 49, 49, 50, 51, 51, 52, 53, 55, 56, 57, 58, 58, 59, 59, 59, 59, 48, 47, - 47, 46, 46, 47, 47, 47, 49, 50, 53, 53, 53, 53, 53, 53, 54, 54, 54, 55, - 55, 56, 57, 58, 59, 60, 61, 61, 62, 63, 64, 65, 49, 47, 47, 45, 46, 46, - 46, 46, 49, 49, 53, 53, 54, 55, 56, 57, 57, 58, 58, 59, 59, 60, 61, 62, - 63, 64, 65, 65, 66, 66, 65, 65, 49, 47, 47, 45, 45, 46, 45, 45, 48, 49, - 53, 54, 56, 56, 58, 59, 59, 61, 61, 62, 62, 64, 64, 65, 66, 67, 68, 68, - 69, 70, 71, 71, 51, 49, 49, 47, 47, 47, 47, 46, 49, 50, 54, 54, 57, 58, - 61, 62, 63, 64, 65, 67, 67, 69, 69, 71, 72, 73, 73, 74, 75, 74, 72, 71, - 52, 50, 49, 48, 48, 47, 47, 47, 50, 50, 54, 55, 57, 58, 61, 62, 64, 66, - 66, 68, 68, 70, 71, 72, 73, 75, 75, 75, 76, 77, 78, 79, 57, 54, 54, 52, - 51, 51, 50, 50, 52, 53, 57, 57, 60, 61, 64, 65, 67, 69, 71, 73, 73, 76, - 77, 79, 80, 82, 82, 83, 84, 82, 81, 79, 58, 55, 54, 52, 52, 52, 51, 50, - 53, 54, 57, 57, 60, 61, 64, 66, 67, 70, 71, 73, 74, 77, 77, 79, 81, 82, - 83, 83, 85, 85, 86, 87, 63, 60, 59, 57, 57, 56, 55, 54, 57, 57, 60, 61, - 64, 65, 67, 69, 71, 73, 75, 77, 78, 82, 82, 85, 86, 89, 89, 90, 91, 91, - 89, 87, 64, 61, 60, 58, 57, 57, 56, 55, 57, 58, 61, 61, 64, 65, 68, 69, - 71, 74, 75, 78, 78, 82, 83, 86, 87, 90, 90, 91, 92, 93, 94, 95, 67, 63, - 63, 60, 60, 59, 58, 57, 59, 60, 62, 63, 65, 66, 69, 70, 73, 74, 77, 78, - 81, 83, 85, 87, 88, 92, 92, 94, 94, 96, 95, 95, 67, 64, 64, 61, 61, 60, - 60, 58, 58, 61, 61, 64, 64, 
67, 67, 70, 71, 74, 74, 78, 78, 82, 82, 86, - 86, 90, 90, 95, 95, 96, 96, 98, - /* Size 4x16 */ - 31, 48, 52, 64, 31, 47, 49, 60, 33, 46, 48, 57, 38, 47, 47, 56, 42, 49, - 50, 57, 46, 53, 54, 61, 46, 53, 57, 64, 45, 53, 61, 68, 46, 54, 64, 71, - 48, 54, 66, 75, 50, 55, 68, 78, 52, 57, 71, 83, 56, 59, 73, 87, 58, 61, - 75, 90, 60, 62, 76, 92, 62, 64, 78, 94, - /* Size 16x4 */ - 31, 31, 33, 38, 42, 46, 46, 45, 46, 48, 50, 52, 56, 58, 60, 62, 48, 47, - 46, 47, 49, 53, 53, 53, 54, 54, 55, 57, 59, 61, 62, 64, 52, 49, 48, 47, - 50, 54, 57, 61, 64, 66, 68, 71, 73, 75, 76, 78, 64, 60, 57, 56, 57, 61, - 64, 68, 71, 75, 78, 83, 87, 90, 92, 94, - /* Size 8x32 */ - 32, 32, 40, 49, 51, 57, 63, 67, 31, 33, 41, 47, 49, 54, 60, 63, 31, 33, - 41, 47, 49, 54, 59, 63, 30, 33, 42, 45, 47, 52, 57, 60, 31, 35, 43, 46, - 47, 51, 57, 60, 33, 37, 44, 46, 47, 51, 56, 59, 35, 39, 46, 46, 47, 50, - 55, 58, 37, 41, 47, 46, 46, 50, 54, 57, 41, 43, 48, 49, 49, 52, 57, 59, - 42, 43, 48, 49, 50, 53, 57, 60, 49, 47, 50, 53, 54, 57, 60, 62, 49, 47, - 50, 53, 54, 57, 61, 63, 48, 46, 49, 54, 57, 60, 64, 65, 48, 46, 49, 55, - 58, 61, 65, 66, 49, 45, 48, 56, 61, 64, 67, 69, 49, 46, 49, 57, 62, 65, - 69, 70, 50, 46, 49, 57, 63, 67, 71, 73, 51, 47, 49, 58, 64, 69, 73, 74, - 52, 48, 50, 58, 65, 71, 75, 77, 54, 49, 51, 59, 67, 73, 77, 78, 54, 50, - 51, 59, 67, 73, 78, 81, 57, 52, 52, 60, 69, 76, 82, 83, 57, 52, 53, 61, - 69, 77, 82, 85, 60, 54, 55, 62, 71, 79, 85, 87, 61, 55, 56, 63, 72, 80, - 86, 88, 63, 57, 57, 64, 73, 82, 89, 92, 64, 58, 58, 65, 73, 82, 89, 92, - 64, 58, 58, 65, 74, 83, 90, 94, 66, 59, 59, 66, 75, 84, 91, 94, 67, 60, - 59, 66, 74, 82, 91, 96, 68, 61, 59, 65, 72, 81, 89, 95, 68, 62, 59, 65, - 71, 79, 87, 95, - /* Size 32x8 */ - 32, 31, 31, 30, 31, 33, 35, 37, 41, 42, 49, 49, 48, 48, 49, 49, 50, 51, - 52, 54, 54, 57, 57, 60, 61, 63, 64, 64, 66, 67, 68, 68, 32, 33, 33, 33, - 35, 37, 39, 41, 43, 43, 47, 47, 46, 46, 45, 46, 46, 47, 48, 49, 50, 52, - 52, 54, 55, 57, 58, 58, 59, 60, 61, 62, 40, 
41, 41, 42, 43, 44, 46, 47, - 48, 48, 50, 50, 49, 49, 48, 49, 49, 49, 50, 51, 51, 52, 53, 55, 56, 57, - 58, 58, 59, 59, 59, 59, 49, 47, 47, 45, 46, 46, 46, 46, 49, 49, 53, 53, - 54, 55, 56, 57, 57, 58, 58, 59, 59, 60, 61, 62, 63, 64, 65, 65, 66, 66, - 65, 65, 51, 49, 49, 47, 47, 47, 47, 46, 49, 50, 54, 54, 57, 58, 61, 62, - 63, 64, 65, 67, 67, 69, 69, 71, 72, 73, 73, 74, 75, 74, 72, 71, 57, 54, - 54, 52, 51, 51, 50, 50, 52, 53, 57, 57, 60, 61, 64, 65, 67, 69, 71, 73, - 73, 76, 77, 79, 80, 82, 82, 83, 84, 82, 81, 79, 63, 60, 59, 57, 57, 56, - 55, 54, 57, 57, 60, 61, 64, 65, 67, 69, 71, 73, 75, 77, 78, 82, 82, 85, - 86, 89, 89, 90, 91, 91, 89, 87, 67, 63, 63, 60, 60, 59, 58, 57, 59, 60, - 62, 63, 65, 66, 69, 70, 73, 74, 77, 78, 81, 83, 85, 87, 88, 92, 92, 94, - 94, 96, 95, 95 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 34, 49, 72, 34, 48, 60, 79, 49, 60, 82, 104, 72, 79, 104, 134, - /* Size 8x8 */ - 32, 32, 34, 38, 46, 56, 68, 78, 32, 33, 35, 39, 45, 54, 64, 74, 34, 35, - 39, 45, 51, 58, 68, 76, 38, 39, 45, 54, 61, 69, 78, 86, 46, 45, 51, 61, - 71, 80, 90, 99, 56, 54, 58, 69, 80, 92, 103, 113, 68, 64, 68, 78, 90, - 103, 117, 128, 78, 74, 76, 86, 99, 113, 128, 140, - /* Size 16x16 */ - 32, 31, 31, 31, 32, 34, 36, 39, 44, 48, 54, 59, 65, 71, 80, 83, 31, 32, - 32, 32, 32, 34, 35, 38, 42, 46, 51, 56, 62, 68, 76, 78, 31, 32, 32, 32, - 32, 33, 34, 37, 41, 44, 49, 54, 59, 65, 72, 75, 31, 32, 32, 33, 34, 35, - 36, 39, 42, 45, 50, 54, 59, 64, 71, 74, 32, 32, 32, 34, 35, 37, 38, 40, - 42, 46, 49, 53, 58, 63, 69, 72, 34, 34, 33, 35, 37, 39, 42, 45, 47, 51, - 54, 58, 63, 68, 74, 76, 36, 35, 34, 36, 38, 42, 48, 50, 54, 57, 60, 64, - 68, 73, 79, 81, 39, 38, 37, 39, 40, 45, 50, 54, 58, 61, 65, 69, 73, 78, - 84, 86, 44, 42, 41, 42, 42, 47, 54, 58, 63, 67, 71, 75, 79, 84, 90, 92, - 48, 46, 44, 45, 46, 51, 57, 61, 67, 71, 76, 80, 85, 90, 96, 99, 54, 51, - 49, 50, 49, 54, 60, 65, 71, 76, 82, 87, 92, 97, 104, 106, 59, 56, 54, - 54, 53, 58, 64, 69, 75, 80, 87, 92, 98, 103, 110, 
113, 65, 62, 59, 59, - 58, 63, 68, 73, 79, 85, 92, 98, 105, 111, 118, 121, 71, 68, 65, 64, 63, - 68, 73, 78, 84, 90, 97, 103, 111, 117, 125, 128, 80, 76, 72, 71, 69, 74, - 79, 84, 90, 96, 104, 110, 118, 125, 134, 137, 83, 78, 75, 74, 72, 76, - 81, 86, 92, 99, 106, 113, 121, 128, 137, 140, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 32, 32, 34, 34, 36, 36, 39, 39, 44, 44, 48, - 48, 54, 54, 59, 59, 65, 65, 71, 71, 80, 80, 83, 83, 87, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 34, 34, 35, 35, 38, 38, 42, 42, 46, 46, 51, 51, 56, - 56, 62, 62, 68, 68, 76, 76, 78, 78, 83, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 34, 34, 35, 35, 38, 38, 42, 42, 46, 46, 51, 51, 56, 56, 62, 62, 68, - 68, 76, 76, 78, 78, 83, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, - 34, 37, 37, 41, 41, 44, 44, 49, 49, 54, 54, 59, 59, 65, 65, 72, 72, 75, - 75, 79, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 37, 37, 41, - 41, 44, 44, 49, 49, 54, 54, 59, 59, 65, 65, 72, 72, 75, 75, 79, 31, 32, - 32, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 39, 39, 42, 42, 45, 45, 50, - 50, 54, 54, 59, 59, 64, 64, 71, 71, 74, 74, 77, 31, 32, 32, 32, 32, 33, - 33, 34, 34, 35, 35, 36, 36, 39, 39, 42, 42, 45, 45, 50, 50, 54, 54, 59, - 59, 64, 64, 71, 71, 74, 74, 77, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37, - 37, 38, 38, 40, 40, 42, 42, 46, 46, 49, 49, 53, 53, 58, 58, 63, 63, 69, - 69, 72, 72, 75, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37, 37, 38, 38, 40, - 40, 42, 42, 46, 46, 49, 49, 53, 53, 58, 58, 63, 63, 69, 69, 72, 72, 75, - 34, 34, 34, 33, 33, 35, 35, 37, 37, 39, 39, 42, 42, 45, 45, 47, 47, 51, - 51, 54, 54, 58, 58, 63, 63, 68, 68, 74, 74, 76, 76, 80, 34, 34, 34, 33, - 33, 35, 35, 37, 37, 39, 39, 42, 42, 45, 45, 47, 47, 51, 51, 54, 54, 58, - 58, 63, 63, 68, 68, 74, 74, 76, 76, 80, 36, 35, 35, 34, 34, 36, 36, 38, - 38, 42, 42, 48, 48, 50, 50, 54, 54, 57, 57, 60, 60, 64, 64, 68, 68, 73, - 73, 79, 79, 81, 81, 84, 36, 35, 35, 34, 34, 36, 36, 38, 38, 42, 42, 48, - 48, 50, 50, 54, 54, 57, 57, 60, 60, 64, 64, 68, 68, 73, 73, 
79, 79, 81, - 81, 84, 39, 38, 38, 37, 37, 39, 39, 40, 40, 45, 45, 50, 50, 54, 54, 58, - 58, 61, 61, 65, 65, 69, 69, 73, 73, 78, 78, 84, 84, 86, 86, 90, 39, 38, - 38, 37, 37, 39, 39, 40, 40, 45, 45, 50, 50, 54, 54, 58, 58, 61, 61, 65, - 65, 69, 69, 73, 73, 78, 78, 84, 84, 86, 86, 90, 44, 42, 42, 41, 41, 42, - 42, 42, 42, 47, 47, 54, 54, 58, 58, 63, 63, 67, 67, 71, 71, 75, 75, 79, - 79, 84, 84, 90, 90, 92, 92, 96, 44, 42, 42, 41, 41, 42, 42, 42, 42, 47, - 47, 54, 54, 58, 58, 63, 63, 67, 67, 71, 71, 75, 75, 79, 79, 84, 84, 90, - 90, 92, 92, 96, 48, 46, 46, 44, 44, 45, 45, 46, 46, 51, 51, 57, 57, 61, - 61, 67, 67, 71, 71, 76, 76, 80, 80, 85, 85, 90, 90, 96, 96, 99, 99, 102, - 48, 46, 46, 44, 44, 45, 45, 46, 46, 51, 51, 57, 57, 61, 61, 67, 67, 71, - 71, 76, 76, 80, 80, 85, 85, 90, 90, 96, 96, 99, 99, 102, 54, 51, 51, 49, - 49, 50, 50, 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76, 76, 82, 82, 87, - 87, 92, 92, 97, 97, 104, 104, 106, 106, 109, 54, 51, 51, 49, 49, 50, 50, - 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76, 76, 82, 82, 87, 87, 92, 92, - 97, 97, 104, 104, 106, 106, 109, 59, 56, 56, 54, 54, 54, 54, 53, 53, 58, - 58, 64, 64, 69, 69, 75, 75, 80, 80, 87, 87, 92, 92, 98, 98, 103, 103, - 110, 110, 113, 113, 116, 59, 56, 56, 54, 54, 54, 54, 53, 53, 58, 58, 64, - 64, 69, 69, 75, 75, 80, 80, 87, 87, 92, 92, 98, 98, 103, 103, 110, 110, - 113, 113, 116, 65, 62, 62, 59, 59, 59, 59, 58, 58, 63, 63, 68, 68, 73, - 73, 79, 79, 85, 85, 92, 92, 98, 98, 105, 105, 111, 111, 118, 118, 121, - 121, 124, 65, 62, 62, 59, 59, 59, 59, 58, 58, 63, 63, 68, 68, 73, 73, - 79, 79, 85, 85, 92, 92, 98, 98, 105, 105, 111, 111, 118, 118, 121, 121, - 124, 71, 68, 68, 65, 65, 64, 64, 63, 63, 68, 68, 73, 73, 78, 78, 84, 84, - 90, 90, 97, 97, 103, 103, 111, 111, 117, 117, 125, 125, 128, 128, 132, - 71, 68, 68, 65, 65, 64, 64, 63, 63, 68, 68, 73, 73, 78, 78, 84, 84, 90, - 90, 97, 97, 103, 103, 111, 111, 117, 117, 125, 125, 128, 128, 132, 80, - 76, 76, 72, 72, 71, 71, 69, 69, 74, 74, 79, 79, 84, 84, 90, 90, 
96, 96, - 104, 104, 110, 110, 118, 118, 125, 125, 134, 134, 137, 137, 141, 80, 76, - 76, 72, 72, 71, 71, 69, 69, 74, 74, 79, 79, 84, 84, 90, 90, 96, 96, 104, - 104, 110, 110, 118, 118, 125, 125, 134, 134, 137, 137, 141, 83, 78, 78, - 75, 75, 74, 74, 72, 72, 76, 76, 81, 81, 86, 86, 92, 92, 99, 99, 106, - 106, 113, 113, 121, 121, 128, 128, 137, 137, 140, 140, 144, 83, 78, 78, - 75, 75, 74, 74, 72, 72, 76, 76, 81, 81, 86, 86, 92, 92, 99, 99, 106, - 106, 113, 113, 121, 121, 128, 128, 137, 137, 140, 140, 144, 87, 83, 83, - 79, 79, 77, 77, 75, 75, 80, 80, 84, 84, 90, 90, 96, 96, 102, 102, 109, - 109, 116, 116, 124, 124, 132, 132, 141, 141, 144, 144, 149, - /* Size 4x8 */ - 32, 35, 51, 75, 32, 36, 50, 71, 34, 42, 54, 73, 37, 50, 65, 84, 45, 56, - 76, 96, 54, 63, 87, 110, 65, 73, 97, 125, 75, 81, 106, 136, - /* Size 8x4 */ - 32, 32, 34, 37, 45, 54, 65, 75, 35, 36, 42, 50, 56, 63, 73, 81, 51, 50, - 54, 65, 76, 87, 97, 106, 75, 71, 73, 84, 96, 110, 125, 136, - /* Size 8x16 */ - 32, 31, 32, 36, 44, 53, 65, 79, 31, 32, 32, 35, 42, 51, 62, 75, 31, 32, - 33, 34, 41, 49, 59, 72, 32, 32, 34, 36, 42, 50, 59, 71, 32, 33, 35, 38, - 42, 49, 58, 69, 34, 34, 37, 42, 48, 54, 63, 73, 36, 34, 38, 48, 54, 60, - 68, 78, 39, 37, 40, 50, 58, 65, 73, 84, 44, 41, 43, 53, 63, 71, 79, 90, - 48, 45, 46, 56, 67, 76, 85, 96, 53, 49, 50, 60, 71, 82, 92, 103, 58, 54, - 54, 63, 75, 87, 98, 110, 65, 60, 58, 68, 79, 92, 105, 118, 71, 65, 63, - 73, 84, 97, 111, 125, 79, 72, 70, 79, 90, 104, 118, 133, 82, 75, 72, 81, - 92, 106, 121, 136, - /* Size 16x8 */ - 32, 31, 31, 32, 32, 34, 36, 39, 44, 48, 53, 58, 65, 71, 79, 82, 31, 32, - 32, 32, 33, 34, 34, 37, 41, 45, 49, 54, 60, 65, 72, 75, 32, 32, 33, 34, - 35, 37, 38, 40, 43, 46, 50, 54, 58, 63, 70, 72, 36, 35, 34, 36, 38, 42, - 48, 50, 53, 56, 60, 63, 68, 73, 79, 81, 44, 42, 41, 42, 42, 48, 54, 58, - 63, 67, 71, 75, 79, 84, 90, 92, 53, 51, 49, 50, 49, 54, 60, 65, 71, 76, - 82, 87, 92, 97, 104, 106, 65, 62, 59, 59, 58, 63, 68, 73, 79, 85, 92, - 98, 105, 111, 
118, 121, 79, 75, 72, 71, 69, 73, 78, 84, 90, 96, 103, - 110, 118, 125, 133, 136, - /* Size 16x32 */ - 32, 31, 31, 32, 32, 36, 36, 44, 44, 53, 53, 65, 65, 79, 79, 87, 31, 32, - 32, 32, 32, 35, 35, 42, 42, 51, 51, 62, 62, 75, 75, 82, 31, 32, 32, 32, - 32, 35, 35, 42, 42, 51, 51, 62, 62, 75, 75, 82, 31, 32, 32, 33, 33, 34, - 34, 41, 41, 49, 49, 59, 59, 72, 72, 78, 31, 32, 32, 33, 33, 34, 34, 41, - 41, 49, 49, 59, 59, 72, 72, 78, 32, 32, 32, 34, 34, 36, 36, 42, 42, 50, - 50, 59, 59, 71, 71, 77, 32, 32, 32, 34, 34, 36, 36, 42, 42, 50, 50, 59, - 59, 71, 71, 77, 32, 33, 33, 35, 35, 38, 38, 42, 42, 49, 49, 58, 58, 69, - 69, 75, 32, 33, 33, 35, 35, 38, 38, 42, 42, 49, 49, 58, 58, 69, 69, 75, - 34, 34, 34, 37, 37, 42, 42, 48, 48, 54, 54, 63, 63, 73, 73, 79, 34, 34, - 34, 37, 37, 42, 42, 48, 48, 54, 54, 63, 63, 73, 73, 79, 36, 34, 34, 38, - 38, 48, 48, 54, 54, 60, 60, 68, 68, 78, 78, 84, 36, 34, 34, 38, 38, 48, - 48, 54, 54, 60, 60, 68, 68, 78, 78, 84, 39, 37, 37, 40, 40, 50, 50, 58, - 58, 65, 65, 73, 73, 84, 84, 89, 39, 37, 37, 40, 40, 50, 50, 58, 58, 65, - 65, 73, 73, 84, 84, 89, 44, 41, 41, 43, 43, 53, 53, 63, 63, 71, 71, 79, - 79, 90, 90, 95, 44, 41, 41, 43, 43, 53, 53, 63, 63, 71, 71, 79, 79, 90, - 90, 95, 48, 45, 45, 46, 46, 56, 56, 67, 67, 76, 76, 85, 85, 96, 96, 102, - 48, 45, 45, 46, 46, 56, 56, 67, 67, 76, 76, 85, 85, 96, 96, 102, 53, 49, - 49, 50, 50, 60, 60, 71, 71, 82, 82, 92, 92, 103, 103, 109, 53, 49, 49, - 50, 50, 60, 60, 71, 71, 82, 82, 92, 92, 103, 103, 109, 58, 54, 54, 54, - 54, 63, 63, 75, 75, 87, 87, 98, 98, 110, 110, 116, 58, 54, 54, 54, 54, - 63, 63, 75, 75, 87, 87, 98, 98, 110, 110, 116, 65, 60, 60, 58, 58, 68, - 68, 79, 79, 92, 92, 105, 105, 118, 118, 124, 65, 60, 60, 58, 58, 68, 68, - 79, 79, 92, 92, 105, 105, 118, 118, 124, 71, 65, 65, 63, 63, 73, 73, 84, - 84, 97, 97, 111, 111, 125, 125, 132, 71, 65, 65, 63, 63, 73, 73, 84, 84, - 97, 97, 111, 111, 125, 125, 132, 79, 72, 72, 70, 70, 79, 79, 90, 90, - 104, 104, 118, 118, 133, 133, 141, 79, 72, 72, 
70, 70, 79, 79, 90, 90, - 104, 104, 118, 118, 133, 133, 141, 82, 75, 75, 72, 72, 81, 81, 92, 92, - 106, 106, 121, 121, 136, 136, 144, 82, 75, 75, 72, 72, 81, 81, 92, 92, - 106, 106, 121, 121, 136, 136, 144, 87, 79, 79, 76, 76, 84, 84, 96, 96, - 109, 109, 124, 124, 141, 141, 149, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 36, 36, 39, 39, 44, 44, 48, - 48, 53, 53, 58, 58, 65, 65, 71, 71, 79, 79, 82, 82, 87, 31, 32, 32, 32, - 32, 32, 32, 33, 33, 34, 34, 34, 34, 37, 37, 41, 41, 45, 45, 49, 49, 54, - 54, 60, 60, 65, 65, 72, 72, 75, 75, 79, 31, 32, 32, 32, 32, 32, 32, 33, - 33, 34, 34, 34, 34, 37, 37, 41, 41, 45, 45, 49, 49, 54, 54, 60, 60, 65, - 65, 72, 72, 75, 75, 79, 32, 32, 32, 33, 33, 34, 34, 35, 35, 37, 37, 38, - 38, 40, 40, 43, 43, 46, 46, 50, 50, 54, 54, 58, 58, 63, 63, 70, 70, 72, - 72, 76, 32, 32, 32, 33, 33, 34, 34, 35, 35, 37, 37, 38, 38, 40, 40, 43, - 43, 46, 46, 50, 50, 54, 54, 58, 58, 63, 63, 70, 70, 72, 72, 76, 36, 35, - 35, 34, 34, 36, 36, 38, 38, 42, 42, 48, 48, 50, 50, 53, 53, 56, 56, 60, - 60, 63, 63, 68, 68, 73, 73, 79, 79, 81, 81, 84, 36, 35, 35, 34, 34, 36, - 36, 38, 38, 42, 42, 48, 48, 50, 50, 53, 53, 56, 56, 60, 60, 63, 63, 68, - 68, 73, 73, 79, 79, 81, 81, 84, 44, 42, 42, 41, 41, 42, 42, 42, 42, 48, - 48, 54, 54, 58, 58, 63, 63, 67, 67, 71, 71, 75, 75, 79, 79, 84, 84, 90, - 90, 92, 92, 96, 44, 42, 42, 41, 41, 42, 42, 42, 42, 48, 48, 54, 54, 58, - 58, 63, 63, 67, 67, 71, 71, 75, 75, 79, 79, 84, 84, 90, 90, 92, 92, 96, - 53, 51, 51, 49, 49, 50, 50, 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76, - 76, 82, 82, 87, 87, 92, 92, 97, 97, 104, 104, 106, 106, 109, 53, 51, 51, - 49, 49, 50, 50, 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76, 76, 82, 82, - 87, 87, 92, 92, 97, 97, 104, 104, 106, 106, 109, 65, 62, 62, 59, 59, 59, - 59, 58, 58, 63, 63, 68, 68, 73, 73, 79, 79, 85, 85, 92, 92, 98, 98, 105, - 105, 111, 111, 118, 118, 121, 121, 124, 65, 62, 62, 59, 59, 59, 59, 58, - 58, 63, 63, 68, 68, 73, 73, 79, 79, 85, 85, 92, 92, 98, 98, 105, 
105, - 111, 111, 118, 118, 121, 121, 124, 79, 75, 75, 72, 72, 71, 71, 69, 69, - 73, 73, 78, 78, 84, 84, 90, 90, 96, 96, 103, 103, 110, 110, 118, 118, - 125, 125, 133, 133, 136, 136, 141, 79, 75, 75, 72, 72, 71, 71, 69, 69, - 73, 73, 78, 78, 84, 84, 90, 90, 96, 96, 103, 103, 110, 110, 118, 118, - 125, 125, 133, 133, 136, 136, 141, 87, 82, 82, 78, 78, 77, 77, 75, 75, - 79, 79, 84, 84, 89, 89, 95, 95, 102, 102, 109, 109, 116, 116, 124, 124, - 132, 132, 141, 141, 144, 144, 149, - /* Size 4x16 */ - 31, 36, 53, 79, 32, 35, 51, 75, 32, 34, 49, 72, 32, 36, 50, 71, 33, 38, - 49, 69, 34, 42, 54, 73, 34, 48, 60, 78, 37, 50, 65, 84, 41, 53, 71, 90, - 45, 56, 76, 96, 49, 60, 82, 103, 54, 63, 87, 110, 60, 68, 92, 118, 65, - 73, 97, 125, 72, 79, 104, 133, 75, 81, 106, 136, - /* Size 16x4 */ - 31, 32, 32, 32, 33, 34, 34, 37, 41, 45, 49, 54, 60, 65, 72, 75, 36, 35, - 34, 36, 38, 42, 48, 50, 53, 56, 60, 63, 68, 73, 79, 81, 53, 51, 49, 50, - 49, 54, 60, 65, 71, 76, 82, 87, 92, 97, 104, 106, 79, 75, 72, 71, 69, - 73, 78, 84, 90, 96, 103, 110, 118, 125, 133, 136, - /* Size 8x32 */ - 32, 31, 32, 36, 44, 53, 65, 79, 31, 32, 32, 35, 42, 51, 62, 75, 31, 32, - 32, 35, 42, 51, 62, 75, 31, 32, 33, 34, 41, 49, 59, 72, 31, 32, 33, 34, - 41, 49, 59, 72, 32, 32, 34, 36, 42, 50, 59, 71, 32, 32, 34, 36, 42, 50, - 59, 71, 32, 33, 35, 38, 42, 49, 58, 69, 32, 33, 35, 38, 42, 49, 58, 69, - 34, 34, 37, 42, 48, 54, 63, 73, 34, 34, 37, 42, 48, 54, 63, 73, 36, 34, - 38, 48, 54, 60, 68, 78, 36, 34, 38, 48, 54, 60, 68, 78, 39, 37, 40, 50, - 58, 65, 73, 84, 39, 37, 40, 50, 58, 65, 73, 84, 44, 41, 43, 53, 63, 71, - 79, 90, 44, 41, 43, 53, 63, 71, 79, 90, 48, 45, 46, 56, 67, 76, 85, 96, - 48, 45, 46, 56, 67, 76, 85, 96, 53, 49, 50, 60, 71, 82, 92, 103, 53, 49, - 50, 60, 71, 82, 92, 103, 58, 54, 54, 63, 75, 87, 98, 110, 58, 54, 54, - 63, 75, 87, 98, 110, 65, 60, 58, 68, 79, 92, 105, 118, 65, 60, 58, 68, - 79, 92, 105, 118, 71, 65, 63, 73, 84, 97, 111, 125, 71, 65, 63, 73, 84, - 97, 111, 125, 79, 72, 70, 79, 90, 
104, 118, 133, 79, 72, 70, 79, 90, - 104, 118, 133, 82, 75, 72, 81, 92, 106, 121, 136, 82, 75, 72, 81, 92, - 106, 121, 136, 87, 79, 76, 84, 96, 109, 124, 141, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 36, 36, 39, 39, 44, 44, 48, - 48, 53, 53, 58, 58, 65, 65, 71, 71, 79, 79, 82, 82, 87, 31, 32, 32, 32, - 32, 32, 32, 33, 33, 34, 34, 34, 34, 37, 37, 41, 41, 45, 45, 49, 49, 54, - 54, 60, 60, 65, 65, 72, 72, 75, 75, 79, 32, 32, 32, 33, 33, 34, 34, 35, - 35, 37, 37, 38, 38, 40, 40, 43, 43, 46, 46, 50, 50, 54, 54, 58, 58, 63, - 63, 70, 70, 72, 72, 76, 36, 35, 35, 34, 34, 36, 36, 38, 38, 42, 42, 48, - 48, 50, 50, 53, 53, 56, 56, 60, 60, 63, 63, 68, 68, 73, 73, 79, 79, 81, - 81, 84, 44, 42, 42, 41, 41, 42, 42, 42, 42, 48, 48, 54, 54, 58, 58, 63, - 63, 67, 67, 71, 71, 75, 75, 79, 79, 84, 84, 90, 90, 92, 92, 96, 53, 51, - 51, 49, 49, 50, 50, 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76, 76, 82, - 82, 87, 87, 92, 92, 97, 97, 104, 104, 106, 106, 109, 65, 62, 62, 59, 59, - 59, 59, 58, 58, 63, 63, 68, 68, 73, 73, 79, 79, 85, 85, 92, 92, 98, 98, - 105, 105, 111, 111, 118, 118, 121, 121, 124, 79, 75, 75, 72, 72, 71, 71, - 69, 69, 73, 73, 78, 78, 84, 84, 90, 90, 96, 96, 103, 103, 110, 110, 118, - 118, 125, 125, 133, 133, 136, 136, 141 }, - { /* Chroma */ - /* Size 4x4 */ - 32, 46, 47, 57, 46, 53, 54, 60, 47, 54, 66, 75, 57, 60, 75, 89, - /* Size 8x8 */ - 31, 34, 42, 47, 48, 52, 57, 61, 34, 39, 45, 46, 46, 49, 53, 57, 42, 45, - 48, 49, 50, 52, 55, 58, 47, 46, 49, 54, 56, 58, 61, 64, 48, 46, 50, 56, - 61, 65, 68, 71, 52, 49, 52, 58, 65, 71, 75, 79, 57, 53, 55, 61, 68, 75, - 82, 86, 61, 57, 58, 64, 71, 79, 86, 91, - /* Size 16x16 */ - 32, 31, 30, 33, 36, 41, 49, 48, 49, 50, 52, 54, 57, 60, 63, 65, 31, 31, - 31, 34, 38, 42, 47, 47, 47, 48, 50, 52, 54, 57, 60, 61, 30, 31, 32, 35, - 40, 42, 46, 45, 45, 46, 47, 49, 52, 54, 57, 58, 33, 34, 35, 39, 43, 45, - 47, 46, 45, 46, 47, 49, 51, 53, 56, 57, 36, 38, 40, 43, 47, 47, 48, 46, - 45, 46, 47, 48, 50, 52, 54, 55, 41, 42, 
42, 45, 47, 48, 50, 49, 49, 50, - 50, 52, 53, 55, 57, 58, 49, 47, 46, 47, 48, 50, 53, 53, 53, 54, 54, 55, - 56, 58, 60, 61, 48, 47, 45, 46, 46, 49, 53, 54, 55, 56, 57, 58, 60, 61, - 63, 64, 49, 47, 45, 45, 45, 49, 53, 55, 58, 60, 61, 62, 63, 65, 67, 68, - 50, 48, 46, 46, 46, 50, 54, 56, 60, 61, 63, 65, 67, 68, 71, 71, 52, 50, - 47, 47, 47, 50, 54, 57, 61, 63, 66, 68, 70, 72, 75, 75, 54, 52, 49, 49, - 48, 52, 55, 58, 62, 65, 68, 71, 73, 75, 78, 79, 57, 54, 52, 51, 50, 53, - 56, 60, 63, 67, 70, 73, 76, 79, 82, 83, 60, 57, 54, 53, 52, 55, 58, 61, - 65, 68, 72, 75, 79, 82, 85, 86, 63, 60, 57, 56, 54, 57, 60, 63, 67, 71, - 75, 78, 82, 85, 89, 90, 65, 61, 58, 57, 55, 58, 61, 64, 68, 71, 75, 79, - 83, 86, 90, 91, - /* Size 32x32 */ - 32, 31, 31, 30, 30, 33, 33, 36, 36, 41, 41, 49, 49, 48, 48, 49, 49, 50, - 50, 52, 52, 54, 54, 57, 57, 60, 60, 63, 63, 65, 65, 67, 31, 31, 31, 31, - 31, 34, 34, 38, 38, 42, 42, 47, 47, 47, 47, 47, 47, 48, 48, 50, 50, 52, - 52, 54, 54, 57, 57, 60, 60, 61, 61, 63, 31, 31, 31, 31, 31, 34, 34, 38, - 38, 42, 42, 47, 47, 47, 47, 47, 47, 48, 48, 50, 50, 52, 52, 54, 54, 57, - 57, 60, 60, 61, 61, 63, 30, 31, 31, 32, 32, 35, 35, 40, 40, 42, 42, 46, - 46, 45, 45, 45, 45, 46, 46, 47, 47, 49, 49, 52, 52, 54, 54, 57, 57, 58, - 58, 60, 30, 31, 31, 32, 32, 35, 35, 40, 40, 42, 42, 46, 46, 45, 45, 45, - 45, 46, 46, 47, 47, 49, 49, 52, 52, 54, 54, 57, 57, 58, 58, 60, 33, 34, - 34, 35, 35, 39, 39, 43, 43, 45, 45, 47, 47, 46, 46, 45, 45, 46, 46, 47, - 47, 49, 49, 51, 51, 53, 53, 56, 56, 57, 57, 59, 33, 34, 34, 35, 35, 39, - 39, 43, 43, 45, 45, 47, 47, 46, 46, 45, 45, 46, 46, 47, 47, 49, 49, 51, - 51, 53, 53, 56, 56, 57, 57, 59, 36, 38, 38, 40, 40, 43, 43, 47, 47, 47, - 47, 48, 48, 46, 46, 45, 45, 46, 46, 47, 47, 48, 48, 50, 50, 52, 52, 54, - 54, 55, 55, 57, 36, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48, 48, 46, - 46, 45, 45, 46, 46, 47, 47, 48, 48, 50, 50, 52, 52, 54, 54, 55, 55, 57, - 41, 42, 42, 42, 42, 45, 45, 47, 47, 48, 48, 50, 50, 49, 49, 49, 49, 50, - 50, 
50, 50, 52, 52, 53, 53, 55, 55, 57, 57, 58, 58, 60, 41, 42, 42, 42, - 42, 45, 45, 47, 47, 48, 48, 50, 50, 49, 49, 49, 49, 50, 50, 50, 50, 52, - 52, 53, 53, 55, 55, 57, 57, 58, 58, 60, 49, 47, 47, 46, 46, 47, 47, 48, - 48, 50, 50, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 58, - 58, 60, 60, 61, 61, 62, 49, 47, 47, 46, 46, 47, 47, 48, 48, 50, 50, 53, - 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 58, 58, 60, 60, 61, - 61, 62, 48, 47, 47, 45, 45, 46, 46, 46, 46, 49, 49, 53, 53, 54, 54, 55, - 55, 56, 56, 57, 57, 58, 58, 60, 60, 61, 61, 63, 63, 64, 64, 66, 48, 47, - 47, 45, 45, 46, 46, 46, 46, 49, 49, 53, 53, 54, 54, 55, 55, 56, 56, 57, - 57, 58, 58, 60, 60, 61, 61, 63, 63, 64, 64, 66, 49, 47, 47, 45, 45, 45, - 45, 45, 45, 49, 49, 53, 53, 55, 55, 58, 58, 60, 60, 61, 61, 62, 62, 63, - 63, 65, 65, 67, 67, 68, 68, 69, 49, 47, 47, 45, 45, 45, 45, 45, 45, 49, - 49, 53, 53, 55, 55, 58, 58, 60, 60, 61, 61, 62, 62, 63, 63, 65, 65, 67, - 67, 68, 68, 69, 50, 48, 48, 46, 46, 46, 46, 46, 46, 50, 50, 54, 54, 56, - 56, 60, 60, 61, 61, 63, 63, 65, 65, 67, 67, 68, 68, 71, 71, 71, 71, 72, - 50, 48, 48, 46, 46, 46, 46, 46, 46, 50, 50, 54, 54, 56, 56, 60, 60, 61, - 61, 63, 63, 65, 65, 67, 67, 68, 68, 71, 71, 71, 71, 72, 52, 50, 50, 47, - 47, 47, 47, 47, 47, 50, 50, 54, 54, 57, 57, 61, 61, 63, 63, 66, 66, 68, - 68, 70, 70, 72, 72, 75, 75, 75, 75, 76, 52, 50, 50, 47, 47, 47, 47, 47, - 47, 50, 50, 54, 54, 57, 57, 61, 61, 63, 63, 66, 66, 68, 68, 70, 70, 72, - 72, 75, 75, 75, 75, 76, 54, 52, 52, 49, 49, 49, 49, 48, 48, 52, 52, 55, - 55, 58, 58, 62, 62, 65, 65, 68, 68, 71, 71, 73, 73, 75, 75, 78, 78, 79, - 79, 80, 54, 52, 52, 49, 49, 49, 49, 48, 48, 52, 52, 55, 55, 58, 58, 62, - 62, 65, 65, 68, 68, 71, 71, 73, 73, 75, 75, 78, 78, 79, 79, 80, 57, 54, - 54, 52, 52, 51, 51, 50, 50, 53, 53, 56, 56, 60, 60, 63, 63, 67, 67, 70, - 70, 73, 73, 76, 76, 79, 79, 82, 82, 83, 83, 84, 57, 54, 54, 52, 52, 51, - 51, 50, 50, 53, 53, 56, 56, 60, 60, 63, 63, 67, 67, 70, 70, 73, 73, 76, - 76, 
79, 79, 82, 82, 83, 83, 84, 60, 57, 57, 54, 54, 53, 53, 52, 52, 55, - 55, 58, 58, 61, 61, 65, 65, 68, 68, 72, 72, 75, 75, 79, 79, 82, 82, 85, - 85, 86, 86, 88, 60, 57, 57, 54, 54, 53, 53, 52, 52, 55, 55, 58, 58, 61, - 61, 65, 65, 68, 68, 72, 72, 75, 75, 79, 79, 82, 82, 85, 85, 86, 86, 88, - 63, 60, 60, 57, 57, 56, 56, 54, 54, 57, 57, 60, 60, 63, 63, 67, 67, 71, - 71, 75, 75, 78, 78, 82, 82, 85, 85, 89, 89, 90, 90, 92, 63, 60, 60, 57, - 57, 56, 56, 54, 54, 57, 57, 60, 60, 63, 63, 67, 67, 71, 71, 75, 75, 78, - 78, 82, 82, 85, 85, 89, 89, 90, 90, 92, 65, 61, 61, 58, 58, 57, 57, 55, - 55, 58, 58, 61, 61, 64, 64, 68, 68, 71, 71, 75, 75, 79, 79, 83, 83, 86, - 86, 90, 90, 91, 91, 93, 65, 61, 61, 58, 58, 57, 57, 55, 55, 58, 58, 61, - 61, 64, 64, 68, 68, 71, 71, 75, 75, 79, 79, 83, 83, 86, 86, 90, 90, 91, - 91, 93, 67, 63, 63, 60, 60, 59, 59, 57, 57, 60, 60, 62, 62, 66, 66, 69, - 69, 72, 72, 76, 76, 80, 80, 84, 84, 88, 88, 92, 92, 93, 93, 95, - /* Size 4x8 */ - 31, 47, 50, 60, 36, 47, 47, 56, 43, 50, 50, 57, 46, 53, 57, 64, 46, 54, - 64, 71, 50, 55, 68, 78, 54, 58, 72, 85, 59, 61, 75, 90, - /* Size 8x4 */ - 31, 36, 43, 46, 46, 50, 54, 59, 47, 47, 50, 53, 54, 55, 58, 61, 50, 47, - 50, 57, 64, 68, 72, 75, 60, 56, 57, 64, 71, 78, 85, 90, - /* Size 8x16 */ - 32, 31, 37, 48, 49, 52, 57, 63, 31, 31, 38, 47, 47, 50, 54, 60, 30, 32, - 40, 46, 45, 48, 52, 57, 33, 36, 43, 47, 46, 47, 51, 56, 37, 40, 47, 47, - 45, 47, 50, 54, 42, 43, 47, 50, 49, 50, 53, 57, 49, 46, 48, 53, 53, 54, - 57, 60, 48, 46, 47, 53, 56, 57, 60, 64, 49, 45, 46, 53, 58, 61, 64, 67, - 50, 46, 46, 54, 59, 64, 67, 71, 52, 48, 47, 54, 61, 66, 71, 75, 54, 50, - 49, 55, 62, 68, 73, 78, 57, 52, 50, 56, 64, 70, 76, 82, 60, 54, 52, 58, - 65, 72, 79, 85, 63, 57, 55, 60, 67, 75, 82, 89, 64, 59, 56, 61, 68, 75, - 83, 90, - /* Size 16x8 */ - 32, 31, 30, 33, 37, 42, 49, 48, 49, 50, 52, 54, 57, 60, 63, 64, 31, 31, - 32, 36, 40, 43, 46, 46, 45, 46, 48, 50, 52, 54, 57, 59, 37, 38, 40, 43, - 47, 47, 48, 47, 46, 46, 47, 49, 50, 52, 
55, 56, 48, 47, 46, 47, 47, 50, - 53, 53, 53, 54, 54, 55, 56, 58, 60, 61, 49, 47, 45, 46, 45, 49, 53, 56, - 58, 59, 61, 62, 64, 65, 67, 68, 52, 50, 48, 47, 47, 50, 54, 57, 61, 64, - 66, 68, 70, 72, 75, 75, 57, 54, 52, 51, 50, 53, 57, 60, 64, 67, 71, 73, - 76, 79, 82, 83, 63, 60, 57, 56, 54, 57, 60, 64, 67, 71, 75, 78, 82, 85, - 89, 90, - /* Size 16x32 */ - 32, 31, 31, 37, 37, 48, 48, 49, 49, 52, 52, 57, 57, 63, 63, 66, 31, 31, - 31, 38, 38, 47, 47, 47, 47, 50, 50, 54, 54, 60, 60, 63, 31, 31, 31, 38, - 38, 47, 47, 47, 47, 50, 50, 54, 54, 60, 60, 63, 30, 32, 32, 40, 40, 46, - 46, 45, 45, 48, 48, 52, 52, 57, 57, 60, 30, 32, 32, 40, 40, 46, 46, 45, - 45, 48, 48, 52, 52, 57, 57, 60, 33, 36, 36, 43, 43, 47, 47, 46, 46, 47, - 47, 51, 51, 56, 56, 59, 33, 36, 36, 43, 43, 47, 47, 46, 46, 47, 47, 51, - 51, 56, 56, 59, 37, 40, 40, 47, 47, 47, 47, 45, 45, 47, 47, 50, 50, 54, - 54, 57, 37, 40, 40, 47, 47, 47, 47, 45, 45, 47, 47, 50, 50, 54, 54, 57, - 42, 43, 43, 47, 47, 50, 50, 49, 49, 50, 50, 53, 53, 57, 57, 60, 42, 43, - 43, 47, 47, 50, 50, 49, 49, 50, 50, 53, 53, 57, 57, 60, 49, 46, 46, 48, - 48, 53, 53, 53, 53, 54, 54, 57, 57, 60, 60, 62, 49, 46, 46, 48, 48, 53, - 53, 53, 53, 54, 54, 57, 57, 60, 60, 62, 48, 46, 46, 47, 47, 53, 53, 56, - 56, 57, 57, 60, 60, 64, 64, 66, 48, 46, 46, 47, 47, 53, 53, 56, 56, 57, - 57, 60, 60, 64, 64, 66, 49, 45, 45, 46, 46, 53, 53, 58, 58, 61, 61, 64, - 64, 67, 67, 69, 49, 45, 45, 46, 46, 53, 53, 58, 58, 61, 61, 64, 64, 67, - 67, 69, 50, 46, 46, 46, 46, 54, 54, 59, 59, 64, 64, 67, 67, 71, 71, 73, - 50, 46, 46, 46, 46, 54, 54, 59, 59, 64, 64, 67, 67, 71, 71, 73, 52, 48, - 48, 47, 47, 54, 54, 61, 61, 66, 66, 71, 71, 75, 75, 77, 52, 48, 48, 47, - 47, 54, 54, 61, 61, 66, 66, 71, 71, 75, 75, 77, 54, 50, 50, 49, 49, 55, - 55, 62, 62, 68, 68, 73, 73, 78, 78, 80, 54, 50, 50, 49, 49, 55, 55, 62, - 62, 68, 68, 73, 73, 78, 78, 80, 57, 52, 52, 50, 50, 56, 56, 64, 64, 70, - 70, 76, 76, 82, 82, 84, 57, 52, 52, 50, 50, 56, 56, 64, 64, 70, 70, 76, - 76, 82, 82, 
84, 60, 54, 54, 52, 52, 58, 58, 65, 65, 72, 72, 79, 79, 85, - 85, 88, 60, 54, 54, 52, 52, 58, 58, 65, 65, 72, 72, 79, 79, 85, 85, 88, - 63, 57, 57, 55, 55, 60, 60, 67, 67, 75, 75, 82, 82, 89, 89, 92, 63, 57, - 57, 55, 55, 60, 60, 67, 67, 75, 75, 82, 82, 89, 89, 92, 64, 59, 59, 56, - 56, 61, 61, 68, 68, 75, 75, 83, 83, 90, 90, 93, 64, 59, 59, 56, 56, 61, - 61, 68, 68, 75, 75, 83, 83, 90, 90, 93, 66, 60, 60, 57, 57, 63, 63, 69, - 69, 77, 77, 84, 84, 92, 92, 95, - /* Size 32x16 */ - 32, 31, 31, 30, 30, 33, 33, 37, 37, 42, 42, 49, 49, 48, 48, 49, 49, 50, - 50, 52, 52, 54, 54, 57, 57, 60, 60, 63, 63, 64, 64, 66, 31, 31, 31, 32, - 32, 36, 36, 40, 40, 43, 43, 46, 46, 46, 46, 45, 45, 46, 46, 48, 48, 50, - 50, 52, 52, 54, 54, 57, 57, 59, 59, 60, 31, 31, 31, 32, 32, 36, 36, 40, - 40, 43, 43, 46, 46, 46, 46, 45, 45, 46, 46, 48, 48, 50, 50, 52, 52, 54, - 54, 57, 57, 59, 59, 60, 37, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48, - 48, 47, 47, 46, 46, 46, 46, 47, 47, 49, 49, 50, 50, 52, 52, 55, 55, 56, - 56, 57, 37, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48, 48, 47, 47, 46, - 46, 46, 46, 47, 47, 49, 49, 50, 50, 52, 52, 55, 55, 56, 56, 57, 48, 47, - 47, 46, 46, 47, 47, 47, 47, 50, 50, 53, 53, 53, 53, 53, 53, 54, 54, 54, - 54, 55, 55, 56, 56, 58, 58, 60, 60, 61, 61, 63, 48, 47, 47, 46, 46, 47, - 47, 47, 47, 50, 50, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, - 56, 58, 58, 60, 60, 61, 61, 63, 49, 47, 47, 45, 45, 46, 46, 45, 45, 49, - 49, 53, 53, 56, 56, 58, 58, 59, 59, 61, 61, 62, 62, 64, 64, 65, 65, 67, - 67, 68, 68, 69, 49, 47, 47, 45, 45, 46, 46, 45, 45, 49, 49, 53, 53, 56, - 56, 58, 58, 59, 59, 61, 61, 62, 62, 64, 64, 65, 65, 67, 67, 68, 68, 69, - 52, 50, 50, 48, 48, 47, 47, 47, 47, 50, 50, 54, 54, 57, 57, 61, 61, 64, - 64, 66, 66, 68, 68, 70, 70, 72, 72, 75, 75, 75, 75, 77, 52, 50, 50, 48, - 48, 47, 47, 47, 47, 50, 50, 54, 54, 57, 57, 61, 61, 64, 64, 66, 66, 68, - 68, 70, 70, 72, 72, 75, 75, 75, 75, 77, 57, 54, 54, 52, 52, 51, 51, 50, - 50, 53, 53, 57, 57, 60, 60, 64, 
64, 67, 67, 71, 71, 73, 73, 76, 76, 79, - 79, 82, 82, 83, 83, 84, 57, 54, 54, 52, 52, 51, 51, 50, 50, 53, 53, 57, - 57, 60, 60, 64, 64, 67, 67, 71, 71, 73, 73, 76, 76, 79, 79, 82, 82, 83, - 83, 84, 63, 60, 60, 57, 57, 56, 56, 54, 54, 57, 57, 60, 60, 64, 64, 67, - 67, 71, 71, 75, 75, 78, 78, 82, 82, 85, 85, 89, 89, 90, 90, 92, 63, 60, - 60, 57, 57, 56, 56, 54, 54, 57, 57, 60, 60, 64, 64, 67, 67, 71, 71, 75, - 75, 78, 78, 82, 82, 85, 85, 89, 89, 90, 90, 92, 66, 63, 63, 60, 60, 59, - 59, 57, 57, 60, 60, 62, 62, 66, 66, 69, 69, 73, 73, 77, 77, 80, 80, 84, - 84, 88, 88, 92, 92, 93, 93, 95, - /* Size 4x16 */ - 31, 48, 52, 63, 31, 47, 50, 60, 32, 46, 48, 57, 36, 47, 47, 56, 40, 47, - 47, 54, 43, 50, 50, 57, 46, 53, 54, 60, 46, 53, 57, 64, 45, 53, 61, 67, - 46, 54, 64, 71, 48, 54, 66, 75, 50, 55, 68, 78, 52, 56, 70, 82, 54, 58, - 72, 85, 57, 60, 75, 89, 59, 61, 75, 90, - /* Size 16x4 */ - 31, 31, 32, 36, 40, 43, 46, 46, 45, 46, 48, 50, 52, 54, 57, 59, 48, 47, - 46, 47, 47, 50, 53, 53, 53, 54, 54, 55, 56, 58, 60, 61, 52, 50, 48, 47, - 47, 50, 54, 57, 61, 64, 66, 68, 70, 72, 75, 75, 63, 60, 57, 56, 54, 57, - 60, 64, 67, 71, 75, 78, 82, 85, 89, 90, - /* Size 8x32 */ - 32, 31, 37, 48, 49, 52, 57, 63, 31, 31, 38, 47, 47, 50, 54, 60, 31, 31, - 38, 47, 47, 50, 54, 60, 30, 32, 40, 46, 45, 48, 52, 57, 30, 32, 40, 46, - 45, 48, 52, 57, 33, 36, 43, 47, 46, 47, 51, 56, 33, 36, 43, 47, 46, 47, - 51, 56, 37, 40, 47, 47, 45, 47, 50, 54, 37, 40, 47, 47, 45, 47, 50, 54, - 42, 43, 47, 50, 49, 50, 53, 57, 42, 43, 47, 50, 49, 50, 53, 57, 49, 46, - 48, 53, 53, 54, 57, 60, 49, 46, 48, 53, 53, 54, 57, 60, 48, 46, 47, 53, - 56, 57, 60, 64, 48, 46, 47, 53, 56, 57, 60, 64, 49, 45, 46, 53, 58, 61, - 64, 67, 49, 45, 46, 53, 58, 61, 64, 67, 50, 46, 46, 54, 59, 64, 67, 71, - 50, 46, 46, 54, 59, 64, 67, 71, 52, 48, 47, 54, 61, 66, 71, 75, 52, 48, - 47, 54, 61, 66, 71, 75, 54, 50, 49, 55, 62, 68, 73, 78, 54, 50, 49, 55, - 62, 68, 73, 78, 57, 52, 50, 56, 64, 70, 76, 82, 57, 52, 50, 56, 64, 70, - 76, 82, 
60, 54, 52, 58, 65, 72, 79, 85, 60, 54, 52, 58, 65, 72, 79, 85, - 63, 57, 55, 60, 67, 75, 82, 89, 63, 57, 55, 60, 67, 75, 82, 89, 64, 59, - 56, 61, 68, 75, 83, 90, 64, 59, 56, 61, 68, 75, 83, 90, 66, 60, 57, 63, - 69, 77, 84, 92, - /* Size 32x8 */ - 32, 31, 31, 30, 30, 33, 33, 37, 37, 42, 42, 49, 49, 48, 48, 49, 49, 50, - 50, 52, 52, 54, 54, 57, 57, 60, 60, 63, 63, 64, 64, 66, 31, 31, 31, 32, - 32, 36, 36, 40, 40, 43, 43, 46, 46, 46, 46, 45, 45, 46, 46, 48, 48, 50, - 50, 52, 52, 54, 54, 57, 57, 59, 59, 60, 37, 38, 38, 40, 40, 43, 43, 47, - 47, 47, 47, 48, 48, 47, 47, 46, 46, 46, 46, 47, 47, 49, 49, 50, 50, 52, - 52, 55, 55, 56, 56, 57, 48, 47, 47, 46, 46, 47, 47, 47, 47, 50, 50, 53, - 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 58, 58, 60, 60, 61, - 61, 63, 49, 47, 47, 45, 45, 46, 46, 45, 45, 49, 49, 53, 53, 56, 56, 58, - 58, 59, 59, 61, 61, 62, 62, 64, 64, 65, 65, 67, 67, 68, 68, 69, 52, 50, - 50, 48, 48, 47, 47, 47, 47, 50, 50, 54, 54, 57, 57, 61, 61, 64, 64, 66, - 66, 68, 68, 70, 70, 72, 72, 75, 75, 75, 75, 77, 57, 54, 54, 52, 52, 51, - 51, 50, 50, 53, 53, 57, 57, 60, 60, 64, 64, 67, 67, 71, 71, 73, 73, 76, - 76, 79, 79, 82, 82, 83, 83, 84, 63, 60, 60, 57, 57, 56, 56, 54, 54, 57, - 57, 60, 60, 64, 64, 67, 67, 71, 71, 75, 75, 78, 78, 82, 82, 85, 85, 89, - 89, 90, 90, 92 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 33, 45, 62, 33, 39, 51, 64, 45, 51, 71, 87, 62, 64, 87, 108, - /* Size 8x8 */ - 31, 32, 32, 35, 42, 51, 59, 69, 32, 32, 33, 35, 41, 49, 56, 65, 32, 33, - 35, 38, 43, 49, 56, 64, 35, 35, 38, 48, 54, 59, 66, 73, 42, 41, 43, 54, - 63, 71, 77, 85, 51, 49, 49, 59, 71, 81, 89, 97, 59, 56, 56, 66, 77, 89, - 98, 108, 69, 65, 64, 73, 85, 97, 108, 119, - /* Size 16x16 */ - 32, 31, 31, 31, 32, 34, 35, 38, 41, 45, 48, 54, 59, 65, 71, 80, 31, 32, - 32, 32, 32, 34, 35, 37, 40, 43, 46, 51, 56, 62, 68, 76, 31, 32, 32, 32, - 32, 33, 34, 36, 38, 41, 44, 49, 54, 59, 65, 72, 31, 32, 32, 33, 34, 35, - 36, 38, 40, 42, 45, 50, 54, 59, 64, 71, 32, 32, 32, 34, 35, 
37, 38, 39, - 41, 43, 46, 49, 53, 58, 63, 69, 34, 34, 33, 35, 37, 39, 42, 44, 46, 48, - 51, 54, 58, 63, 68, 74, 35, 35, 34, 36, 38, 42, 46, 48, 50, 53, 55, 59, - 62, 67, 72, 78, 38, 37, 36, 38, 39, 44, 48, 51, 54, 57, 59, 63, 67, 71, - 76, 82, 41, 40, 38, 40, 41, 46, 50, 54, 57, 60, 63, 67, 71, 75, 80, 86, - 45, 43, 41, 42, 43, 48, 53, 57, 60, 65, 68, 72, 76, 81, 85, 91, 48, 46, - 44, 45, 46, 51, 55, 59, 63, 68, 71, 76, 80, 85, 90, 96, 54, 51, 49, 50, - 49, 54, 59, 63, 67, 72, 76, 82, 87, 92, 97, 104, 59, 56, 54, 54, 53, 58, - 62, 67, 71, 76, 80, 87, 92, 98, 103, 110, 65, 62, 59, 59, 58, 63, 67, - 71, 75, 81, 85, 92, 98, 105, 111, 118, 71, 68, 65, 64, 63, 68, 72, 76, - 80, 85, 90, 97, 103, 111, 117, 125, 80, 76, 72, 71, 69, 74, 78, 82, 86, - 91, 96, 104, 110, 118, 125, 134, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 34, 34, 35, 36, 38, 39, 41, 44, - 45, 48, 48, 53, 54, 57, 59, 62, 65, 67, 71, 72, 80, 80, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37, 38, 40, 42, 43, 46, 46, 51, - 52, 55, 56, 59, 62, 64, 68, 69, 76, 76, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 34, 34, 35, 35, 37, 38, 40, 42, 43, 46, 46, 51, 51, 55, 56, 59, - 62, 64, 68, 69, 76, 76, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 34, 34, 36, 38, 39, 41, 42, 45, 45, 49, 50, 53, 54, 57, 60, 62, 66, 66, - 73, 73, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 36, 37, - 38, 41, 41, 44, 44, 49, 49, 52, 54, 56, 59, 61, 65, 65, 72, 72, 31, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 35, 35, 37, 38, 39, 41, 42, 45, - 45, 49, 49, 52, 54, 56, 59, 61, 64, 65, 72, 72, 31, 32, 32, 32, 32, 33, - 33, 33, 34, 34, 35, 35, 36, 36, 38, 39, 40, 42, 42, 45, 45, 49, 50, 52, - 54, 56, 59, 60, 64, 65, 71, 71, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, - 35, 35, 36, 37, 38, 39, 40, 42, 43, 45, 45, 49, 49, 52, 54, 56, 59, 60, - 64, 64, 70, 70, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 37, 37, 38, 38, - 39, 40, 41, 42, 43, 46, 46, 49, 49, 52, 53, 55, 58, 59, 63, 63, 69, 69, - 32, 32, 
32, 32, 33, 33, 34, 34, 35, 35, 37, 37, 38, 38, 40, 41, 41, 43, - 43, 46, 46, 49, 50, 52, 54, 56, 58, 60, 63, 64, 70, 70, 34, 34, 34, 33, - 33, 34, 35, 35, 37, 37, 39, 39, 42, 42, 44, 45, 46, 47, 48, 51, 51, 54, - 54, 57, 58, 60, 63, 64, 68, 68, 74, 74, 34, 34, 34, 33, 33, 34, 35, 35, - 37, 37, 39, 39, 42, 42, 44, 45, 46, 47, 48, 51, 51, 54, 54, 57, 58, 60, - 63, 64, 68, 68, 74, 74, 35, 35, 35, 34, 34, 35, 36, 36, 38, 38, 42, 42, - 46, 47, 48, 49, 50, 52, 53, 55, 55, 58, 59, 61, 62, 64, 67, 68, 72, 72, - 78, 78, 36, 35, 35, 34, 34, 35, 36, 37, 38, 38, 42, 42, 47, 48, 50, 50, - 52, 54, 54, 57, 57, 59, 60, 62, 64, 66, 68, 69, 73, 73, 79, 79, 38, 37, - 37, 36, 36, 37, 38, 38, 39, 40, 44, 44, 48, 50, 51, 52, 54, 56, 57, 59, - 59, 62, 63, 65, 67, 69, 71, 72, 76, 76, 82, 82, 39, 38, 38, 38, 37, 38, - 39, 39, 40, 41, 45, 45, 49, 50, 52, 54, 55, 58, 58, 61, 61, 64, 65, 67, - 69, 71, 73, 74, 78, 78, 84, 84, 41, 40, 40, 39, 38, 39, 40, 40, 41, 41, - 46, 46, 50, 52, 54, 55, 57, 60, 60, 63, 63, 67, 67, 70, 71, 73, 75, 77, - 80, 81, 86, 86, 44, 42, 42, 41, 41, 41, 42, 42, 42, 43, 47, 47, 52, 54, - 56, 58, 60, 63, 64, 67, 67, 71, 71, 74, 75, 77, 79, 81, 84, 85, 90, 90, - 45, 43, 43, 42, 41, 42, 42, 43, 43, 43, 48, 48, 53, 54, 57, 58, 60, 64, - 65, 68, 68, 72, 72, 75, 76, 78, 81, 82, 85, 86, 91, 91, 48, 46, 46, 45, - 44, 45, 45, 45, 46, 46, 51, 51, 55, 57, 59, 61, 63, 67, 68, 71, 71, 75, - 76, 79, 80, 83, 85, 87, 90, 91, 96, 96, 48, 46, 46, 45, 44, 45, 45, 45, - 46, 46, 51, 51, 55, 57, 59, 61, 63, 67, 68, 71, 71, 75, 76, 79, 80, 83, - 85, 87, 90, 91, 96, 96, 53, 51, 51, 49, 49, 49, 49, 49, 49, 49, 54, 54, - 58, 59, 62, 64, 67, 71, 72, 75, 75, 81, 81, 85, 86, 89, 91, 93, 97, 97, - 103, 103, 54, 52, 51, 50, 49, 49, 50, 49, 49, 50, 54, 54, 59, 60, 63, - 65, 67, 71, 72, 76, 76, 81, 82, 85, 87, 89, 92, 94, 97, 98, 104, 104, - 57, 55, 55, 53, 52, 52, 52, 52, 52, 52, 57, 57, 61, 62, 65, 67, 70, 74, - 75, 79, 79, 85, 85, 89, 90, 93, 96, 98, 102, 102, 108, 108, 59, 56, 56, - 54, 54, 54, 
54, 54, 53, 54, 58, 58, 62, 64, 67, 69, 71, 75, 76, 80, 80, - 86, 87, 90, 92, 95, 98, 99, 103, 104, 110, 110, 62, 59, 59, 57, 56, 56, - 56, 56, 55, 56, 60, 60, 64, 66, 69, 71, 73, 77, 78, 83, 83, 89, 89, 93, - 95, 98, 101, 103, 107, 108, 114, 114, 65, 62, 62, 60, 59, 59, 59, 59, - 58, 58, 63, 63, 67, 68, 71, 73, 75, 79, 81, 85, 85, 91, 92, 96, 98, 101, - 105, 106, 111, 111, 118, 118, 67, 64, 64, 62, 61, 61, 60, 60, 59, 60, - 64, 64, 68, 69, 72, 74, 77, 81, 82, 87, 87, 93, 94, 98, 99, 103, 106, - 108, 113, 113, 120, 120, 71, 68, 68, 66, 65, 64, 64, 64, 63, 63, 68, 68, - 72, 73, 76, 78, 80, 84, 85, 90, 90, 97, 97, 102, 103, 107, 111, 113, - 117, 118, 125, 125, 72, 69, 69, 66, 65, 65, 65, 64, 63, 64, 68, 68, 72, - 73, 76, 78, 81, 85, 86, 91, 91, 97, 98, 102, 104, 108, 111, 113, 118, - 119, 126, 126, 80, 76, 76, 73, 72, 72, 71, 70, 69, 70, 74, 74, 78, 79, - 82, 84, 86, 90, 91, 96, 96, 103, 104, 108, 110, 114, 118, 120, 125, 126, - 134, 134, 80, 76, 76, 73, 72, 72, 71, 70, 69, 70, 74, 74, 78, 79, 82, - 84, 86, 90, 91, 96, 96, 103, 104, 108, 110, 114, 118, 120, 125, 126, - 134, 134, - /* Size 4x8 */ - 32, 34, 43, 62, 32, 34, 42, 59, 33, 37, 44, 58, 35, 43, 54, 68, 41, 48, - 64, 79, 49, 54, 71, 91, 57, 60, 78, 101, 66, 68, 86, 111, - /* Size 8x4 */ - 32, 32, 33, 35, 41, 49, 57, 66, 34, 34, 37, 43, 48, 54, 60, 68, 43, 42, - 44, 54, 64, 71, 78, 86, 62, 59, 58, 68, 79, 91, 101, 111, - /* Size 8x16 */ - 32, 31, 32, 36, 44, 53, 62, 73, 31, 32, 32, 35, 42, 51, 59, 69, 31, 32, - 33, 34, 41, 49, 57, 66, 32, 32, 34, 36, 42, 50, 57, 65, 32, 33, 35, 38, - 42, 49, 56, 64, 34, 34, 37, 42, 48, 54, 61, 69, 35, 34, 38, 47, 52, 59, - 65, 73, 38, 36, 40, 49, 56, 63, 69, 77, 41, 39, 41, 51, 60, 67, 74, 81, - 44, 42, 43, 54, 64, 72, 79, 86, 48, 45, 46, 56, 67, 76, 83, 91, 53, 49, - 50, 60, 71, 82, 90, 99, 58, 54, 54, 63, 75, 87, 95, 105, 65, 60, 58, 68, - 79, 92, 102, 112, 71, 65, 63, 73, 84, 97, 108, 119, 79, 72, 70, 79, 90, - 104, 115, 127, - /* Size 16x8 */ - 32, 31, 31, 32, 32, 34, 35, 
38, 41, 44, 48, 53, 58, 65, 71, 79, 31, 32, - 32, 32, 33, 34, 34, 36, 39, 42, 45, 49, 54, 60, 65, 72, 32, 32, 33, 34, - 35, 37, 38, 40, 41, 43, 46, 50, 54, 58, 63, 70, 36, 35, 34, 36, 38, 42, - 47, 49, 51, 54, 56, 60, 63, 68, 73, 79, 44, 42, 41, 42, 42, 48, 52, 56, - 60, 64, 67, 71, 75, 79, 84, 90, 53, 51, 49, 50, 49, 54, 59, 63, 67, 72, - 76, 82, 87, 92, 97, 104, 62, 59, 57, 57, 56, 61, 65, 69, 74, 79, 83, 90, - 95, 102, 108, 115, 73, 69, 66, 65, 64, 69, 73, 77, 81, 86, 91, 99, 105, - 112, 119, 127, - /* Size 16x32 */ - 32, 31, 31, 32, 32, 34, 36, 38, 44, 44, 53, 53, 62, 65, 73, 79, 31, 32, - 32, 32, 32, 34, 35, 37, 42, 43, 51, 51, 60, 62, 70, 75, 31, 32, 32, 32, - 32, 34, 35, 37, 42, 43, 51, 51, 59, 62, 69, 75, 31, 32, 32, 32, 32, 33, - 35, 36, 41, 42, 50, 50, 58, 60, 67, 73, 31, 32, 32, 32, 33, 33, 34, 36, - 41, 41, 49, 49, 57, 59, 66, 72, 31, 32, 32, 33, 33, 34, 35, 37, 41, 42, - 49, 49, 57, 59, 66, 71, 32, 32, 32, 33, 34, 35, 36, 38, 42, 43, 50, 50, - 57, 59, 65, 71, 32, 32, 32, 34, 34, 35, 37, 38, 42, 43, 49, 49, 56, 59, - 65, 70, 32, 32, 33, 34, 35, 37, 38, 39, 42, 43, 49, 49, 56, 58, 64, 69, - 32, 33, 33, 34, 35, 37, 39, 40, 43, 44, 50, 50, 56, 58, 64, 69, 34, 34, - 34, 36, 37, 39, 42, 44, 48, 48, 54, 54, 61, 63, 69, 73, 34, 34, 34, 36, - 37, 39, 42, 44, 48, 48, 54, 54, 61, 63, 69, 73, 35, 34, 34, 37, 38, 42, - 47, 48, 52, 53, 59, 59, 65, 67, 73, 77, 36, 35, 34, 37, 38, 43, 48, 49, - 54, 54, 60, 60, 66, 68, 74, 78, 38, 36, 36, 38, 40, 44, 49, 51, 56, 57, - 63, 63, 69, 71, 77, 81, 39, 38, 37, 40, 40, 45, 50, 52, 58, 58, 65, 65, - 71, 73, 79, 84, 41, 39, 39, 41, 41, 46, 51, 54, 60, 60, 67, 67, 74, 76, - 81, 86, 44, 41, 41, 42, 43, 48, 53, 56, 63, 64, 71, 71, 78, 79, 85, 90, - 44, 42, 42, 43, 43, 48, 54, 56, 64, 64, 72, 72, 79, 81, 86, 91, 48, 45, - 45, 46, 46, 51, 56, 59, 67, 67, 76, 76, 83, 85, 91, 96, 48, 45, 45, 46, - 46, 51, 56, 59, 67, 67, 76, 76, 83, 85, 91, 96, 53, 49, 49, 49, 49, 54, - 59, 62, 71, 71, 81, 81, 89, 91, 98, 103, 53, 50, 49, 50, 50, 54, 
60, 63, - 71, 72, 82, 82, 90, 92, 99, 103, 57, 53, 52, 52, 52, 57, 62, 65, 74, 75, - 85, 85, 94, 96, 103, 108, 58, 54, 54, 54, 54, 58, 63, 67, 75, 76, 87, - 87, 95, 98, 105, 110, 61, 57, 57, 56, 56, 60, 66, 69, 77, 78, 89, 89, - 98, 101, 108, 114, 65, 60, 60, 59, 58, 63, 68, 71, 79, 80, 92, 92, 102, - 105, 112, 118, 67, 62, 61, 60, 60, 64, 69, 72, 81, 82, 94, 94, 103, 106, - 114, 120, 71, 66, 65, 64, 63, 68, 73, 76, 84, 85, 97, 97, 108, 111, 119, - 125, 72, 66, 66, 64, 64, 68, 73, 76, 85, 86, 98, 98, 108, 111, 119, 125, - 79, 73, 72, 71, 70, 74, 79, 82, 90, 91, 104, 104, 115, 118, 127, 133, - 79, 73, 72, 71, 70, 74, 79, 82, 90, 91, 104, 104, 115, 118, 127, 133, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 35, 36, 38, 39, 41, 44, - 44, 48, 48, 53, 53, 57, 58, 61, 65, 67, 71, 72, 79, 79, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 36, 38, 39, 41, 42, 45, 45, 49, - 50, 53, 54, 57, 60, 62, 66, 66, 73, 73, 31, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 34, 34, 34, 34, 36, 37, 39, 41, 42, 45, 45, 49, 49, 52, 54, 57, - 60, 61, 65, 66, 72, 72, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 36, 36, - 37, 37, 38, 40, 41, 42, 43, 46, 46, 49, 50, 52, 54, 56, 59, 60, 64, 64, - 71, 71, 32, 32, 32, 32, 33, 33, 34, 34, 35, 35, 37, 37, 38, 38, 40, 40, - 41, 43, 43, 46, 46, 49, 50, 52, 54, 56, 58, 60, 63, 64, 70, 70, 34, 34, - 34, 33, 33, 34, 35, 35, 37, 37, 39, 39, 42, 43, 44, 45, 46, 48, 48, 51, - 51, 54, 54, 57, 58, 60, 63, 64, 68, 68, 74, 74, 36, 35, 35, 35, 34, 35, - 36, 37, 38, 39, 42, 42, 47, 48, 49, 50, 51, 53, 54, 56, 56, 59, 60, 62, - 63, 66, 68, 69, 73, 73, 79, 79, 38, 37, 37, 36, 36, 37, 38, 38, 39, 40, - 44, 44, 48, 49, 51, 52, 54, 56, 56, 59, 59, 62, 63, 65, 67, 69, 71, 72, - 76, 76, 82, 82, 44, 42, 42, 41, 41, 41, 42, 42, 42, 43, 48, 48, 52, 54, - 56, 58, 60, 63, 64, 67, 67, 71, 71, 74, 75, 77, 79, 81, 84, 85, 90, 90, - 44, 43, 43, 42, 41, 42, 43, 43, 43, 44, 48, 48, 53, 54, 57, 58, 60, 64, - 64, 67, 67, 71, 72, 75, 76, 78, 80, 82, 85, 86, 
91, 91, 53, 51, 51, 50, - 49, 49, 50, 49, 49, 50, 54, 54, 59, 60, 63, 65, 67, 71, 72, 76, 76, 81, - 82, 85, 87, 89, 92, 94, 97, 98, 104, 104, 53, 51, 51, 50, 49, 49, 50, - 49, 49, 50, 54, 54, 59, 60, 63, 65, 67, 71, 72, 76, 76, 81, 82, 85, 87, - 89, 92, 94, 97, 98, 104, 104, 62, 60, 59, 58, 57, 57, 57, 56, 56, 56, - 61, 61, 65, 66, 69, 71, 74, 78, 79, 83, 83, 89, 90, 94, 95, 98, 102, - 103, 108, 108, 115, 115, 65, 62, 62, 60, 59, 59, 59, 59, 58, 58, 63, 63, - 67, 68, 71, 73, 76, 79, 81, 85, 85, 91, 92, 96, 98, 101, 105, 106, 111, - 111, 118, 118, 73, 70, 69, 67, 66, 66, 65, 65, 64, 64, 69, 69, 73, 74, - 77, 79, 81, 85, 86, 91, 91, 98, 99, 103, 105, 108, 112, 114, 119, 119, - 127, 127, 79, 75, 75, 73, 72, 71, 71, 70, 69, 69, 73, 73, 77, 78, 81, - 84, 86, 90, 91, 96, 96, 103, 103, 108, 110, 114, 118, 120, 125, 125, - 133, 133, - /* Size 4x16 */ - 31, 34, 44, 65, 32, 34, 43, 62, 32, 33, 41, 59, 32, 35, 43, 59, 32, 37, - 43, 58, 34, 39, 48, 63, 34, 42, 53, 67, 36, 44, 57, 71, 39, 46, 60, 76, - 42, 48, 64, 81, 45, 51, 67, 85, 50, 54, 72, 92, 54, 58, 76, 98, 60, 63, - 80, 105, 66, 68, 85, 111, 73, 74, 91, 118, - /* Size 16x4 */ - 31, 32, 32, 32, 32, 34, 34, 36, 39, 42, 45, 50, 54, 60, 66, 73, 34, 34, - 33, 35, 37, 39, 42, 44, 46, 48, 51, 54, 58, 63, 68, 74, 44, 43, 41, 43, - 43, 48, 53, 57, 60, 64, 67, 72, 76, 80, 85, 91, 65, 62, 59, 59, 58, 63, - 67, 71, 76, 81, 85, 92, 98, 105, 111, 118, - /* Size 8x32 */ - 32, 31, 32, 36, 44, 53, 62, 73, 31, 32, 32, 35, 42, 51, 60, 70, 31, 32, - 32, 35, 42, 51, 59, 69, 31, 32, 32, 35, 41, 50, 58, 67, 31, 32, 33, 34, - 41, 49, 57, 66, 31, 32, 33, 35, 41, 49, 57, 66, 32, 32, 34, 36, 42, 50, - 57, 65, 32, 32, 34, 37, 42, 49, 56, 65, 32, 33, 35, 38, 42, 49, 56, 64, - 32, 33, 35, 39, 43, 50, 56, 64, 34, 34, 37, 42, 48, 54, 61, 69, 34, 34, - 37, 42, 48, 54, 61, 69, 35, 34, 38, 47, 52, 59, 65, 73, 36, 34, 38, 48, - 54, 60, 66, 74, 38, 36, 40, 49, 56, 63, 69, 77, 39, 37, 40, 50, 58, 65, - 71, 79, 41, 39, 41, 51, 60, 67, 74, 81, 44, 41, 43, 
53, 63, 71, 78, 85, - 44, 42, 43, 54, 64, 72, 79, 86, 48, 45, 46, 56, 67, 76, 83, 91, 48, 45, - 46, 56, 67, 76, 83, 91, 53, 49, 49, 59, 71, 81, 89, 98, 53, 49, 50, 60, - 71, 82, 90, 99, 57, 52, 52, 62, 74, 85, 94, 103, 58, 54, 54, 63, 75, 87, - 95, 105, 61, 57, 56, 66, 77, 89, 98, 108, 65, 60, 58, 68, 79, 92, 102, - 112, 67, 61, 60, 69, 81, 94, 103, 114, 71, 65, 63, 73, 84, 97, 108, 119, - 72, 66, 64, 73, 85, 98, 108, 119, 79, 72, 70, 79, 90, 104, 115, 127, 79, - 72, 70, 79, 90, 104, 115, 127, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 35, 36, 38, 39, 41, 44, - 44, 48, 48, 53, 53, 57, 58, 61, 65, 67, 71, 72, 79, 79, 31, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 36, 37, 39, 41, 42, 45, 45, 49, - 49, 52, 54, 57, 60, 61, 65, 66, 72, 72, 32, 32, 32, 32, 33, 33, 34, 34, - 35, 35, 37, 37, 38, 38, 40, 40, 41, 43, 43, 46, 46, 49, 50, 52, 54, 56, - 58, 60, 63, 64, 70, 70, 36, 35, 35, 35, 34, 35, 36, 37, 38, 39, 42, 42, - 47, 48, 49, 50, 51, 53, 54, 56, 56, 59, 60, 62, 63, 66, 68, 69, 73, 73, - 79, 79, 44, 42, 42, 41, 41, 41, 42, 42, 42, 43, 48, 48, 52, 54, 56, 58, - 60, 63, 64, 67, 67, 71, 71, 74, 75, 77, 79, 81, 84, 85, 90, 90, 53, 51, - 51, 50, 49, 49, 50, 49, 49, 50, 54, 54, 59, 60, 63, 65, 67, 71, 72, 76, - 76, 81, 82, 85, 87, 89, 92, 94, 97, 98, 104, 104, 62, 60, 59, 58, 57, - 57, 57, 56, 56, 56, 61, 61, 65, 66, 69, 71, 74, 78, 79, 83, 83, 89, 90, - 94, 95, 98, 102, 103, 108, 108, 115, 115, 73, 70, 69, 67, 66, 66, 65, - 65, 64, 64, 69, 69, 73, 74, 77, 79, 81, 85, 86, 91, 91, 98, 99, 103, - 105, 108, 112, 114, 119, 119, 127, 127 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 42, 47, 53, 42, 48, 50, 54, 47, 50, 61, 67, 53, 54, 67, 78, - /* Size 8x8 */ - 31, 32, 38, 48, 47, 50, 53, 57, 32, 35, 42, 47, 45, 47, 50, 54, 38, 42, - 47, 48, 45, 47, 49, 52, 48, 47, 48, 53, 53, 54, 56, 58, 47, 45, 45, 53, - 58, 61, 63, 65, 50, 47, 47, 54, 61, 66, 69, 72, 53, 50, 49, 56, 63, 69, - 73, 77, 57, 54, 52, 58, 65, 72, 77, 82, - /* Size 16x16 */ - 32, 31, 
30, 33, 36, 41, 47, 49, 49, 49, 50, 52, 54, 57, 60, 63, 31, 31, - 31, 34, 38, 42, 46, 47, 47, 47, 48, 50, 52, 54, 57, 60, 30, 31, 32, 35, - 40, 42, 45, 46, 45, 45, 46, 47, 49, 52, 54, 57, 33, 34, 35, 39, 43, 45, - 47, 46, 46, 45, 46, 47, 49, 51, 53, 56, 36, 38, 40, 43, 47, 47, 47, 47, - 46, 45, 46, 47, 48, 50, 52, 54, 41, 42, 42, 45, 47, 48, 50, 50, 49, 49, - 50, 50, 52, 53, 55, 57, 47, 46, 45, 47, 47, 50, 52, 52, 52, 52, 53, 53, - 55, 56, 58, 60, 49, 47, 46, 46, 47, 50, 52, 53, 54, 55, 55, 56, 57, 58, - 60, 62, 49, 47, 45, 46, 46, 49, 52, 54, 55, 57, 58, 59, 60, 61, 63, 65, - 49, 47, 45, 45, 45, 49, 52, 55, 57, 59, 60, 61, 63, 64, 66, 68, 50, 48, - 46, 46, 46, 50, 53, 55, 58, 60, 61, 63, 65, 67, 68, 71, 52, 50, 47, 47, - 47, 50, 53, 56, 59, 61, 63, 66, 68, 70, 72, 75, 54, 52, 49, 49, 48, 52, - 55, 57, 60, 63, 65, 68, 71, 73, 75, 78, 57, 54, 52, 51, 50, 53, 56, 58, - 61, 64, 67, 70, 73, 76, 79, 82, 60, 57, 54, 53, 52, 55, 58, 60, 63, 66, - 68, 72, 75, 79, 82, 85, 63, 60, 57, 56, 54, 57, 60, 62, 65, 68, 71, 75, - 78, 82, 85, 89, - /* Size 32x32 */ - 32, 31, 31, 30, 30, 32, 33, 34, 36, 37, 41, 41, 47, 49, 49, 48, 49, 49, - 49, 50, 50, 52, 52, 54, 54, 56, 57, 58, 60, 60, 63, 63, 31, 31, 31, 31, - 31, 32, 34, 35, 38, 38, 42, 42, 46, 48, 47, 47, 47, 47, 47, 48, 48, 50, - 50, 51, 52, 53, 54, 55, 57, 57, 60, 60, 31, 31, 31, 31, 31, 33, 34, 35, - 38, 39, 42, 42, 46, 47, 47, 47, 47, 47, 47, 48, 48, 49, 50, 51, 52, 53, - 54, 55, 57, 57, 60, 60, 30, 31, 31, 31, 31, 33, 35, 36, 39, 40, 42, 42, - 46, 47, 46, 46, 46, 45, 46, 47, 47, 48, 48, 50, 50, 51, 52, 53, 55, 55, - 58, 58, 30, 31, 31, 31, 32, 33, 35, 36, 40, 40, 42, 42, 45, 46, 46, 45, - 45, 45, 45, 46, 46, 47, 47, 49, 49, 51, 52, 52, 54, 54, 57, 57, 32, 32, - 33, 33, 33, 35, 37, 38, 41, 42, 43, 43, 46, 47, 46, 46, 45, 45, 45, 46, - 46, 47, 47, 49, 49, 50, 51, 52, 54, 54, 57, 57, 33, 34, 34, 35, 35, 37, - 39, 40, 43, 43, 45, 45, 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 49, - 49, 50, 51, 52, 53, 54, 56, 56, 34, 35, 35, 
36, 36, 38, 40, 41, 44, 44, - 45, 45, 47, 47, 47, 46, 46, 45, 45, 46, 46, 47, 47, 48, 49, 50, 51, 51, - 53, 53, 55, 55, 36, 38, 38, 39, 40, 41, 43, 44, 47, 47, 47, 47, 47, 48, - 47, 46, 46, 45, 45, 46, 46, 46, 47, 48, 48, 49, 50, 50, 52, 52, 54, 54, - 37, 38, 39, 40, 40, 42, 43, 44, 47, 47, 47, 47, 48, 48, 47, 47, 46, 45, - 46, 46, 46, 47, 47, 48, 48, 49, 50, 51, 52, 52, 55, 55, 41, 42, 42, 42, - 42, 43, 45, 45, 47, 47, 48, 48, 50, 50, 50, 49, 49, 49, 49, 50, 50, 50, - 50, 51, 52, 52, 53, 54, 55, 55, 57, 57, 41, 42, 42, 42, 42, 43, 45, 45, - 47, 47, 48, 48, 50, 50, 50, 49, 49, 49, 49, 50, 50, 50, 50, 51, 52, 52, - 53, 54, 55, 55, 57, 57, 47, 46, 46, 46, 45, 46, 47, 47, 47, 48, 50, 50, - 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 54, 55, 55, 56, 56, 58, 58, - 60, 60, 49, 48, 47, 47, 46, 47, 47, 47, 48, 48, 50, 50, 52, 53, 53, 53, - 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 57, 58, 58, 60, 60, 49, 47, - 47, 46, 46, 46, 46, 47, 47, 47, 50, 50, 52, 53, 53, 54, 54, 55, 55, 55, - 55, 56, 56, 57, 57, 58, 58, 59, 60, 60, 62, 62, 48, 47, 47, 46, 45, 46, - 46, 46, 46, 47, 49, 49, 52, 53, 54, 54, 55, 55, 56, 56, 56, 57, 57, 58, - 58, 59, 60, 60, 61, 62, 63, 63, 49, 47, 47, 46, 45, 45, 46, 46, 46, 46, - 49, 49, 52, 53, 54, 55, 55, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, - 63, 63, 65, 65, 49, 47, 47, 45, 45, 45, 45, 45, 45, 45, 49, 49, 52, 53, - 55, 55, 57, 58, 59, 60, 60, 61, 61, 62, 62, 63, 63, 64, 65, 65, 67, 67, - 49, 47, 47, 46, 45, 45, 45, 45, 45, 46, 49, 49, 52, 53, 55, 56, 57, 59, - 59, 60, 60, 61, 61, 62, 63, 63, 64, 65, 66, 66, 68, 68, 50, 48, 48, 47, - 46, 46, 46, 46, 46, 46, 50, 50, 53, 54, 55, 56, 58, 60, 60, 61, 61, 63, - 63, 65, 65, 66, 67, 67, 68, 69, 71, 71, 50, 48, 48, 47, 46, 46, 46, 46, - 46, 46, 50, 50, 53, 54, 55, 56, 58, 60, 60, 61, 61, 63, 63, 65, 65, 66, - 67, 67, 68, 69, 71, 71, 52, 50, 49, 48, 47, 47, 47, 47, 46, 47, 50, 50, - 53, 54, 56, 57, 59, 61, 61, 63, 63, 66, 66, 67, 68, 69, 70, 71, 72, 72, - 74, 74, 52, 50, 50, 48, 47, 47, 47, 47, 47, 
47, 50, 50, 53, 54, 56, 57, - 59, 61, 61, 63, 63, 66, 66, 68, 68, 69, 70, 71, 72, 73, 75, 75, 54, 51, - 51, 50, 49, 49, 49, 48, 48, 48, 51, 51, 54, 55, 57, 58, 60, 62, 62, 65, - 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 77, 54, 52, 52, 50, 49, 49, - 49, 49, 48, 48, 52, 52, 55, 55, 57, 58, 60, 62, 63, 65, 65, 68, 68, 70, - 71, 72, 73, 74, 75, 76, 78, 78, 56, 53, 53, 51, 51, 50, 50, 50, 49, 49, - 52, 52, 55, 56, 58, 59, 61, 63, 63, 66, 66, 69, 69, 71, 72, 73, 75, 75, - 77, 77, 80, 80, 57, 54, 54, 52, 52, 51, 51, 51, 50, 50, 53, 53, 56, 56, - 58, 60, 61, 63, 64, 67, 67, 70, 70, 72, 73, 75, 76, 77, 79, 79, 82, 82, - 58, 55, 55, 53, 52, 52, 52, 51, 50, 51, 54, 54, 56, 57, 59, 60, 62, 64, - 65, 67, 67, 71, 71, 73, 74, 75, 77, 78, 80, 80, 83, 83, 60, 57, 57, 55, - 54, 54, 53, 53, 52, 52, 55, 55, 58, 58, 60, 61, 63, 65, 66, 68, 68, 72, - 72, 74, 75, 77, 79, 80, 82, 82, 85, 85, 60, 57, 57, 55, 54, 54, 54, 53, - 52, 52, 55, 55, 58, 58, 60, 62, 63, 65, 66, 69, 69, 72, 73, 75, 76, 77, - 79, 80, 82, 82, 85, 85, 63, 60, 60, 58, 57, 57, 56, 55, 54, 55, 57, 57, - 60, 60, 62, 63, 65, 67, 68, 71, 71, 74, 75, 77, 78, 80, 82, 83, 85, 85, - 89, 89, 63, 60, 60, 58, 57, 57, 56, 55, 54, 55, 57, 57, 60, 60, 62, 63, - 65, 67, 68, 71, 71, 74, 75, 77, 78, 80, 82, 83, 85, 85, 89, 89, - /* Size 4x8 */ - 31, 42, 47, 54, 33, 44, 45, 51, 40, 47, 46, 50, 47, 50, 54, 57, 45, 49, - 59, 64, 48, 50, 61, 70, 51, 52, 63, 75, 55, 55, 66, 79, - /* Size 8x4 */ - 31, 33, 40, 47, 45, 48, 51, 55, 42, 44, 47, 50, 49, 50, 52, 55, 47, 45, - 46, 54, 59, 61, 63, 66, 54, 51, 50, 57, 64, 70, 75, 79, - /* Size 8x16 */ - 32, 31, 37, 48, 49, 52, 56, 61, 31, 31, 38, 47, 47, 50, 53, 57, 30, 32, - 40, 46, 45, 48, 51, 55, 33, 36, 43, 47, 46, 47, 50, 54, 37, 40, 47, 47, - 45, 47, 49, 52, 42, 43, 47, 50, 49, 50, 53, 56, 47, 46, 48, 52, 53, 53, - 55, 58, 48, 46, 47, 53, 55, 56, 58, 61, 48, 45, 46, 53, 57, 59, 61, 63, - 49, 45, 46, 53, 58, 62, 64, 66, 50, 46, 46, 54, 59, 64, 66, 69, 52, 48, - 47, 54, 61, 66, 70, 73, 54, 50, 
49, 55, 62, 68, 72, 76, 57, 52, 50, 56, - 64, 70, 75, 79, 60, 54, 52, 58, 65, 72, 77, 82, 63, 57, 55, 60, 67, 75, - 80, 86, - /* Size 16x8 */ - 32, 31, 30, 33, 37, 42, 47, 48, 48, 49, 50, 52, 54, 57, 60, 63, 31, 31, - 32, 36, 40, 43, 46, 46, 45, 45, 46, 48, 50, 52, 54, 57, 37, 38, 40, 43, - 47, 47, 48, 47, 46, 46, 46, 47, 49, 50, 52, 55, 48, 47, 46, 47, 47, 50, - 52, 53, 53, 53, 54, 54, 55, 56, 58, 60, 49, 47, 45, 46, 45, 49, 53, 55, - 57, 58, 59, 61, 62, 64, 65, 67, 52, 50, 48, 47, 47, 50, 53, 56, 59, 62, - 64, 66, 68, 70, 72, 75, 56, 53, 51, 50, 49, 53, 55, 58, 61, 64, 66, 70, - 72, 75, 77, 80, 61, 57, 55, 54, 52, 56, 58, 61, 63, 66, 69, 73, 76, 79, - 82, 86, - /* Size 16x32 */ - 32, 31, 31, 35, 37, 42, 48, 48, 49, 49, 52, 52, 56, 57, 61, 63, 31, 31, - 31, 36, 38, 42, 47, 47, 47, 47, 50, 50, 54, 54, 58, 60, 31, 31, 31, 36, - 38, 42, 47, 47, 47, 47, 50, 50, 53, 54, 57, 60, 30, 32, 32, 37, 39, 42, - 46, 46, 46, 46, 48, 48, 52, 52, 56, 58, 30, 32, 32, 37, 40, 42, 46, 46, - 45, 45, 48, 48, 51, 52, 55, 57, 32, 33, 34, 39, 41, 44, 46, 46, 45, 45, - 48, 48, 51, 51, 54, 57, 33, 35, 36, 40, 43, 45, 47, 46, 46, 46, 47, 47, - 50, 51, 54, 56, 34, 37, 37, 42, 44, 45, 47, 47, 45, 46, 47, 47, 50, 51, - 53, 55, 37, 40, 40, 45, 47, 47, 47, 47, 45, 46, 47, 47, 49, 50, 52, 54, - 37, 40, 40, 45, 47, 47, 48, 47, 46, 46, 47, 47, 49, 50, 53, 55, 42, 43, - 43, 46, 47, 48, 50, 50, 49, 49, 50, 50, 53, 53, 56, 57, 42, 43, 43, 46, - 47, 48, 50, 50, 49, 49, 50, 50, 53, 53, 56, 57, 47, 46, 46, 47, 48, 50, - 52, 52, 53, 53, 53, 53, 55, 56, 58, 60, 49, 47, 46, 47, 48, 50, 53, 53, - 53, 54, 54, 54, 56, 57, 59, 60, 48, 46, 46, 47, 47, 50, 53, 53, 55, 55, - 56, 56, 58, 58, 61, 62, 48, 46, 46, 46, 47, 50, 53, 54, 56, 56, 57, 57, - 59, 60, 62, 64, 48, 46, 45, 46, 46, 49, 53, 54, 57, 57, 59, 59, 61, 61, - 63, 65, 49, 45, 45, 45, 46, 49, 53, 55, 58, 59, 61, 61, 63, 64, 66, 67, - 49, 46, 45, 46, 46, 49, 53, 55, 58, 59, 62, 62, 64, 64, 66, 68, 50, 47, - 46, 46, 46, 50, 54, 55, 59, 60, 64, 64, 66, 67, 
69, 71, 50, 47, 46, 46, - 46, 50, 54, 55, 59, 60, 64, 64, 66, 67, 69, 71, 52, 48, 48, 47, 47, 50, - 54, 56, 61, 61, 66, 66, 69, 70, 72, 74, 52, 48, 48, 47, 47, 50, 54, 56, - 61, 61, 66, 66, 70, 71, 73, 75, 53, 50, 49, 48, 48, 51, 55, 57, 62, 62, - 68, 68, 71, 72, 75, 77, 54, 50, 50, 49, 49, 52, 55, 57, 62, 63, 68, 68, - 72, 73, 76, 78, 55, 51, 51, 50, 49, 52, 56, 58, 63, 63, 69, 69, 74, 75, - 78, 80, 57, 52, 52, 51, 50, 53, 56, 58, 64, 64, 70, 70, 75, 76, 79, 82, - 58, 53, 53, 51, 51, 54, 57, 59, 64, 65, 71, 71, 76, 77, 80, 83, 60, 55, - 54, 53, 52, 55, 58, 60, 65, 66, 72, 72, 77, 79, 82, 85, 60, 55, 55, 53, - 53, 55, 59, 60, 65, 66, 73, 73, 78, 79, 83, 85, 63, 58, 57, 56, 55, 58, - 60, 62, 67, 68, 75, 75, 80, 82, 86, 89, 63, 58, 57, 56, 55, 58, 60, 62, - 67, 68, 75, 75, 80, 82, 86, 89, - /* Size 32x16 */ - 32, 31, 31, 30, 30, 32, 33, 34, 37, 37, 42, 42, 47, 49, 48, 48, 48, 49, - 49, 50, 50, 52, 52, 53, 54, 55, 57, 58, 60, 60, 63, 63, 31, 31, 31, 32, - 32, 33, 35, 37, 40, 40, 43, 43, 46, 47, 46, 46, 46, 45, 46, 47, 47, 48, - 48, 50, 50, 51, 52, 53, 55, 55, 58, 58, 31, 31, 31, 32, 32, 34, 36, 37, - 40, 40, 43, 43, 46, 46, 46, 46, 45, 45, 45, 46, 46, 48, 48, 49, 50, 51, - 52, 53, 54, 55, 57, 57, 35, 36, 36, 37, 37, 39, 40, 42, 45, 45, 46, 46, - 47, 47, 47, 46, 46, 45, 46, 46, 46, 47, 47, 48, 49, 50, 51, 51, 53, 53, - 56, 56, 37, 38, 38, 39, 40, 41, 43, 44, 47, 47, 47, 47, 48, 48, 47, 47, - 46, 46, 46, 46, 46, 47, 47, 48, 49, 49, 50, 51, 52, 53, 55, 55, 42, 42, - 42, 42, 42, 44, 45, 45, 47, 47, 48, 48, 50, 50, 50, 50, 49, 49, 49, 50, - 50, 50, 50, 51, 52, 52, 53, 54, 55, 55, 58, 58, 48, 47, 47, 46, 46, 46, - 47, 47, 47, 48, 50, 50, 52, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, - 55, 56, 56, 57, 58, 59, 60, 60, 48, 47, 47, 46, 46, 46, 46, 47, 47, 47, - 50, 50, 52, 53, 53, 54, 54, 55, 55, 55, 55, 56, 56, 57, 57, 58, 58, 59, - 60, 60, 62, 62, 49, 47, 47, 46, 45, 45, 46, 45, 45, 46, 49, 49, 53, 53, - 55, 56, 57, 58, 58, 59, 59, 61, 61, 62, 62, 63, 64, 64, 65, 65, 67, 
67, - 49, 47, 47, 46, 45, 45, 46, 46, 46, 46, 49, 49, 53, 54, 55, 56, 57, 59, - 59, 60, 60, 61, 61, 62, 63, 63, 64, 65, 66, 66, 68, 68, 52, 50, 50, 48, - 48, 48, 47, 47, 47, 47, 50, 50, 53, 54, 56, 57, 59, 61, 62, 64, 64, 66, - 66, 68, 68, 69, 70, 71, 72, 73, 75, 75, 52, 50, 50, 48, 48, 48, 47, 47, - 47, 47, 50, 50, 53, 54, 56, 57, 59, 61, 62, 64, 64, 66, 66, 68, 68, 69, - 70, 71, 72, 73, 75, 75, 56, 54, 53, 52, 51, 51, 50, 50, 49, 49, 53, 53, - 55, 56, 58, 59, 61, 63, 64, 66, 66, 69, 70, 71, 72, 74, 75, 76, 77, 78, - 80, 80, 57, 54, 54, 52, 52, 51, 51, 51, 50, 50, 53, 53, 56, 57, 58, 60, - 61, 64, 64, 67, 67, 70, 71, 72, 73, 75, 76, 77, 79, 79, 82, 82, 61, 58, - 57, 56, 55, 54, 54, 53, 52, 53, 56, 56, 58, 59, 61, 62, 63, 66, 66, 69, - 69, 72, 73, 75, 76, 78, 79, 80, 82, 83, 86, 86, 63, 60, 60, 58, 57, 57, - 56, 55, 54, 55, 57, 57, 60, 60, 62, 64, 65, 67, 68, 71, 71, 74, 75, 77, - 78, 80, 82, 83, 85, 85, 89, 89, - /* Size 4x16 */ - 31, 42, 49, 57, 31, 42, 47, 54, 32, 42, 45, 52, 35, 45, 46, 51, 40, 47, - 46, 50, 43, 48, 49, 53, 46, 50, 53, 56, 46, 50, 55, 58, 46, 49, 57, 61, - 46, 49, 59, 64, 47, 50, 60, 67, 48, 50, 61, 71, 50, 52, 63, 73, 52, 53, - 64, 76, 55, 55, 66, 79, 58, 58, 68, 82, - /* Size 16x4 */ - 31, 31, 32, 35, 40, 43, 46, 46, 46, 46, 47, 48, 50, 52, 55, 58, 42, 42, - 42, 45, 47, 48, 50, 50, 49, 49, 50, 50, 52, 53, 55, 58, 49, 47, 45, 46, - 46, 49, 53, 55, 57, 59, 60, 61, 63, 64, 66, 68, 57, 54, 52, 51, 50, 53, - 56, 58, 61, 64, 67, 71, 73, 76, 79, 82, - /* Size 8x32 */ - 32, 31, 37, 48, 49, 52, 56, 61, 31, 31, 38, 47, 47, 50, 54, 58, 31, 31, - 38, 47, 47, 50, 53, 57, 30, 32, 39, 46, 46, 48, 52, 56, 30, 32, 40, 46, - 45, 48, 51, 55, 32, 34, 41, 46, 45, 48, 51, 54, 33, 36, 43, 47, 46, 47, - 50, 54, 34, 37, 44, 47, 45, 47, 50, 53, 37, 40, 47, 47, 45, 47, 49, 52, - 37, 40, 47, 48, 46, 47, 49, 53, 42, 43, 47, 50, 49, 50, 53, 56, 42, 43, - 47, 50, 49, 50, 53, 56, 47, 46, 48, 52, 53, 53, 55, 58, 49, 46, 48, 53, - 53, 54, 56, 59, 48, 46, 47, 53, 55, 56, 58, 
61, 48, 46, 47, 53, 56, 57, - 59, 62, 48, 45, 46, 53, 57, 59, 61, 63, 49, 45, 46, 53, 58, 61, 63, 66, - 49, 45, 46, 53, 58, 62, 64, 66, 50, 46, 46, 54, 59, 64, 66, 69, 50, 46, - 46, 54, 59, 64, 66, 69, 52, 48, 47, 54, 61, 66, 69, 72, 52, 48, 47, 54, - 61, 66, 70, 73, 53, 49, 48, 55, 62, 68, 71, 75, 54, 50, 49, 55, 62, 68, - 72, 76, 55, 51, 49, 56, 63, 69, 74, 78, 57, 52, 50, 56, 64, 70, 75, 79, - 58, 53, 51, 57, 64, 71, 76, 80, 60, 54, 52, 58, 65, 72, 77, 82, 60, 55, - 53, 59, 65, 73, 78, 83, 63, 57, 55, 60, 67, 75, 80, 86, 63, 57, 55, 60, - 67, 75, 80, 86, - /* Size 32x8 */ - 32, 31, 31, 30, 30, 32, 33, 34, 37, 37, 42, 42, 47, 49, 48, 48, 48, 49, - 49, 50, 50, 52, 52, 53, 54, 55, 57, 58, 60, 60, 63, 63, 31, 31, 31, 32, - 32, 34, 36, 37, 40, 40, 43, 43, 46, 46, 46, 46, 45, 45, 45, 46, 46, 48, - 48, 49, 50, 51, 52, 53, 54, 55, 57, 57, 37, 38, 38, 39, 40, 41, 43, 44, - 47, 47, 47, 47, 48, 48, 47, 47, 46, 46, 46, 46, 46, 47, 47, 48, 49, 49, - 50, 51, 52, 53, 55, 55, 48, 47, 47, 46, 46, 46, 47, 47, 47, 48, 50, 50, - 52, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 57, 58, 59, - 60, 60, 49, 47, 47, 46, 45, 45, 46, 45, 45, 46, 49, 49, 53, 53, 55, 56, - 57, 58, 58, 59, 59, 61, 61, 62, 62, 63, 64, 64, 65, 65, 67, 67, 52, 50, - 50, 48, 48, 48, 47, 47, 47, 47, 50, 50, 53, 54, 56, 57, 59, 61, 62, 64, - 64, 66, 66, 68, 68, 69, 70, 71, 72, 73, 75, 75, 56, 54, 53, 52, 51, 51, - 50, 50, 49, 49, 53, 53, 55, 56, 58, 59, 61, 63, 64, 66, 66, 69, 70, 71, - 72, 74, 75, 76, 77, 78, 80, 80, 61, 58, 57, 56, 55, 54, 54, 53, 52, 53, - 56, 56, 58, 59, 61, 62, 63, 66, 66, 69, 69, 72, 73, 75, 76, 78, 79, 80, - 82, 83, 86, 86 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 33, 42, 55, 33, 38, 46, 57, 42, 46, 63, 75, 55, 57, 75, 92, - /* Size 8x8 */ - 31, 32, 32, 34, 38, 46, 52, 63, 32, 32, 32, 34, 37, 44, 49, 59, 32, 32, - 35, 37, 40, 45, 49, 58, 34, 34, 37, 42, 47, 52, 56, 65, 38, 37, 40, 47, - 54, 60, 65, 73, 46, 44, 45, 52, 60, 69, 75, 84, 52, 49, 49, 56, 65, 75, - 82, 92, 63, 
59, 58, 65, 73, 84, 92, 105, - /* Size 16x16 */ - 32, 31, 31, 31, 32, 32, 34, 36, 38, 41, 44, 48, 54, 58, 61, 65, 31, 32, - 32, 32, 32, 32, 34, 35, 38, 40, 42, 46, 51, 55, 58, 62, 31, 32, 32, 32, - 32, 32, 33, 34, 37, 38, 41, 44, 49, 53, 56, 59, 31, 32, 32, 33, 33, 33, - 35, 36, 38, 40, 42, 45, 49, 53, 56, 59, 32, 32, 32, 33, 34, 34, 36, 37, - 39, 40, 42, 45, 49, 53, 55, 59, 32, 32, 32, 33, 34, 35, 37, 38, 40, 41, - 42, 46, 49, 52, 55, 58, 34, 34, 33, 35, 36, 37, 39, 42, 44, 46, 47, 51, - 54, 57, 60, 63, 36, 35, 34, 36, 37, 38, 42, 48, 50, 52, 54, 57, 60, 63, - 65, 68, 38, 38, 37, 38, 39, 40, 44, 50, 52, 54, 57, 60, 64, 67, 69, 72, - 41, 40, 38, 40, 40, 41, 46, 52, 54, 57, 60, 63, 67, 70, 73, 75, 44, 42, - 41, 42, 42, 42, 47, 54, 57, 60, 63, 67, 71, 74, 77, 79, 48, 46, 44, 45, - 45, 46, 51, 57, 60, 63, 67, 71, 76, 79, 82, 85, 54, 51, 49, 49, 49, 49, - 54, 60, 64, 67, 71, 76, 82, 86, 89, 92, 58, 55, 53, 53, 53, 52, 57, 63, - 67, 70, 74, 79, 86, 90, 93, 97, 61, 58, 56, 56, 55, 55, 60, 65, 69, 73, - 77, 82, 89, 93, 97, 101, 65, 62, 59, 59, 59, 58, 63, 68, 72, 75, 79, 85, - 92, 97, 101, 105, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 34, 36, 36, 38, 39, - 41, 44, 44, 47, 48, 50, 54, 54, 58, 59, 61, 65, 65, 70, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 38, 38, 40, 42, 42, 46, - 47, 49, 52, 52, 56, 57, 59, 63, 63, 67, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 34, 34, 35, 35, 38, 38, 40, 42, 42, 45, 46, 48, 51, 51, - 55, 56, 58, 62, 62, 67, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 34, 34, 35, 35, 37, 38, 39, 42, 42, 45, 45, 47, 50, 50, 54, 55, 57, 61, - 61, 65, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, - 37, 37, 38, 41, 41, 44, 44, 46, 49, 49, 53, 54, 56, 59, 59, 64, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 37, 37, 38, 41, - 41, 44, 44, 46, 49, 49, 53, 54, 56, 59, 59, 64, 31, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 34, 35, 35, 36, 36, 38, 39, 40, 42, 42, 
44, 45, 47, - 49, 49, 53, 54, 56, 59, 59, 63, 31, 32, 32, 32, 32, 32, 33, 33, 33, 34, - 34, 35, 35, 36, 36, 36, 38, 39, 40, 42, 42, 45, 45, 47, 50, 50, 53, 54, - 56, 59, 59, 63, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 35, 36, 36, - 37, 37, 39, 39, 40, 42, 42, 45, 45, 47, 49, 49, 53, 54, 55, 59, 59, 63, - 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 36, 37, 37, 38, 38, 40, 40, - 41, 42, 42, 45, 46, 47, 49, 49, 52, 53, 55, 58, 58, 62, 32, 32, 32, 32, - 32, 32, 33, 34, 34, 35, 35, 36, 37, 37, 38, 38, 40, 40, 41, 42, 42, 45, - 46, 47, 49, 49, 52, 53, 55, 58, 58, 62, 33, 33, 33, 33, 33, 33, 34, 35, - 35, 36, 36, 38, 39, 40, 42, 42, 43, 44, 45, 46, 46, 49, 50, 51, 53, 53, - 56, 57, 59, 62, 62, 66, 34, 34, 34, 34, 33, 33, 35, 35, 36, 37, 37, 39, - 39, 41, 42, 42, 44, 45, 46, 47, 47, 50, 51, 52, 54, 54, 57, 58, 60, 63, - 63, 67, 34, 34, 34, 34, 34, 34, 35, 36, 36, 37, 37, 40, 41, 42, 45, 45, - 46, 47, 48, 50, 50, 52, 53, 54, 56, 56, 59, 60, 62, 65, 65, 69, 36, 35, - 35, 35, 34, 34, 36, 36, 37, 38, 38, 42, 42, 45, 48, 48, 50, 50, 52, 54, - 54, 56, 57, 58, 60, 60, 63, 64, 65, 68, 68, 72, 36, 35, 35, 35, 34, 34, - 36, 36, 37, 38, 38, 42, 42, 45, 48, 48, 50, 50, 52, 54, 54, 56, 57, 58, - 60, 60, 63, 64, 65, 68, 68, 72, 38, 38, 38, 37, 37, 37, 38, 38, 39, 40, - 40, 43, 44, 46, 50, 50, 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 67, 68, - 69, 72, 72, 76, 39, 38, 38, 38, 37, 37, 39, 39, 39, 40, 40, 44, 45, 47, - 50, 50, 53, 54, 55, 58, 58, 60, 61, 62, 65, 65, 68, 69, 70, 73, 73, 77, - 41, 40, 40, 39, 38, 38, 40, 40, 40, 41, 41, 45, 46, 48, 52, 52, 54, 55, - 57, 60, 60, 62, 63, 65, 67, 67, 70, 71, 73, 75, 75, 79, 44, 42, 42, 42, - 41, 41, 42, 42, 42, 42, 42, 46, 47, 50, 54, 54, 57, 58, 60, 63, 63, 66, - 67, 68, 71, 71, 74, 75, 77, 79, 79, 83, 44, 42, 42, 42, 41, 41, 42, 42, - 42, 42, 42, 46, 47, 50, 54, 54, 57, 58, 60, 63, 63, 66, 67, 68, 71, 71, - 74, 75, 77, 79, 79, 83, 47, 46, 45, 45, 44, 44, 44, 45, 45, 45, 45, 49, - 50, 52, 56, 56, 59, 60, 62, 66, 66, 69, 70, 72, 75, 75, 78, 
79, 81, 84, - 84, 88, 48, 47, 46, 45, 44, 44, 45, 45, 45, 46, 46, 50, 51, 53, 57, 57, - 60, 61, 63, 67, 67, 70, 71, 73, 76, 76, 79, 80, 82, 85, 85, 89, 50, 49, - 48, 47, 46, 46, 47, 47, 47, 47, 47, 51, 52, 54, 58, 58, 61, 62, 65, 68, - 68, 72, 73, 75, 78, 78, 82, 83, 85, 88, 88, 92, 54, 52, 51, 50, 49, 49, - 49, 50, 49, 49, 49, 53, 54, 56, 60, 60, 64, 65, 67, 71, 71, 75, 76, 78, - 82, 82, 86, 87, 89, 92, 92, 96, 54, 52, 51, 50, 49, 49, 49, 50, 49, 49, - 49, 53, 54, 56, 60, 60, 64, 65, 67, 71, 71, 75, 76, 78, 82, 82, 86, 87, - 89, 92, 92, 96, 58, 56, 55, 54, 53, 53, 53, 53, 53, 52, 52, 56, 57, 59, - 63, 63, 67, 68, 70, 74, 74, 78, 79, 82, 86, 86, 90, 91, 93, 97, 97, 101, - 59, 57, 56, 55, 54, 54, 54, 54, 54, 53, 53, 57, 58, 60, 64, 64, 68, 69, - 71, 75, 75, 79, 80, 83, 87, 87, 91, 92, 94, 98, 98, 102, 61, 59, 58, 57, - 56, 56, 56, 56, 55, 55, 55, 59, 60, 62, 65, 65, 69, 70, 73, 77, 77, 81, - 82, 85, 89, 89, 93, 94, 97, 101, 101, 105, 65, 63, 62, 61, 59, 59, 59, - 59, 59, 58, 58, 62, 63, 65, 68, 68, 72, 73, 75, 79, 79, 84, 85, 88, 92, - 92, 97, 98, 101, 105, 105, 109, 65, 63, 62, 61, 59, 59, 59, 59, 59, 58, - 58, 62, 63, 65, 68, 68, 72, 73, 75, 79, 79, 84, 85, 88, 92, 92, 97, 98, - 101, 105, 105, 109, 70, 67, 67, 65, 64, 64, 63, 63, 63, 62, 62, 66, 67, - 69, 72, 72, 76, 77, 79, 83, 83, 88, 89, 92, 96, 96, 101, 102, 105, 109, - 109, 114, - /* Size 4x8 */ - 32, 32, 42, 56, 32, 33, 41, 53, 32, 35, 42, 52, 34, 37, 50, 59, 38, 40, - 58, 68, 44, 45, 66, 78, 50, 50, 71, 86, 61, 58, 79, 97, - /* Size 8x4 */ - 32, 32, 32, 34, 38, 44, 50, 61, 32, 33, 35, 37, 40, 45, 50, 58, 42, 41, - 42, 50, 58, 66, 71, 79, 56, 53, 52, 59, 68, 78, 86, 97, - /* Size 8x16 */ - 32, 31, 32, 35, 39, 44, 53, 65, 31, 32, 32, 35, 38, 42, 51, 62, 31, 32, - 33, 34, 37, 41, 49, 59, 31, 32, 34, 35, 38, 42, 49, 59, 32, 32, 34, 36, - 39, 42, 49, 58, 32, 33, 35, 37, 40, 42, 49, 58, 34, 34, 37, 41, 44, 48, - 54, 63, 36, 34, 38, 46, 50, 54, 60, 68, 38, 37, 40, 47, 52, 57, 64, 72, - 41, 39, 41, 49, 54, 60, 67, 
76, 44, 41, 43, 51, 57, 63, 71, 79, 48, 45, - 46, 54, 60, 67, 76, 85, 53, 49, 50, 57, 64, 71, 82, 92, 57, 53, 53, 60, - 67, 74, 86, 97, 61, 56, 56, 63, 69, 77, 89, 100, 65, 60, 58, 66, 72, 79, - 92, 105, - /* Size 16x8 */ - 32, 31, 31, 31, 32, 32, 34, 36, 38, 41, 44, 48, 53, 57, 61, 65, 31, 32, - 32, 32, 32, 33, 34, 34, 37, 39, 41, 45, 49, 53, 56, 60, 32, 32, 33, 34, - 34, 35, 37, 38, 40, 41, 43, 46, 50, 53, 56, 58, 35, 35, 34, 35, 36, 37, - 41, 46, 47, 49, 51, 54, 57, 60, 63, 66, 39, 38, 37, 38, 39, 40, 44, 50, - 52, 54, 57, 60, 64, 67, 69, 72, 44, 42, 41, 42, 42, 42, 48, 54, 57, 60, - 63, 67, 71, 74, 77, 79, 53, 51, 49, 49, 49, 49, 54, 60, 64, 67, 71, 76, - 82, 86, 89, 92, 65, 62, 59, 59, 58, 58, 63, 68, 72, 76, 79, 85, 92, 97, - 100, 105, - /* Size 16x32 */ - 32, 31, 31, 31, 32, 32, 35, 36, 39, 44, 44, 51, 53, 58, 65, 65, 31, 32, - 32, 32, 32, 32, 35, 35, 38, 42, 42, 49, 52, 56, 63, 63, 31, 32, 32, 32, - 32, 32, 35, 35, 38, 42, 42, 49, 51, 55, 62, 62, 31, 32, 32, 32, 32, 32, - 34, 35, 37, 41, 41, 48, 50, 54, 61, 61, 31, 32, 32, 32, 33, 33, 34, 34, - 37, 41, 41, 47, 49, 53, 59, 59, 31, 32, 32, 32, 33, 33, 34, 34, 37, 41, - 41, 47, 49, 53, 59, 59, 31, 32, 32, 33, 34, 34, 35, 36, 38, 42, 42, 48, - 49, 53, 59, 59, 32, 32, 32, 33, 34, 34, 36, 36, 38, 42, 42, 48, 50, 53, - 59, 59, 32, 32, 32, 33, 34, 34, 36, 37, 39, 42, 42, 48, 49, 53, 58, 58, - 32, 32, 33, 34, 35, 35, 37, 38, 40, 42, 42, 48, 49, 52, 58, 58, 32, 32, - 33, 34, 35, 35, 37, 38, 40, 42, 42, 48, 49, 52, 58, 58, 33, 33, 33, 35, - 36, 36, 40, 41, 43, 46, 46, 52, 53, 56, 62, 62, 34, 34, 34, 35, 37, 37, - 41, 42, 44, 48, 48, 53, 54, 57, 63, 63, 34, 34, 34, 35, 37, 37, 43, 44, - 46, 50, 50, 55, 56, 59, 65, 65, 36, 35, 34, 36, 38, 38, 46, 48, 50, 54, - 54, 58, 60, 63, 68, 68, 36, 35, 34, 36, 38, 38, 46, 48, 50, 54, 54, 58, - 60, 63, 68, 68, 38, 37, 37, 38, 40, 40, 47, 50, 52, 57, 57, 62, 64, 67, - 72, 72, 39, 38, 37, 39, 40, 40, 48, 50, 53, 58, 58, 63, 65, 68, 73, 73, - 41, 39, 39, 40, 41, 41, 49, 51, 54, 60, 
60, 66, 67, 70, 76, 76, 44, 41, - 41, 42, 43, 43, 51, 53, 57, 63, 63, 69, 71, 74, 79, 79, 44, 41, 41, 42, - 43, 43, 51, 53, 57, 63, 63, 69, 71, 74, 79, 79, 47, 44, 44, 44, 45, 45, - 53, 56, 59, 66, 66, 73, 75, 78, 84, 84, 48, 45, 45, 45, 46, 46, 54, 56, - 60, 67, 67, 74, 76, 79, 85, 85, 50, 47, 46, 47, 47, 47, 55, 58, 61, 68, - 68, 76, 78, 82, 88, 88, 53, 50, 49, 50, 50, 50, 57, 60, 64, 71, 71, 79, - 82, 86, 92, 92, 53, 50, 49, 50, 50, 50, 57, 60, 64, 71, 71, 79, 82, 86, - 92, 92, 57, 54, 53, 53, 53, 53, 60, 63, 67, 74, 74, 83, 86, 90, 97, 97, - 58, 55, 54, 54, 54, 54, 61, 63, 68, 75, 75, 84, 87, 91, 98, 98, 61, 57, - 56, 56, 56, 56, 63, 65, 69, 77, 77, 86, 89, 93, 100, 100, 65, 61, 60, - 59, 58, 58, 66, 68, 72, 79, 79, 89, 92, 97, 105, 105, 65, 61, 60, 59, - 58, 58, 66, 68, 72, 79, 79, 89, 92, 97, 105, 105, 70, 65, 64, 63, 62, - 62, 70, 72, 76, 83, 83, 93, 96, 101, 109, 109, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33, 34, 34, 36, 36, 38, 39, - 41, 44, 44, 47, 48, 50, 53, 53, 57, 58, 61, 65, 65, 70, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 37, 38, 39, 41, 41, 44, - 45, 47, 50, 50, 54, 55, 57, 61, 61, 65, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 34, 34, 34, 34, 37, 37, 39, 41, 41, 44, 45, 46, 49, 49, - 53, 54, 56, 60, 60, 64, 31, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 35, - 35, 35, 36, 36, 38, 39, 40, 42, 42, 44, 45, 47, 50, 50, 53, 54, 56, 59, - 59, 63, 32, 32, 32, 32, 33, 33, 34, 34, 34, 35, 35, 36, 37, 37, 38, 38, - 40, 40, 41, 43, 43, 45, 46, 47, 50, 50, 53, 54, 56, 58, 58, 62, 32, 32, - 32, 32, 33, 33, 34, 34, 34, 35, 35, 36, 37, 37, 38, 38, 40, 40, 41, 43, - 43, 45, 46, 47, 50, 50, 53, 54, 56, 58, 58, 62, 35, 35, 35, 34, 34, 34, - 35, 36, 36, 37, 37, 40, 41, 43, 46, 46, 47, 48, 49, 51, 51, 53, 54, 55, - 57, 57, 60, 61, 63, 66, 66, 70, 36, 35, 35, 35, 34, 34, 36, 36, 37, 38, - 38, 41, 42, 44, 48, 48, 50, 50, 51, 53, 53, 56, 56, 58, 60, 60, 63, 63, - 65, 68, 68, 72, 39, 38, 38, 37, 37, 37, 38, 38, 39, 
40, 40, 43, 44, 46, - 50, 50, 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 67, 68, 69, 72, 72, 76, - 44, 42, 42, 41, 41, 41, 42, 42, 42, 42, 42, 46, 48, 50, 54, 54, 57, 58, - 60, 63, 63, 66, 67, 68, 71, 71, 74, 75, 77, 79, 79, 83, 44, 42, 42, 41, - 41, 41, 42, 42, 42, 42, 42, 46, 48, 50, 54, 54, 57, 58, 60, 63, 63, 66, - 67, 68, 71, 71, 74, 75, 77, 79, 79, 83, 51, 49, 49, 48, 47, 47, 48, 48, - 48, 48, 48, 52, 53, 55, 58, 58, 62, 63, 66, 69, 69, 73, 74, 76, 79, 79, - 83, 84, 86, 89, 89, 93, 53, 52, 51, 50, 49, 49, 49, 50, 49, 49, 49, 53, - 54, 56, 60, 60, 64, 65, 67, 71, 71, 75, 76, 78, 82, 82, 86, 87, 89, 92, - 92, 96, 58, 56, 55, 54, 53, 53, 53, 53, 53, 52, 52, 56, 57, 59, 63, 63, - 67, 68, 70, 74, 74, 78, 79, 82, 86, 86, 90, 91, 93, 97, 97, 101, 65, 63, - 62, 61, 59, 59, 59, 59, 58, 58, 58, 62, 63, 65, 68, 68, 72, 73, 76, 79, - 79, 84, 85, 88, 92, 92, 97, 98, 100, 105, 105, 109, 65, 63, 62, 61, 59, - 59, 59, 59, 58, 58, 58, 62, 63, 65, 68, 68, 72, 73, 76, 79, 79, 84, 85, - 88, 92, 92, 97, 98, 100, 105, 105, 109, - /* Size 4x16 */ - 31, 32, 44, 58, 32, 32, 42, 55, 32, 33, 41, 53, 32, 34, 42, 53, 32, 34, - 42, 53, 32, 35, 42, 52, 34, 37, 48, 57, 35, 38, 54, 63, 37, 40, 57, 67, - 39, 41, 60, 70, 41, 43, 63, 74, 45, 46, 67, 79, 50, 50, 71, 86, 54, 53, - 74, 90, 57, 56, 77, 93, 61, 58, 79, 97, - /* Size 16x4 */ - 31, 32, 32, 32, 32, 32, 34, 35, 37, 39, 41, 45, 50, 54, 57, 61, 32, 32, - 33, 34, 34, 35, 37, 38, 40, 41, 43, 46, 50, 53, 56, 58, 44, 42, 41, 42, - 42, 42, 48, 54, 57, 60, 63, 67, 71, 74, 77, 79, 58, 55, 53, 53, 53, 52, - 57, 63, 67, 70, 74, 79, 86, 90, 93, 97, - /* Size 8x32 */ - 32, 31, 32, 35, 39, 44, 53, 65, 31, 32, 32, 35, 38, 42, 52, 63, 31, 32, - 32, 35, 38, 42, 51, 62, 31, 32, 32, 34, 37, 41, 50, 61, 31, 32, 33, 34, - 37, 41, 49, 59, 31, 32, 33, 34, 37, 41, 49, 59, 31, 32, 34, 35, 38, 42, - 49, 59, 32, 32, 34, 36, 38, 42, 50, 59, 32, 32, 34, 36, 39, 42, 49, 58, - 32, 33, 35, 37, 40, 42, 49, 58, 32, 33, 35, 37, 40, 42, 49, 58, 33, 33, - 36, 40, 43, 46, 53, 
62, 34, 34, 37, 41, 44, 48, 54, 63, 34, 34, 37, 43, - 46, 50, 56, 65, 36, 34, 38, 46, 50, 54, 60, 68, 36, 34, 38, 46, 50, 54, - 60, 68, 38, 37, 40, 47, 52, 57, 64, 72, 39, 37, 40, 48, 53, 58, 65, 73, - 41, 39, 41, 49, 54, 60, 67, 76, 44, 41, 43, 51, 57, 63, 71, 79, 44, 41, - 43, 51, 57, 63, 71, 79, 47, 44, 45, 53, 59, 66, 75, 84, 48, 45, 46, 54, - 60, 67, 76, 85, 50, 46, 47, 55, 61, 68, 78, 88, 53, 49, 50, 57, 64, 71, - 82, 92, 53, 49, 50, 57, 64, 71, 82, 92, 57, 53, 53, 60, 67, 74, 86, 97, - 58, 54, 54, 61, 68, 75, 87, 98, 61, 56, 56, 63, 69, 77, 89, 100, 65, 60, - 58, 66, 72, 79, 92, 105, 65, 60, 58, 66, 72, 79, 92, 105, 70, 64, 62, - 70, 76, 83, 96, 109, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33, 34, 34, 36, 36, 38, 39, - 41, 44, 44, 47, 48, 50, 53, 53, 57, 58, 61, 65, 65, 70, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 37, 37, 39, 41, 41, 44, - 45, 46, 49, 49, 53, 54, 56, 60, 60, 64, 32, 32, 32, 32, 33, 33, 34, 34, - 34, 35, 35, 36, 37, 37, 38, 38, 40, 40, 41, 43, 43, 45, 46, 47, 50, 50, - 53, 54, 56, 58, 58, 62, 35, 35, 35, 34, 34, 34, 35, 36, 36, 37, 37, 40, - 41, 43, 46, 46, 47, 48, 49, 51, 51, 53, 54, 55, 57, 57, 60, 61, 63, 66, - 66, 70, 39, 38, 38, 37, 37, 37, 38, 38, 39, 40, 40, 43, 44, 46, 50, 50, - 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 67, 68, 69, 72, 72, 76, 44, 42, - 42, 41, 41, 41, 42, 42, 42, 42, 42, 46, 48, 50, 54, 54, 57, 58, 60, 63, - 63, 66, 67, 68, 71, 71, 74, 75, 77, 79, 79, 83, 53, 52, 51, 50, 49, 49, - 49, 50, 49, 49, 49, 53, 54, 56, 60, 60, 64, 65, 67, 71, 71, 75, 76, 78, - 82, 82, 86, 87, 89, 92, 92, 96, 65, 63, 62, 61, 59, 59, 59, 59, 58, 58, - 58, 62, 63, 65, 68, 68, 72, 73, 76, 79, 79, 84, 85, 88, 92, 92, 97, 98, - 100, 105, 105, 109 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 41, 46, 51, 41, 48, 48, 51, 46, 48, 58, 62, 51, 51, 62, 71, - /* Size 8x8 */ - 31, 31, 38, 44, 47, 48, 50, 55, 31, 32, 40, 44, 45, 46, 47, 52, 38, 40, - 47, 47, 46, 46, 47, 50, 44, 44, 47, 50, 51, 51, 52, 54, 47, 
45, 46, 51, - 54, 56, 57, 60, 48, 46, 46, 51, 56, 61, 63, 66, 50, 47, 47, 52, 57, 63, - 66, 70, 55, 52, 50, 54, 60, 66, 70, 76, - /* Size 16x16 */ - 32, 31, 30, 33, 34, 36, 41, 49, 48, 49, 49, 50, 52, 54, 55, 57, 31, 31, - 31, 34, 36, 38, 42, 47, 47, 47, 47, 48, 50, 51, 53, 54, 30, 31, 32, 34, - 37, 40, 42, 46, 45, 45, 45, 46, 47, 49, 50, 52, 33, 34, 34, 37, 40, 42, - 44, 47, 46, 46, 45, 46, 47, 49, 50, 51, 34, 36, 37, 40, 42, 45, 46, 47, - 46, 46, 45, 46, 47, 48, 49, 50, 36, 38, 40, 42, 45, 47, 47, 48, 47, 46, - 45, 46, 47, 48, 49, 50, 41, 42, 42, 44, 46, 47, 48, 50, 50, 49, 49, 50, - 50, 51, 52, 53, 49, 47, 46, 47, 47, 48, 50, 53, 53, 53, 53, 54, 54, 55, - 56, 56, 48, 47, 45, 46, 46, 47, 50, 53, 54, 54, 55, 56, 57, 58, 58, 59, - 49, 47, 45, 46, 46, 46, 49, 53, 54, 55, 57, 58, 59, 60, 60, 61, 49, 47, - 45, 45, 45, 45, 49, 53, 55, 57, 58, 60, 61, 62, 63, 63, 50, 48, 46, 46, - 46, 46, 50, 54, 56, 58, 60, 61, 63, 65, 66, 67, 52, 50, 47, 47, 47, 47, - 50, 54, 57, 59, 61, 63, 66, 68, 69, 70, 54, 51, 49, 49, 48, 48, 51, 55, - 58, 60, 62, 65, 68, 70, 71, 73, 55, 53, 50, 50, 49, 49, 52, 56, 58, 60, - 63, 66, 69, 71, 73, 74, 57, 54, 52, 51, 50, 50, 53, 56, 59, 61, 63, 67, - 70, 73, 74, 76, - /* Size 32x32 */ - 32, 31, 31, 31, 30, 30, 33, 33, 34, 36, 36, 40, 41, 44, 49, 49, 48, 48, - 49, 49, 49, 50, 50, 51, 52, 52, 54, 54, 55, 57, 57, 59, 31, 31, 31, 31, - 31, 31, 33, 34, 36, 38, 38, 41, 42, 44, 48, 48, 47, 47, 47, 47, 47, 48, - 49, 49, 50, 50, 52, 52, 53, 55, 55, 57, 31, 31, 31, 31, 31, 31, 34, 34, - 36, 38, 38, 41, 42, 44, 47, 47, 47, 47, 47, 47, 47, 48, 48, 49, 50, 50, - 51, 52, 53, 54, 54, 56, 31, 31, 31, 31, 31, 31, 34, 35, 36, 39, 39, 41, - 42, 44, 47, 47, 46, 46, 46, 46, 46, 47, 47, 48, 49, 49, 50, 51, 52, 53, - 53, 55, 30, 31, 31, 31, 32, 32, 34, 35, 37, 40, 40, 42, 42, 44, 46, 46, - 45, 45, 45, 45, 45, 46, 46, 47, 47, 47, 49, 49, 50, 52, 52, 54, 30, 31, - 31, 31, 32, 32, 34, 35, 37, 40, 40, 42, 42, 44, 46, 46, 45, 45, 45, 45, - 45, 46, 46, 47, 47, 47, 49, 49, 50, 
52, 52, 54, 33, 33, 34, 34, 34, 34, - 37, 38, 40, 42, 42, 44, 44, 45, 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, - 47, 47, 49, 49, 50, 51, 51, 53, 33, 34, 34, 35, 35, 35, 38, 39, 40, 43, - 43, 44, 45, 46, 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 47, 49, 49, - 50, 51, 51, 53, 34, 36, 36, 36, 37, 37, 40, 40, 42, 45, 45, 45, 46, 46, - 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 47, 48, 49, 49, 50, 50, 52, - 36, 38, 38, 39, 40, 40, 42, 43, 45, 47, 47, 47, 47, 47, 48, 48, 47, 46, - 46, 45, 45, 46, 46, 46, 47, 47, 48, 48, 49, 50, 50, 51, 36, 38, 38, 39, - 40, 40, 42, 43, 45, 47, 47, 47, 47, 47, 48, 48, 47, 46, 46, 45, 45, 46, - 46, 46, 47, 47, 48, 48, 49, 50, 50, 51, 40, 41, 41, 41, 42, 42, 44, 44, - 45, 47, 47, 48, 48, 49, 50, 50, 49, 49, 49, 48, 48, 49, 49, 49, 49, 49, - 51, 51, 51, 52, 52, 54, 41, 42, 42, 42, 42, 42, 44, 45, 46, 47, 47, 48, - 48, 49, 50, 50, 50, 49, 49, 49, 49, 50, 50, 50, 50, 50, 51, 52, 52, 53, - 53, 55, 44, 44, 44, 44, 44, 44, 45, 46, 46, 47, 47, 49, 49, 50, 51, 51, - 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 53, 53, 54, 54, 54, 56, 49, 48, - 47, 47, 46, 46, 47, 47, 47, 48, 48, 50, 50, 51, 53, 53, 53, 53, 53, 53, - 53, 54, 54, 54, 54, 54, 55, 55, 56, 56, 56, 58, 49, 48, 47, 47, 46, 46, - 47, 47, 47, 48, 48, 50, 50, 51, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, - 54, 54, 55, 55, 56, 56, 56, 58, 48, 47, 47, 46, 45, 45, 46, 46, 46, 47, - 47, 49, 50, 51, 53, 53, 54, 54, 54, 55, 55, 56, 56, 56, 57, 57, 58, 58, - 58, 59, 59, 60, 48, 47, 47, 46, 45, 45, 46, 46, 46, 46, 46, 49, 49, 51, - 53, 53, 54, 54, 55, 55, 55, 56, 56, 57, 57, 57, 58, 58, 59, 60, 60, 61, - 49, 47, 47, 46, 45, 45, 46, 46, 46, 46, 46, 49, 49, 51, 53, 53, 54, 55, - 55, 57, 57, 57, 58, 58, 59, 59, 60, 60, 60, 61, 61, 63, 49, 47, 47, 46, - 45, 45, 45, 45, 45, 45, 45, 48, 49, 51, 53, 53, 55, 55, 57, 58, 58, 59, - 60, 60, 61, 61, 62, 62, 63, 63, 63, 65, 49, 47, 47, 46, 45, 45, 45, 45, - 45, 45, 45, 48, 49, 51, 53, 53, 55, 55, 57, 58, 58, 59, 60, 60, 61, 61, - 62, 62, 63, 63, 63, 65, 50, 48, 48, 
47, 46, 46, 46, 46, 46, 46, 46, 49, - 50, 51, 54, 54, 56, 56, 57, 59, 59, 61, 61, 62, 63, 63, 64, 64, 65, 66, - 66, 67, 50, 49, 48, 47, 46, 46, 46, 46, 46, 46, 46, 49, 50, 51, 54, 54, - 56, 56, 58, 60, 60, 61, 61, 62, 63, 63, 65, 65, 66, 67, 67, 68, 51, 49, - 49, 48, 47, 47, 47, 47, 47, 46, 46, 49, 50, 51, 54, 54, 56, 57, 58, 60, - 60, 62, 62, 63, 65, 65, 66, 66, 67, 68, 68, 70, 52, 50, 50, 49, 47, 47, - 47, 47, 47, 47, 47, 49, 50, 52, 54, 54, 57, 57, 59, 61, 61, 63, 63, 65, - 66, 66, 68, 68, 69, 70, 70, 72, 52, 50, 50, 49, 47, 47, 47, 47, 47, 47, - 47, 49, 50, 52, 54, 54, 57, 57, 59, 61, 61, 63, 63, 65, 66, 66, 68, 68, - 69, 70, 70, 72, 54, 52, 51, 50, 49, 49, 49, 49, 48, 48, 48, 51, 51, 53, - 55, 55, 58, 58, 60, 62, 62, 64, 65, 66, 68, 68, 70, 70, 71, 73, 73, 74, - 54, 52, 52, 51, 49, 49, 49, 49, 49, 48, 48, 51, 52, 53, 55, 55, 58, 58, - 60, 62, 62, 64, 65, 66, 68, 68, 70, 71, 72, 73, 73, 75, 55, 53, 53, 52, - 50, 50, 50, 50, 49, 49, 49, 51, 52, 54, 56, 56, 58, 59, 60, 63, 63, 65, - 66, 67, 69, 69, 71, 72, 73, 74, 74, 76, 57, 55, 54, 53, 52, 52, 51, 51, - 50, 50, 50, 52, 53, 54, 56, 56, 59, 60, 61, 63, 63, 66, 67, 68, 70, 70, - 73, 73, 74, 76, 76, 78, 57, 55, 54, 53, 52, 52, 51, 51, 50, 50, 50, 52, - 53, 54, 56, 56, 59, 60, 61, 63, 63, 66, 67, 68, 70, 70, 73, 73, 74, 76, - 76, 78, 59, 57, 56, 55, 54, 54, 53, 53, 52, 51, 51, 54, 55, 56, 58, 58, - 60, 61, 63, 65, 65, 67, 68, 70, 72, 72, 74, 75, 76, 78, 78, 80, - /* Size 4x8 */ - 31, 38, 47, 52, 32, 40, 45, 49, 39, 47, 45, 48, 44, 47, 51, 53, 46, 47, - 56, 58, 47, 46, 59, 64, 48, 47, 61, 68, 53, 50, 64, 73, - /* Size 8x4 */ - 31, 32, 39, 44, 46, 47, 48, 53, 38, 40, 47, 47, 47, 46, 47, 50, 47, 45, - 45, 51, 56, 59, 61, 64, 52, 49, 48, 53, 58, 64, 68, 73, - /* Size 8x16 */ - 32, 31, 37, 45, 48, 49, 52, 57, 31, 31, 38, 45, 47, 47, 50, 54, 30, 32, - 40, 44, 45, 45, 48, 52, 33, 35, 42, 46, 46, 45, 47, 51, 35, 37, 44, 46, - 46, 45, 47, 51, 37, 40, 47, 47, 47, 45, 47, 50, 42, 43, 47, 49, 50, 49, - 50, 53, 49, 46, 48, 52, 
53, 53, 54, 57, 48, 46, 47, 51, 54, 55, 57, 59, - 48, 45, 46, 51, 54, 57, 59, 61, 49, 45, 46, 51, 55, 58, 61, 64, 50, 46, - 46, 52, 56, 59, 64, 67, 52, 48, 47, 53, 57, 61, 66, 71, 54, 49, 48, 54, - 58, 62, 68, 73, 55, 51, 49, 54, 58, 63, 69, 74, 57, 52, 50, 55, 59, 64, - 70, 76, - /* Size 16x8 */ - 32, 31, 30, 33, 35, 37, 42, 49, 48, 48, 49, 50, 52, 54, 55, 57, 31, 31, - 32, 35, 37, 40, 43, 46, 46, 45, 45, 46, 48, 49, 51, 52, 37, 38, 40, 42, - 44, 47, 47, 48, 47, 46, 46, 46, 47, 48, 49, 50, 45, 45, 44, 46, 46, 47, - 49, 52, 51, 51, 51, 52, 53, 54, 54, 55, 48, 47, 45, 46, 46, 47, 50, 53, - 54, 54, 55, 56, 57, 58, 58, 59, 49, 47, 45, 45, 45, 45, 49, 53, 55, 57, - 58, 59, 61, 62, 63, 64, 52, 50, 48, 47, 47, 47, 50, 54, 57, 59, 61, 64, - 66, 68, 69, 70, 57, 54, 52, 51, 51, 50, 53, 57, 59, 61, 64, 67, 71, 73, - 74, 76, - /* Size 16x32 */ - 32, 31, 31, 33, 37, 37, 45, 48, 48, 49, 49, 51, 52, 54, 57, 57, 31, 31, - 31, 34, 38, 38, 45, 47, 47, 47, 47, 50, 50, 52, 55, 55, 31, 31, 31, 34, - 38, 38, 45, 47, 47, 47, 47, 49, 50, 51, 54, 54, 31, 31, 32, 34, 39, 39, - 45, 46, 46, 46, 46, 48, 49, 51, 53, 53, 30, 32, 32, 35, 40, 40, 44, 46, - 45, 45, 45, 47, 48, 49, 52, 52, 30, 32, 32, 35, 40, 40, 44, 46, 45, 45, - 45, 47, 48, 49, 52, 52, 33, 34, 35, 37, 42, 42, 46, 47, 46, 45, 45, 47, - 47, 49, 51, 51, 33, 35, 36, 38, 43, 43, 46, 47, 46, 46, 46, 47, 47, 49, - 51, 51, 35, 37, 37, 40, 44, 44, 46, 47, 46, 45, 45, 47, 47, 48, 51, 51, - 37, 39, 40, 43, 47, 47, 47, 47, 47, 45, 45, 46, 47, 48, 50, 50, 37, 39, - 40, 43, 47, 47, 47, 47, 47, 45, 45, 46, 47, 48, 50, 50, 41, 42, 42, 44, - 47, 47, 49, 49, 49, 48, 48, 49, 50, 51, 52, 52, 42, 42, 43, 44, 47, 47, - 49, 50, 50, 49, 49, 50, 50, 51, 53, 53, 44, 44, 44, 45, 47, 47, 50, 51, - 51, 51, 51, 52, 52, 53, 54, 54, 49, 47, 46, 47, 48, 48, 52, 53, 53, 53, - 53, 54, 54, 55, 57, 57, 49, 47, 46, 47, 48, 48, 52, 53, 53, 53, 53, 54, - 54, 55, 57, 57, 48, 46, 46, 46, 47, 47, 51, 53, 54, 55, 55, 56, 57, 58, - 59, 59, 48, 46, 46, 46, 47, 47, 51, 53, 
54, 56, 56, 57, 57, 58, 60, 60, - 48, 46, 45, 46, 46, 46, 51, 53, 54, 57, 57, 58, 59, 60, 61, 61, 49, 46, - 45, 45, 46, 46, 51, 53, 55, 58, 58, 61, 61, 62, 64, 64, 49, 46, 45, 45, - 46, 46, 51, 53, 55, 58, 58, 61, 61, 62, 64, 64, 50, 47, 46, 46, 46, 46, - 52, 54, 56, 59, 59, 62, 63, 64, 66, 66, 50, 47, 46, 46, 46, 46, 52, 54, - 56, 59, 59, 63, 64, 65, 67, 67, 51, 48, 47, 47, 47, 47, 52, 54, 56, 60, - 60, 64, 65, 66, 68, 68, 52, 48, 48, 47, 47, 47, 53, 54, 57, 61, 61, 65, - 66, 68, 71, 71, 52, 48, 48, 47, 47, 47, 53, 54, 57, 61, 61, 65, 66, 68, - 71, 71, 54, 50, 49, 49, 48, 48, 54, 55, 58, 62, 62, 67, 68, 70, 73, 73, - 54, 51, 50, 49, 49, 49, 54, 55, 58, 62, 62, 67, 68, 70, 73, 73, 55, 51, - 51, 50, 49, 49, 54, 56, 58, 63, 63, 68, 69, 71, 74, 74, 57, 53, 52, 51, - 50, 50, 55, 56, 59, 64, 64, 69, 70, 73, 76, 76, 57, 53, 52, 51, 50, 50, - 55, 56, 59, 64, 64, 69, 70, 73, 76, 76, 59, 55, 54, 53, 52, 52, 57, 58, - 61, 65, 65, 70, 72, 74, 78, 78, - /* Size 32x16 */ - 32, 31, 31, 31, 30, 30, 33, 33, 35, 37, 37, 41, 42, 44, 49, 49, 48, 48, - 48, 49, 49, 50, 50, 51, 52, 52, 54, 54, 55, 57, 57, 59, 31, 31, 31, 31, - 32, 32, 34, 35, 37, 39, 39, 42, 42, 44, 47, 47, 46, 46, 46, 46, 46, 47, - 47, 48, 48, 48, 50, 51, 51, 53, 53, 55, 31, 31, 31, 32, 32, 32, 35, 36, - 37, 40, 40, 42, 43, 44, 46, 46, 46, 46, 45, 45, 45, 46, 46, 47, 48, 48, - 49, 50, 51, 52, 52, 54, 33, 34, 34, 34, 35, 35, 37, 38, 40, 43, 43, 44, - 44, 45, 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 47, 49, 49, 50, 51, - 51, 53, 37, 38, 38, 39, 40, 40, 42, 43, 44, 47, 47, 47, 47, 47, 48, 48, - 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 48, 49, 49, 50, 50, 52, 37, 38, - 38, 39, 40, 40, 42, 43, 44, 47, 47, 47, 47, 47, 48, 48, 47, 47, 46, 46, - 46, 46, 46, 47, 47, 47, 48, 49, 49, 50, 50, 52, 45, 45, 45, 45, 44, 44, - 46, 46, 46, 47, 47, 49, 49, 50, 52, 52, 51, 51, 51, 51, 51, 52, 52, 52, - 53, 53, 54, 54, 54, 55, 55, 57, 48, 47, 47, 46, 46, 46, 47, 47, 47, 47, - 47, 49, 50, 51, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 
54, 55, 55, - 56, 56, 56, 58, 48, 47, 47, 46, 45, 45, 46, 46, 46, 47, 47, 49, 50, 51, - 53, 53, 54, 54, 54, 55, 55, 56, 56, 56, 57, 57, 58, 58, 58, 59, 59, 61, - 49, 47, 47, 46, 45, 45, 45, 46, 45, 45, 45, 48, 49, 51, 53, 53, 55, 56, - 57, 58, 58, 59, 59, 60, 61, 61, 62, 62, 63, 64, 64, 65, 49, 47, 47, 46, - 45, 45, 45, 46, 45, 45, 45, 48, 49, 51, 53, 53, 55, 56, 57, 58, 58, 59, - 59, 60, 61, 61, 62, 62, 63, 64, 64, 65, 51, 50, 49, 48, 47, 47, 47, 47, - 47, 46, 46, 49, 50, 52, 54, 54, 56, 57, 58, 61, 61, 62, 63, 64, 65, 65, - 67, 67, 68, 69, 69, 70, 52, 50, 50, 49, 48, 48, 47, 47, 47, 47, 47, 50, - 50, 52, 54, 54, 57, 57, 59, 61, 61, 63, 64, 65, 66, 66, 68, 68, 69, 70, - 70, 72, 54, 52, 51, 51, 49, 49, 49, 49, 48, 48, 48, 51, 51, 53, 55, 55, - 58, 58, 60, 62, 62, 64, 65, 66, 68, 68, 70, 70, 71, 73, 73, 74, 57, 55, - 54, 53, 52, 52, 51, 51, 51, 50, 50, 52, 53, 54, 57, 57, 59, 60, 61, 64, - 64, 66, 67, 68, 71, 71, 73, 73, 74, 76, 76, 78, 57, 55, 54, 53, 52, 52, - 51, 51, 51, 50, 50, 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 66, 67, 68, - 71, 71, 73, 73, 74, 76, 76, 78, - /* Size 4x16 */ - 31, 37, 49, 54, 31, 38, 47, 51, 32, 40, 45, 49, 34, 42, 45, 49, 37, 44, - 45, 48, 39, 47, 45, 48, 42, 47, 49, 51, 47, 48, 53, 55, 46, 47, 55, 58, - 46, 46, 57, 60, 46, 46, 58, 62, 47, 46, 59, 65, 48, 47, 61, 68, 50, 48, - 62, 70, 51, 49, 63, 71, 53, 50, 64, 73, - /* Size 16x4 */ - 31, 31, 32, 34, 37, 39, 42, 47, 46, 46, 46, 47, 48, 50, 51, 53, 37, 38, - 40, 42, 44, 47, 47, 48, 47, 46, 46, 46, 47, 48, 49, 50, 49, 47, 45, 45, - 45, 45, 49, 53, 55, 57, 58, 59, 61, 62, 63, 64, 54, 51, 49, 49, 48, 48, - 51, 55, 58, 60, 62, 65, 68, 70, 71, 73, - /* Size 8x32 */ - 32, 31, 37, 45, 48, 49, 52, 57, 31, 31, 38, 45, 47, 47, 50, 55, 31, 31, - 38, 45, 47, 47, 50, 54, 31, 32, 39, 45, 46, 46, 49, 53, 30, 32, 40, 44, - 45, 45, 48, 52, 30, 32, 40, 44, 45, 45, 48, 52, 33, 35, 42, 46, 46, 45, - 47, 51, 33, 36, 43, 46, 46, 46, 47, 51, 35, 37, 44, 46, 46, 45, 47, 51, - 37, 40, 47, 47, 47, 45, 47, 50, 37, 
40, 47, 47, 47, 45, 47, 50, 41, 42, - 47, 49, 49, 48, 50, 52, 42, 43, 47, 49, 50, 49, 50, 53, 44, 44, 47, 50, - 51, 51, 52, 54, 49, 46, 48, 52, 53, 53, 54, 57, 49, 46, 48, 52, 53, 53, - 54, 57, 48, 46, 47, 51, 54, 55, 57, 59, 48, 46, 47, 51, 54, 56, 57, 60, - 48, 45, 46, 51, 54, 57, 59, 61, 49, 45, 46, 51, 55, 58, 61, 64, 49, 45, - 46, 51, 55, 58, 61, 64, 50, 46, 46, 52, 56, 59, 63, 66, 50, 46, 46, 52, - 56, 59, 64, 67, 51, 47, 47, 52, 56, 60, 65, 68, 52, 48, 47, 53, 57, 61, - 66, 71, 52, 48, 47, 53, 57, 61, 66, 71, 54, 49, 48, 54, 58, 62, 68, 73, - 54, 50, 49, 54, 58, 62, 68, 73, 55, 51, 49, 54, 58, 63, 69, 74, 57, 52, - 50, 55, 59, 64, 70, 76, 57, 52, 50, 55, 59, 64, 70, 76, 59, 54, 52, 57, - 61, 65, 72, 78, - /* Size 32x8 */ - 32, 31, 31, 31, 30, 30, 33, 33, 35, 37, 37, 41, 42, 44, 49, 49, 48, 48, - 48, 49, 49, 50, 50, 51, 52, 52, 54, 54, 55, 57, 57, 59, 31, 31, 31, 32, - 32, 32, 35, 36, 37, 40, 40, 42, 43, 44, 46, 46, 46, 46, 45, 45, 45, 46, - 46, 47, 48, 48, 49, 50, 51, 52, 52, 54, 37, 38, 38, 39, 40, 40, 42, 43, - 44, 47, 47, 47, 47, 47, 48, 48, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, - 48, 49, 49, 50, 50, 52, 45, 45, 45, 45, 44, 44, 46, 46, 46, 47, 47, 49, - 49, 50, 52, 52, 51, 51, 51, 51, 51, 52, 52, 52, 53, 53, 54, 54, 54, 55, - 55, 57, 48, 47, 47, 46, 45, 45, 46, 46, 46, 47, 47, 49, 50, 51, 53, 53, - 54, 54, 54, 55, 55, 56, 56, 56, 57, 57, 58, 58, 58, 59, 59, 61, 49, 47, - 47, 46, 45, 45, 45, 46, 45, 45, 45, 48, 49, 51, 53, 53, 55, 56, 57, 58, - 58, 59, 59, 60, 61, 61, 62, 62, 63, 64, 64, 65, 52, 50, 50, 49, 48, 48, - 47, 47, 47, 47, 47, 50, 50, 52, 54, 54, 57, 57, 59, 61, 61, 63, 64, 65, - 66, 66, 68, 68, 69, 70, 70, 72, 57, 55, 54, 53, 52, 52, 51, 51, 51, 50, - 50, 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 66, 67, 68, 71, 71, 73, 73, - 74, 76, 76, 78 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 32, 38, 51, 32, 35, 40, 49, 38, 40, 54, 64, 51, 49, 64, 81, - /* Size 8x8 */ - 31, 32, 32, 34, 35, 41, 47, 53, 32, 32, 32, 33, 34, 40, 44, 50, 32, 32, - 34, 
35, 37, 41, 45, 51, 34, 33, 35, 39, 42, 47, 51, 55, 35, 34, 37, 42, - 48, 53, 57, 61, 41, 40, 41, 47, 53, 60, 65, 70, 47, 44, 45, 51, 57, 65, - 71, 77, 53, 50, 51, 55, 61, 70, 77, 85, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 32, 32, 34, 36, 38, 39, 44, 47, 49, 54, 59, 31, 32, - 32, 32, 32, 32, 33, 34, 35, 37, 38, 42, 45, 47, 51, 56, 31, 32, 32, 32, - 32, 32, 33, 33, 34, 36, 37, 41, 44, 46, 50, 54, 31, 32, 32, 32, 32, 33, - 33, 34, 35, 36, 38, 41, 44, 45, 49, 54, 31, 32, 32, 32, 33, 34, 34, 35, - 36, 38, 39, 42, 45, 46, 50, 54, 32, 32, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 42, 45, 46, 49, 53, 32, 33, 33, 33, 34, 36, 36, 38, 40, 41, 42, 44, - 47, 48, 51, 55, 34, 34, 33, 34, 35, 37, 38, 39, 42, 44, 45, 47, 50, 51, - 54, 58, 36, 35, 34, 35, 36, 38, 40, 42, 48, 50, 50, 54, 56, 57, 60, 64, - 38, 37, 36, 36, 38, 39, 41, 44, 50, 51, 52, 56, 58, 60, 63, 67, 39, 38, - 37, 38, 39, 40, 42, 45, 50, 52, 54, 58, 60, 62, 65, 69, 44, 42, 41, 41, - 42, 42, 44, 47, 54, 56, 58, 63, 66, 68, 71, 75, 47, 45, 44, 44, 45, 45, - 47, 50, 56, 58, 60, 66, 69, 71, 75, 79, 49, 47, 46, 45, 46, 46, 48, 51, - 57, 60, 62, 68, 71, 73, 77, 81, 54, 51, 50, 49, 50, 49, 51, 54, 60, 63, - 65, 71, 75, 77, 82, 87, 59, 56, 54, 54, 54, 53, 55, 58, 64, 67, 69, 75, - 79, 81, 87, 92, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 35, 36, 36, - 38, 39, 39, 42, 44, 44, 47, 48, 49, 53, 54, 55, 59, 59, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 37, 39, 39, 41, - 43, 43, 46, 47, 48, 51, 52, 53, 57, 57, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 37, 38, 38, 41, 42, 43, 45, 46, - 47, 51, 51, 53, 56, 56, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 34, 34, 34, 35, 35, 37, 38, 38, 41, 42, 42, 45, 46, 47, 51, 51, 52, - 56, 56, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, - 34, 34, 36, 37, 37, 40, 41, 41, 44, 45, 46, 49, 50, 51, 54, 54, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 
34, 34, 36, 37, - 37, 40, 41, 41, 44, 44, 45, 49, 49, 50, 54, 54, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 35, 36, 38, 38, 40, 41, 41, - 44, 45, 45, 49, 49, 50, 54, 54, 31, 32, 32, 32, 32, 32, 32, 33, 33, 33, - 34, 34, 34, 35, 35, 35, 36, 36, 38, 39, 39, 41, 42, 42, 44, 45, 46, 49, - 50, 51, 54, 54, 31, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, - 35, 36, 36, 36, 38, 39, 39, 41, 42, 42, 45, 45, 46, 49, 50, 51, 54, 54, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 37, 37, - 38, 39, 39, 41, 42, 42, 45, 45, 46, 49, 49, 51, 54, 54, 32, 32, 32, 32, - 32, 32, 33, 34, 34, 34, 35, 35, 36, 37, 37, 37, 38, 38, 39, 40, 40, 42, - 42, 43, 45, 46, 46, 49, 49, 50, 53, 53, 32, 32, 32, 32, 32, 32, 33, 34, - 34, 34, 35, 35, 36, 37, 37, 37, 38, 38, 39, 40, 40, 42, 42, 43, 45, 46, - 46, 49, 49, 50, 53, 53, 32, 33, 33, 33, 33, 33, 33, 34, 34, 35, 36, 36, - 36, 38, 38, 39, 40, 40, 41, 42, 42, 44, 44, 45, 47, 47, 48, 51, 51, 52, - 55, 55, 34, 34, 34, 34, 33, 33, 34, 35, 35, 35, 37, 37, 38, 39, 39, 41, - 42, 42, 44, 45, 45, 47, 47, 48, 50, 51, 51, 54, 54, 55, 58, 58, 34, 34, - 34, 34, 33, 33, 34, 35, 35, 35, 37, 37, 38, 39, 39, 41, 42, 42, 44, 45, - 45, 47, 47, 48, 50, 51, 51, 54, 54, 55, 58, 58, 35, 34, 34, 34, 34, 34, - 34, 35, 36, 36, 37, 37, 39, 41, 41, 43, 45, 45, 47, 47, 47, 49, 50, 51, - 53, 53, 54, 57, 57, 58, 61, 61, 36, 35, 35, 35, 34, 34, 35, 36, 36, 37, - 38, 38, 40, 42, 42, 45, 48, 48, 50, 50, 50, 53, 54, 54, 56, 57, 57, 59, - 60, 61, 64, 64, 36, 35, 35, 35, 34, 34, 35, 36, 36, 37, 38, 38, 40, 42, - 42, 45, 48, 48, 50, 50, 50, 53, 54, 54, 56, 57, 57, 59, 60, 61, 64, 64, - 38, 37, 37, 37, 36, 36, 36, 38, 38, 38, 39, 39, 41, 44, 44, 47, 50, 50, - 51, 52, 52, 55, 56, 56, 58, 59, 60, 62, 63, 64, 67, 67, 39, 39, 38, 38, - 37, 37, 38, 39, 39, 39, 40, 40, 42, 45, 45, 47, 50, 50, 52, 54, 54, 56, - 58, 58, 60, 61, 62, 64, 65, 66, 69, 69, 39, 39, 38, 38, 37, 37, 38, 39, - 39, 39, 40, 40, 42, 45, 45, 47, 50, 50, 52, 54, 54, 56, 
58, 58, 60, 61, - 62, 64, 65, 66, 69, 69, 42, 41, 41, 41, 40, 40, 40, 41, 41, 41, 42, 42, - 44, 47, 47, 49, 53, 53, 55, 56, 56, 60, 61, 62, 64, 65, 66, 69, 69, 70, - 73, 73, 44, 43, 42, 42, 41, 41, 41, 42, 42, 42, 42, 42, 44, 47, 47, 50, - 54, 54, 56, 58, 58, 61, 63, 64, 66, 67, 68, 71, 71, 72, 75, 75, 44, 43, - 43, 42, 41, 41, 41, 42, 42, 42, 43, 43, 45, 48, 48, 51, 54, 54, 56, 58, - 58, 62, 64, 64, 66, 67, 68, 71, 72, 73, 76, 76, 47, 46, 45, 45, 44, 44, - 44, 44, 45, 45, 45, 45, 47, 50, 50, 53, 56, 56, 58, 60, 60, 64, 66, 66, - 69, 70, 71, 74, 75, 76, 79, 79, 48, 47, 46, 46, 45, 44, 45, 45, 45, 45, - 46, 46, 47, 51, 51, 53, 57, 57, 59, 61, 61, 65, 67, 67, 70, 71, 72, 75, - 76, 77, 80, 80, 49, 48, 47, 47, 46, 45, 45, 46, 46, 46, 46, 46, 48, 51, - 51, 54, 57, 57, 60, 62, 62, 66, 68, 68, 71, 72, 73, 77, 77, 78, 81, 81, - 53, 51, 51, 51, 49, 49, 49, 49, 49, 49, 49, 49, 51, 54, 54, 57, 59, 59, - 62, 64, 64, 69, 71, 71, 74, 75, 77, 81, 81, 83, 86, 86, 54, 52, 51, 51, - 50, 49, 49, 50, 50, 49, 49, 49, 51, 54, 54, 57, 60, 60, 63, 65, 65, 69, - 71, 72, 75, 76, 77, 81, 82, 83, 87, 87, 55, 53, 53, 52, 51, 50, 50, 51, - 51, 51, 50, 50, 52, 55, 55, 58, 61, 61, 64, 66, 66, 70, 72, 73, 76, 77, - 78, 83, 83, 85, 88, 88, 59, 57, 56, 56, 54, 54, 54, 54, 54, 54, 53, 53, - 55, 58, 58, 61, 64, 64, 67, 69, 69, 73, 75, 76, 79, 80, 81, 86, 87, 88, - 92, 92, 59, 57, 56, 56, 54, 54, 54, 54, 54, 54, 53, 53, 55, 58, 58, 61, - 64, 64, 67, 69, 69, 73, 75, 76, 79, 80, 81, 86, 87, 88, 92, 92, - /* Size 4x8 */ - 32, 32, 37, 52, 32, 33, 36, 49, 32, 34, 38, 49, 34, 37, 44, 54, 35, 38, - 49, 60, 40, 42, 55, 69, 46, 46, 59, 76, 52, 51, 64, 83, - /* Size 8x4 */ - 32, 32, 32, 34, 35, 40, 46, 52, 32, 33, 34, 37, 38, 42, 46, 51, 37, 36, - 38, 44, 49, 55, 59, 64, 52, 49, 49, 54, 60, 69, 76, 83, - /* Size 8x16 */ - 32, 31, 32, 32, 36, 44, 47, 53, 31, 32, 32, 33, 35, 42, 45, 51, 31, 32, - 32, 33, 35, 41, 44, 49, 31, 32, 33, 33, 35, 41, 44, 49, 32, 32, 34, 34, - 36, 42, 45, 50, 32, 33, 35, 36, 38, 42, 45, 
49, 32, 33, 35, 36, 40, 44, - 47, 51, 34, 34, 36, 38, 42, 48, 50, 54, 36, 34, 37, 40, 48, 54, 56, 60, - 38, 36, 39, 41, 49, 56, 58, 63, 39, 37, 40, 42, 50, 58, 60, 65, 44, 41, - 42, 45, 53, 63, 66, 71, 47, 44, 45, 47, 56, 66, 69, 75, 49, 46, 47, 48, - 57, 67, 71, 77, 53, 49, 50, 51, 60, 71, 75, 82, 58, 54, 54, 55, 63, 75, - 79, 87, - /* Size 16x8 */ - 32, 31, 31, 31, 32, 32, 32, 34, 36, 38, 39, 44, 47, 49, 53, 58, 31, 32, - 32, 32, 32, 33, 33, 34, 34, 36, 37, 41, 44, 46, 49, 54, 32, 32, 32, 33, - 34, 35, 35, 36, 37, 39, 40, 42, 45, 47, 50, 54, 32, 33, 33, 33, 34, 36, - 36, 38, 40, 41, 42, 45, 47, 48, 51, 55, 36, 35, 35, 35, 36, 38, 40, 42, - 48, 49, 50, 53, 56, 57, 60, 63, 44, 42, 41, 41, 42, 42, 44, 48, 54, 56, - 58, 63, 66, 67, 71, 75, 47, 45, 44, 44, 45, 45, 47, 50, 56, 58, 60, 66, - 69, 71, 75, 79, 53, 51, 49, 49, 50, 49, 51, 54, 60, 63, 65, 71, 75, 77, - 82, 87, - /* Size 16x32 */ - 32, 31, 31, 31, 32, 32, 32, 35, 36, 38, 44, 44, 47, 53, 53, 59, 31, 32, - 32, 32, 32, 32, 33, 35, 35, 37, 43, 43, 46, 52, 52, 57, 31, 32, 32, 32, - 32, 32, 33, 35, 35, 37, 42, 42, 45, 51, 51, 56, 31, 32, 32, 32, 32, 32, - 33, 35, 35, 37, 42, 42, 45, 51, 51, 56, 31, 32, 32, 32, 32, 32, 33, 34, - 35, 36, 41, 41, 44, 49, 49, 54, 31, 32, 32, 32, 32, 33, 33, 34, 34, 36, - 41, 41, 44, 49, 49, 54, 31, 32, 32, 32, 33, 33, 33, 35, 35, 36, 41, 41, - 44, 49, 49, 54, 32, 32, 32, 32, 33, 34, 34, 36, 36, 38, 42, 42, 45, 49, - 49, 54, 32, 32, 32, 33, 34, 34, 34, 36, 36, 38, 42, 42, 45, 50, 50, 54, - 32, 32, 32, 33, 34, 34, 35, 37, 37, 38, 42, 42, 45, 49, 49, 54, 32, 32, - 33, 33, 35, 35, 36, 38, 38, 39, 42, 42, 45, 49, 49, 53, 32, 32, 33, 33, - 35, 35, 36, 38, 38, 39, 42, 42, 45, 49, 49, 53, 32, 33, 33, 33, 35, 36, - 36, 39, 40, 41, 44, 44, 47, 51, 51, 55, 34, 34, 34, 34, 36, 37, 38, 42, - 42, 44, 48, 48, 50, 54, 54, 58, 34, 34, 34, 34, 36, 37, 38, 42, 42, 44, - 48, 48, 50, 54, 54, 58, 35, 34, 34, 34, 37, 37, 39, 44, 45, 46, 50, 50, - 53, 57, 57, 61, 36, 35, 34, 35, 37, 38, 40, 47, 48, 49, 54, 
54, 56, 60, - 60, 64, 36, 35, 34, 35, 37, 38, 40, 47, 48, 49, 54, 54, 56, 60, 60, 64, - 38, 37, 36, 37, 39, 40, 41, 48, 49, 51, 56, 56, 58, 63, 63, 67, 39, 38, - 37, 38, 40, 40, 42, 49, 50, 52, 58, 58, 60, 65, 65, 69, 39, 38, 37, 38, - 40, 40, 42, 49, 50, 52, 58, 58, 60, 65, 65, 69, 42, 40, 40, 40, 42, 42, - 44, 51, 52, 55, 61, 61, 64, 69, 69, 73, 44, 42, 41, 41, 42, 43, 45, 52, - 53, 56, 63, 63, 66, 71, 71, 75, 44, 42, 41, 41, 43, 43, 45, 52, 54, 56, - 63, 63, 66, 72, 72, 76, 47, 45, 44, 44, 45, 45, 47, 54, 56, 58, 66, 66, - 69, 75, 75, 79, 48, 46, 45, 45, 46, 46, 48, 55, 56, 59, 67, 67, 70, 76, - 76, 80, 49, 47, 46, 46, 47, 47, 48, 56, 57, 60, 67, 67, 71, 77, 77, 81, - 53, 50, 49, 49, 49, 49, 51, 58, 59, 62, 71, 71, 74, 81, 81, 86, 53, 51, - 49, 49, 50, 50, 51, 59, 60, 63, 71, 71, 75, 82, 82, 87, 55, 52, 51, 51, - 51, 51, 53, 60, 61, 64, 72, 72, 76, 83, 83, 88, 58, 55, 54, 54, 54, 54, - 55, 62, 63, 67, 75, 75, 79, 87, 87, 92, 58, 55, 54, 54, 54, 54, 55, 62, - 63, 67, 75, 75, 79, 87, 87, 92, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 34, 35, 36, 36, - 38, 39, 39, 42, 44, 44, 47, 48, 49, 53, 53, 55, 58, 58, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 37, 38, 38, 40, - 42, 42, 45, 46, 47, 50, 51, 52, 55, 55, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 36, 37, 37, 40, 41, 41, 44, 45, - 46, 49, 49, 51, 54, 54, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 34, 34, 34, 35, 35, 37, 38, 38, 40, 41, 41, 44, 45, 46, 49, 49, 51, - 54, 54, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 35, 35, 35, 36, 36, 37, - 37, 37, 39, 40, 40, 42, 42, 43, 45, 46, 47, 49, 50, 51, 54, 54, 32, 32, - 32, 32, 32, 33, 33, 34, 34, 34, 35, 35, 36, 37, 37, 37, 38, 38, 40, 40, - 40, 42, 43, 43, 45, 46, 47, 49, 50, 51, 54, 54, 32, 33, 33, 33, 33, 33, - 33, 34, 34, 35, 36, 36, 36, 38, 38, 39, 40, 40, 41, 42, 42, 44, 45, 45, - 47, 48, 48, 51, 51, 53, 55, 55, 35, 35, 35, 35, 34, 34, 35, 36, 36, 37, - 38, 38, 
39, 42, 42, 44, 47, 47, 48, 49, 49, 51, 52, 52, 54, 55, 56, 58, - 59, 60, 62, 62, 36, 35, 35, 35, 35, 34, 35, 36, 36, 37, 38, 38, 40, 42, - 42, 45, 48, 48, 49, 50, 50, 52, 53, 54, 56, 56, 57, 59, 60, 61, 63, 63, - 38, 37, 37, 37, 36, 36, 36, 38, 38, 38, 39, 39, 41, 44, 44, 46, 49, 49, - 51, 52, 52, 55, 56, 56, 58, 59, 60, 62, 63, 64, 67, 67, 44, 43, 42, 42, - 41, 41, 41, 42, 42, 42, 42, 42, 44, 48, 48, 50, 54, 54, 56, 58, 58, 61, - 63, 63, 66, 67, 67, 71, 71, 72, 75, 75, 44, 43, 42, 42, 41, 41, 41, 42, - 42, 42, 42, 42, 44, 48, 48, 50, 54, 54, 56, 58, 58, 61, 63, 63, 66, 67, - 67, 71, 71, 72, 75, 75, 47, 46, 45, 45, 44, 44, 44, 45, 45, 45, 45, 45, - 47, 50, 50, 53, 56, 56, 58, 60, 60, 64, 66, 66, 69, 70, 71, 74, 75, 76, - 79, 79, 53, 52, 51, 51, 49, 49, 49, 49, 50, 49, 49, 49, 51, 54, 54, 57, - 60, 60, 63, 65, 65, 69, 71, 72, 75, 76, 77, 81, 82, 83, 87, 87, 53, 52, - 51, 51, 49, 49, 49, 49, 50, 49, 49, 49, 51, 54, 54, 57, 60, 60, 63, 65, - 65, 69, 71, 72, 75, 76, 77, 81, 82, 83, 87, 87, 59, 57, 56, 56, 54, 54, - 54, 54, 54, 54, 53, 53, 55, 58, 58, 61, 64, 64, 67, 69, 69, 73, 75, 76, - 79, 80, 81, 86, 87, 88, 92, 92, - /* Size 4x16 */ - 31, 32, 38, 53, 32, 32, 37, 51, 32, 32, 36, 49, 32, 33, 36, 49, 32, 34, - 38, 50, 32, 35, 39, 49, 33, 36, 41, 51, 34, 37, 44, 54, 35, 38, 49, 60, - 37, 40, 51, 63, 38, 40, 52, 65, 42, 43, 56, 71, 45, 45, 58, 75, 47, 47, - 60, 77, 51, 50, 63, 82, 55, 54, 67, 87, - /* Size 16x4 */ - 31, 32, 32, 32, 32, 32, 33, 34, 35, 37, 38, 42, 45, 47, 51, 55, 32, 32, - 32, 33, 34, 35, 36, 37, 38, 40, 40, 43, 45, 47, 50, 54, 38, 37, 36, 36, - 38, 39, 41, 44, 49, 51, 52, 56, 58, 60, 63, 67, 53, 51, 49, 49, 50, 49, - 51, 54, 60, 63, 65, 71, 75, 77, 82, 87, - /* Size 8x32 */ - 32, 31, 32, 32, 36, 44, 47, 53, 31, 32, 32, 33, 35, 43, 46, 52, 31, 32, - 32, 33, 35, 42, 45, 51, 31, 32, 32, 33, 35, 42, 45, 51, 31, 32, 32, 33, - 35, 41, 44, 49, 31, 32, 32, 33, 34, 41, 44, 49, 31, 32, 33, 33, 35, 41, - 44, 49, 32, 32, 33, 34, 36, 42, 45, 49, 32, 32, 34, 34, 36, 
42, 45, 50, - 32, 32, 34, 35, 37, 42, 45, 49, 32, 33, 35, 36, 38, 42, 45, 49, 32, 33, - 35, 36, 38, 42, 45, 49, 32, 33, 35, 36, 40, 44, 47, 51, 34, 34, 36, 38, - 42, 48, 50, 54, 34, 34, 36, 38, 42, 48, 50, 54, 35, 34, 37, 39, 45, 50, - 53, 57, 36, 34, 37, 40, 48, 54, 56, 60, 36, 34, 37, 40, 48, 54, 56, 60, - 38, 36, 39, 41, 49, 56, 58, 63, 39, 37, 40, 42, 50, 58, 60, 65, 39, 37, - 40, 42, 50, 58, 60, 65, 42, 40, 42, 44, 52, 61, 64, 69, 44, 41, 42, 45, - 53, 63, 66, 71, 44, 41, 43, 45, 54, 63, 66, 72, 47, 44, 45, 47, 56, 66, - 69, 75, 48, 45, 46, 48, 56, 67, 70, 76, 49, 46, 47, 48, 57, 67, 71, 77, - 53, 49, 49, 51, 59, 71, 74, 81, 53, 49, 50, 51, 60, 71, 75, 82, 55, 51, - 51, 53, 61, 72, 76, 83, 58, 54, 54, 55, 63, 75, 79, 87, 58, 54, 54, 55, - 63, 75, 79, 87, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 34, 35, 36, 36, - 38, 39, 39, 42, 44, 44, 47, 48, 49, 53, 53, 55, 58, 58, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 36, 37, 37, 40, - 41, 41, 44, 45, 46, 49, 49, 51, 54, 54, 32, 32, 32, 32, 32, 32, 33, 33, - 34, 34, 35, 35, 35, 36, 36, 37, 37, 37, 39, 40, 40, 42, 42, 43, 45, 46, - 47, 49, 50, 51, 54, 54, 32, 33, 33, 33, 33, 33, 33, 34, 34, 35, 36, 36, - 36, 38, 38, 39, 40, 40, 41, 42, 42, 44, 45, 45, 47, 48, 48, 51, 51, 53, - 55, 55, 36, 35, 35, 35, 35, 34, 35, 36, 36, 37, 38, 38, 40, 42, 42, 45, - 48, 48, 49, 50, 50, 52, 53, 54, 56, 56, 57, 59, 60, 61, 63, 63, 44, 43, - 42, 42, 41, 41, 41, 42, 42, 42, 42, 42, 44, 48, 48, 50, 54, 54, 56, 58, - 58, 61, 63, 63, 66, 67, 67, 71, 71, 72, 75, 75, 47, 46, 45, 45, 44, 44, - 44, 45, 45, 45, 45, 45, 47, 50, 50, 53, 56, 56, 58, 60, 60, 64, 66, 66, - 69, 70, 71, 74, 75, 76, 79, 79, 53, 52, 51, 51, 49, 49, 49, 49, 50, 49, - 49, 49, 51, 54, 54, 57, 60, 60, 63, 65, 65, 69, 71, 72, 75, 76, 77, 81, - 82, 83, 87, 87 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 38, 47, 49, 38, 47, 46, 46, 47, 46, 54, 57, 49, 46, 57, 66, - /* Size 8x8 */ - 31, 31, 35, 42, 48, 47, 49, 51, 31, 
32, 36, 42, 46, 45, 46, 48, 35, 36, - 41, 45, 47, 45, 46, 48, 42, 42, 45, 48, 50, 49, 50, 51, 48, 46, 47, 50, - 53, 53, 54, 54, 47, 45, 45, 49, 53, 57, 59, 60, 49, 46, 46, 50, 54, 59, - 61, 64, 51, 48, 48, 51, 54, 60, 64, 68, - /* Size 16x16 */ - 32, 31, 30, 31, 33, 36, 38, 41, 49, 49, 48, 49, 50, 51, 52, 54, 31, 31, - 31, 32, 34, 38, 40, 42, 47, 47, 47, 47, 48, 48, 50, 52, 30, 31, 31, 32, - 35, 39, 41, 42, 46, 46, 46, 45, 46, 47, 48, 50, 31, 32, 32, 33, 36, 40, - 41, 43, 46, 46, 45, 45, 46, 46, 47, 49, 33, 34, 35, 36, 39, 43, 44, 45, - 47, 46, 46, 45, 46, 47, 47, 49, 36, 38, 39, 40, 43, 47, 47, 47, 48, 47, - 46, 45, 46, 46, 47, 48, 38, 40, 41, 41, 44, 47, 47, 48, 49, 48, 48, 47, - 47, 47, 48, 49, 41, 42, 42, 43, 45, 47, 48, 48, 50, 50, 49, 49, 50, 50, - 50, 52, 49, 47, 46, 46, 47, 48, 49, 50, 53, 53, 53, 53, 54, 54, 54, 55, - 49, 47, 46, 46, 46, 47, 48, 50, 53, 53, 54, 55, 55, 55, 56, 57, 48, 47, - 46, 45, 46, 46, 48, 49, 53, 54, 54, 55, 56, 56, 57, 58, 49, 47, 45, 45, - 45, 45, 47, 49, 53, 55, 55, 58, 59, 60, 61, 62, 50, 48, 46, 46, 46, 46, - 47, 50, 54, 55, 56, 59, 61, 61, 63, 64, 51, 48, 47, 46, 47, 46, 47, 50, - 54, 55, 56, 60, 61, 62, 64, 66, 52, 50, 48, 47, 47, 47, 48, 50, 54, 56, - 57, 61, 63, 64, 66, 68, 54, 52, 50, 49, 49, 48, 49, 52, 55, 57, 58, 62, - 64, 66, 68, 71, - /* Size 32x32 */ - 32, 31, 31, 31, 30, 30, 31, 33, 33, 34, 36, 36, 38, 41, 41, 45, 49, 49, - 49, 48, 48, 49, 49, 49, 50, 50, 51, 52, 52, 53, 54, 54, 31, 31, 31, 31, - 31, 31, 31, 34, 34, 35, 38, 38, 39, 42, 42, 45, 48, 48, 47, 47, 47, 47, - 47, 47, 49, 49, 49, 50, 50, 51, 53, 53, 31, 31, 31, 31, 31, 31, 32, 34, - 34, 35, 38, 38, 40, 42, 42, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, - 48, 49, 50, 50, 52, 52, 31, 31, 31, 31, 31, 31, 32, 34, 34, 36, 38, 38, - 40, 42, 42, 45, 47, 47, 47, 47, 47, 47, 46, 47, 48, 48, 48, 49, 49, 50, - 52, 52, 30, 31, 31, 31, 31, 31, 32, 35, 35, 36, 39, 39, 41, 42, 42, 44, - 46, 46, 46, 46, 46, 45, 45, 45, 46, 47, 47, 48, 48, 48, 50, 50, 30, 31, - 31, 31, 31, 
32, 32, 35, 35, 36, 40, 40, 41, 42, 42, 44, 46, 46, 46, 45, - 45, 45, 45, 45, 46, 46, 46, 47, 47, 48, 49, 49, 31, 31, 32, 32, 32, 32, - 33, 35, 36, 37, 40, 40, 41, 43, 43, 44, 46, 46, 46, 45, 45, 45, 45, 45, - 46, 46, 46, 47, 47, 48, 49, 49, 33, 34, 34, 34, 35, 35, 35, 38, 38, 40, - 43, 43, 43, 44, 44, 46, 47, 47, 46, 46, 46, 45, 45, 45, 46, 46, 47, 47, - 47, 48, 49, 49, 33, 34, 34, 34, 35, 35, 36, 38, 39, 40, 43, 43, 44, 45, - 45, 46, 47, 47, 46, 46, 46, 45, 45, 45, 46, 46, 47, 47, 47, 48, 49, 49, - 34, 35, 35, 36, 36, 36, 37, 40, 40, 41, 44, 44, 45, 45, 45, 46, 47, 47, - 47, 46, 46, 45, 45, 45, 46, 46, 46, 47, 47, 48, 49, 49, 36, 38, 38, 38, - 39, 40, 40, 43, 43, 44, 47, 47, 47, 47, 47, 47, 48, 48, 47, 46, 46, 45, - 45, 45, 46, 46, 46, 46, 47, 47, 48, 48, 36, 38, 38, 38, 39, 40, 40, 43, - 43, 44, 47, 47, 47, 47, 47, 47, 48, 48, 47, 46, 46, 45, 45, 45, 46, 46, - 46, 46, 47, 47, 48, 48, 38, 39, 40, 40, 41, 41, 41, 43, 44, 45, 47, 47, - 47, 48, 48, 48, 49, 49, 48, 48, 48, 47, 47, 47, 47, 47, 47, 48, 48, 48, - 49, 49, 41, 42, 42, 42, 42, 42, 43, 44, 45, 45, 47, 47, 48, 48, 48, 49, - 50, 50, 50, 49, 49, 49, 49, 49, 50, 50, 50, 50, 50, 51, 52, 52, 41, 42, - 42, 42, 42, 42, 43, 44, 45, 45, 47, 47, 48, 48, 48, 49, 50, 50, 50, 49, - 49, 49, 49, 49, 50, 50, 50, 50, 50, 51, 52, 52, 45, 45, 45, 45, 44, 44, - 44, 46, 46, 46, 47, 47, 48, 49, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, - 52, 52, 52, 52, 52, 52, 53, 53, 49, 48, 47, 47, 46, 46, 46, 47, 47, 47, - 48, 48, 49, 50, 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, - 54, 54, 55, 55, 49, 48, 47, 47, 46, 46, 46, 47, 47, 47, 48, 48, 49, 50, - 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 55, 55, - 49, 47, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 48, 50, 50, 51, 53, 53, - 53, 54, 54, 54, 55, 55, 55, 55, 55, 56, 56, 56, 57, 57, 48, 47, 47, 47, - 46, 45, 45, 46, 46, 46, 46, 46, 48, 49, 49, 51, 53, 53, 54, 54, 54, 55, - 55, 56, 56, 56, 56, 57, 57, 58, 58, 58, 48, 47, 47, 47, 46, 45, 45, 46, - 46, 46, 46, 
46, 48, 49, 49, 51, 53, 53, 54, 54, 54, 55, 55, 56, 56, 56, - 56, 57, 57, 58, 58, 58, 49, 47, 47, 47, 45, 45, 45, 45, 45, 45, 45, 45, - 47, 49, 49, 51, 53, 53, 54, 55, 55, 57, 57, 58, 58, 59, 59, 60, 60, 60, - 61, 61, 49, 47, 47, 46, 45, 45, 45, 45, 45, 45, 45, 45, 47, 49, 49, 51, - 53, 53, 55, 55, 55, 57, 58, 58, 59, 60, 60, 61, 61, 61, 62, 62, 49, 47, - 47, 47, 45, 45, 45, 45, 45, 45, 45, 45, 47, 49, 49, 51, 53, 53, 55, 56, - 56, 58, 58, 59, 59, 60, 60, 61, 61, 62, 63, 63, 50, 49, 48, 48, 46, 46, - 46, 46, 46, 46, 46, 46, 47, 50, 50, 52, 54, 54, 55, 56, 56, 58, 59, 59, - 61, 61, 61, 63, 63, 63, 64, 64, 50, 49, 48, 48, 47, 46, 46, 46, 46, 46, - 46, 46, 47, 50, 50, 52, 54, 54, 55, 56, 56, 59, 60, 60, 61, 61, 62, 63, - 63, 64, 65, 65, 51, 49, 48, 48, 47, 46, 46, 47, 47, 46, 46, 46, 47, 50, - 50, 52, 54, 54, 55, 56, 56, 59, 60, 60, 61, 62, 62, 64, 64, 64, 66, 66, - 52, 50, 49, 49, 48, 47, 47, 47, 47, 47, 46, 46, 48, 50, 50, 52, 54, 54, - 56, 57, 57, 60, 61, 61, 63, 63, 64, 66, 66, 67, 68, 68, 52, 50, 50, 49, - 48, 47, 47, 47, 47, 47, 47, 47, 48, 50, 50, 52, 54, 54, 56, 57, 57, 60, - 61, 61, 63, 63, 64, 66, 66, 67, 68, 68, 53, 51, 50, 50, 48, 48, 48, 48, - 48, 48, 47, 47, 48, 51, 51, 52, 54, 54, 56, 58, 58, 60, 61, 62, 63, 64, - 64, 67, 67, 68, 69, 69, 54, 53, 52, 52, 50, 49, 49, 49, 49, 49, 48, 48, - 49, 52, 52, 53, 55, 55, 57, 58, 58, 61, 62, 63, 64, 65, 66, 68, 68, 69, - 71, 71, 54, 53, 52, 52, 50, 49, 49, 49, 49, 49, 48, 48, 49, 52, 52, 53, - 55, 55, 57, 58, 58, 61, 62, 63, 64, 65, 66, 68, 68, 69, 71, 71, - /* Size 4x8 */ - 31, 38, 47, 50, 31, 40, 46, 48, 36, 44, 47, 47, 42, 47, 50, 50, 47, 48, - 53, 54, 46, 46, 54, 60, 48, 46, 55, 64, 50, 48, 56, 67, - /* Size 8x4 */ - 31, 31, 36, 42, 47, 46, 48, 50, 38, 40, 44, 47, 48, 46, 46, 48, 47, 46, - 47, 50, 53, 54, 55, 56, 50, 48, 47, 50, 54, 60, 64, 67, - /* Size 8x16 */ - 32, 31, 35, 38, 48, 49, 50, 52, 31, 31, 37, 40, 47, 47, 48, 50, 30, 32, - 38, 40, 46, 45, 46, 48, 31, 33, 38, 41, 46, 45, 46, 48, 33, 36, 41, 44, - 
47, 46, 46, 47, 37, 40, 45, 47, 47, 45, 46, 47, 39, 41, 46, 47, 48, 47, - 47, 48, 42, 43, 46, 48, 50, 49, 50, 50, 49, 46, 48, 49, 53, 53, 54, 54, - 48, 46, 47, 48, 53, 55, 55, 56, 48, 46, 46, 48, 53, 56, 56, 57, 49, 45, - 45, 47, 53, 58, 59, 61, 50, 46, 46, 48, 54, 59, 61, 63, 51, 47, 47, 48, - 54, 60, 61, 64, 52, 48, 47, 48, 54, 61, 63, 66, 54, 50, 49, 50, 55, 62, - 65, 68, - /* Size 16x8 */ - 32, 31, 30, 31, 33, 37, 39, 42, 49, 48, 48, 49, 50, 51, 52, 54, 31, 31, - 32, 33, 36, 40, 41, 43, 46, 46, 46, 45, 46, 47, 48, 50, 35, 37, 38, 38, - 41, 45, 46, 46, 48, 47, 46, 45, 46, 47, 47, 49, 38, 40, 40, 41, 44, 47, - 47, 48, 49, 48, 48, 47, 48, 48, 48, 50, 48, 47, 46, 46, 47, 47, 48, 50, - 53, 53, 53, 53, 54, 54, 54, 55, 49, 47, 45, 45, 46, 45, 47, 49, 53, 55, - 56, 58, 59, 60, 61, 62, 50, 48, 46, 46, 46, 46, 47, 50, 54, 55, 56, 59, - 61, 61, 63, 65, 52, 50, 48, 48, 47, 47, 48, 50, 54, 56, 57, 61, 63, 64, - 66, 68, - /* Size 16x32 */ - 32, 31, 31, 31, 35, 37, 38, 47, 48, 48, 49, 49, 50, 52, 52, 54, 31, 31, - 31, 32, 36, 38, 39, 46, 47, 47, 48, 48, 49, 50, 50, 53, 31, 31, 31, 32, - 37, 38, 40, 46, 47, 47, 47, 47, 48, 50, 50, 52, 31, 31, 31, 32, 37, 38, - 40, 46, 47, 47, 47, 47, 48, 50, 50, 52, 30, 31, 32, 32, 38, 39, 40, 45, - 46, 46, 45, 45, 46, 48, 48, 50, 30, 31, 32, 33, 38, 40, 41, 45, 46, 46, - 45, 45, 46, 48, 48, 50, 31, 32, 33, 33, 38, 40, 41, 45, 46, 46, 45, 45, - 46, 48, 48, 50, 33, 35, 35, 36, 41, 43, 43, 46, 47, 46, 45, 45, 46, 47, - 47, 49, 33, 35, 36, 36, 41, 43, 44, 46, 47, 46, 46, 46, 46, 47, 47, 49, - 34, 36, 37, 37, 42, 44, 45, 47, 47, 47, 45, 45, 46, 47, 47, 49, 37, 39, - 40, 41, 45, 47, 47, 47, 47, 47, 45, 45, 46, 47, 47, 48, 37, 39, 40, 41, - 45, 47, 47, 47, 47, 47, 45, 45, 46, 47, 47, 48, 39, 40, 41, 42, 46, 47, - 47, 48, 48, 48, 47, 47, 47, 48, 48, 50, 42, 42, 43, 43, 46, 47, 48, 50, - 50, 50, 49, 49, 50, 50, 50, 52, 42, 42, 43, 43, 46, 47, 48, 50, 50, 50, - 49, 49, 50, 50, 50, 52, 45, 45, 44, 45, 47, 47, 48, 51, 51, 51, 51, 51, - 52, 52, 52, 54, 
49, 47, 46, 47, 48, 48, 49, 52, 53, 53, 53, 53, 54, 54, - 54, 55, 49, 47, 46, 47, 48, 48, 49, 52, 53, 53, 53, 53, 54, 54, 54, 55, - 48, 47, 46, 46, 47, 47, 48, 52, 53, 53, 55, 55, 55, 56, 56, 57, 48, 46, - 46, 46, 46, 47, 48, 52, 53, 54, 56, 56, 56, 57, 57, 59, 48, 46, 46, 46, - 46, 47, 48, 52, 53, 54, 56, 56, 56, 57, 57, 59, 49, 46, 45, 45, 46, 46, - 47, 52, 53, 54, 57, 57, 58, 60, 60, 61, 49, 46, 45, 45, 45, 46, 47, 52, - 53, 55, 58, 58, 59, 61, 61, 62, 49, 46, 45, 45, 46, 46, 47, 52, 53, 55, - 58, 58, 60, 61, 61, 63, 50, 47, 46, 46, 46, 46, 48, 53, 54, 55, 59, 59, - 61, 63, 63, 65, 50, 48, 46, 46, 46, 46, 48, 53, 54, 55, 59, 59, 61, 64, - 64, 65, 51, 48, 47, 47, 47, 47, 48, 53, 54, 55, 60, 60, 61, 64, 64, 66, - 52, 49, 48, 48, 47, 47, 48, 53, 54, 56, 61, 61, 63, 66, 66, 68, 52, 49, - 48, 48, 47, 47, 48, 53, 54, 56, 61, 61, 63, 66, 66, 68, 53, 50, 48, 48, - 48, 48, 49, 54, 54, 56, 61, 61, 63, 67, 67, 69, 54, 51, 50, 50, 49, 49, - 50, 55, 55, 57, 62, 62, 65, 68, 68, 71, 54, 51, 50, 50, 49, 49, 50, 55, - 55, 57, 62, 62, 65, 68, 68, 71, - /* Size 32x16 */ - 32, 31, 31, 31, 30, 30, 31, 33, 33, 34, 37, 37, 39, 42, 42, 45, 49, 49, - 48, 48, 48, 49, 49, 49, 50, 50, 51, 52, 52, 53, 54, 54, 31, 31, 31, 31, - 31, 31, 32, 35, 35, 36, 39, 39, 40, 42, 42, 45, 47, 47, 47, 46, 46, 46, - 46, 46, 47, 48, 48, 49, 49, 50, 51, 51, 31, 31, 31, 31, 32, 32, 33, 35, - 36, 37, 40, 40, 41, 43, 43, 44, 46, 46, 46, 46, 46, 45, 45, 45, 46, 46, - 47, 48, 48, 48, 50, 50, 31, 32, 32, 32, 32, 33, 33, 36, 36, 37, 41, 41, - 42, 43, 43, 45, 47, 47, 46, 46, 46, 45, 45, 45, 46, 46, 47, 48, 48, 48, - 50, 50, 35, 36, 37, 37, 38, 38, 38, 41, 41, 42, 45, 45, 46, 46, 46, 47, - 48, 48, 47, 46, 46, 46, 45, 46, 46, 46, 47, 47, 47, 48, 49, 49, 37, 38, - 38, 38, 39, 40, 40, 43, 43, 44, 47, 47, 47, 47, 47, 47, 48, 48, 47, 47, - 47, 46, 46, 46, 46, 46, 47, 47, 47, 48, 49, 49, 38, 39, 40, 40, 40, 41, - 41, 43, 44, 45, 47, 47, 47, 48, 48, 48, 49, 49, 48, 48, 48, 47, 47, 47, - 48, 48, 48, 48, 48, 49, 50, 50, 47, 
46, 46, 46, 45, 45, 45, 46, 46, 47, - 47, 47, 48, 50, 50, 51, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, - 53, 54, 55, 55, 48, 47, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 48, 50, - 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 55, 55, - 48, 47, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 48, 50, 50, 51, 53, 53, - 53, 54, 54, 54, 55, 55, 55, 55, 55, 56, 56, 56, 57, 57, 49, 48, 47, 47, - 45, 45, 45, 45, 46, 45, 45, 45, 47, 49, 49, 51, 53, 53, 55, 56, 56, 57, - 58, 58, 59, 59, 60, 61, 61, 61, 62, 62, 49, 48, 47, 47, 45, 45, 45, 45, - 46, 45, 45, 45, 47, 49, 49, 51, 53, 53, 55, 56, 56, 57, 58, 58, 59, 59, - 60, 61, 61, 61, 62, 62, 50, 49, 48, 48, 46, 46, 46, 46, 46, 46, 46, 46, - 47, 50, 50, 52, 54, 54, 55, 56, 56, 58, 59, 60, 61, 61, 61, 63, 63, 63, - 65, 65, 52, 50, 50, 50, 48, 48, 48, 47, 47, 47, 47, 47, 48, 50, 50, 52, - 54, 54, 56, 57, 57, 60, 61, 61, 63, 64, 64, 66, 66, 67, 68, 68, 52, 50, - 50, 50, 48, 48, 48, 47, 47, 47, 47, 47, 48, 50, 50, 52, 54, 54, 56, 57, - 57, 60, 61, 61, 63, 64, 64, 66, 66, 67, 68, 68, 54, 53, 52, 52, 50, 50, - 50, 49, 49, 49, 48, 48, 50, 52, 52, 54, 55, 55, 57, 59, 59, 61, 62, 63, - 65, 65, 66, 68, 68, 69, 71, 71, - /* Size 4x16 */ - 31, 37, 48, 52, 31, 38, 47, 50, 31, 39, 46, 48, 32, 40, 46, 48, 35, 43, - 46, 47, 39, 47, 47, 47, 40, 47, 48, 48, 42, 47, 50, 50, 47, 48, 53, 54, - 47, 47, 53, 56, 46, 47, 54, 57, 46, 46, 55, 61, 47, 46, 55, 63, 48, 47, - 55, 64, 49, 47, 56, 66, 51, 49, 57, 68, - /* Size 16x4 */ - 31, 31, 31, 32, 35, 39, 40, 42, 47, 47, 46, 46, 47, 48, 49, 51, 37, 38, - 39, 40, 43, 47, 47, 47, 48, 47, 47, 46, 46, 47, 47, 49, 48, 47, 46, 46, - 46, 47, 48, 50, 53, 53, 54, 55, 55, 55, 56, 57, 52, 50, 48, 48, 47, 47, - 48, 50, 54, 56, 57, 61, 63, 64, 66, 68, - /* Size 8x32 */ - 32, 31, 35, 38, 48, 49, 50, 52, 31, 31, 36, 39, 47, 48, 49, 50, 31, 31, - 37, 40, 47, 47, 48, 50, 31, 31, 37, 40, 47, 47, 48, 50, 30, 32, 38, 40, - 46, 45, 46, 48, 30, 32, 38, 41, 46, 45, 46, 48, 31, 33, 38, 41, 46, 45, - 46, 48, 33, 
35, 41, 43, 47, 45, 46, 47, 33, 36, 41, 44, 47, 46, 46, 47, - 34, 37, 42, 45, 47, 45, 46, 47, 37, 40, 45, 47, 47, 45, 46, 47, 37, 40, - 45, 47, 47, 45, 46, 47, 39, 41, 46, 47, 48, 47, 47, 48, 42, 43, 46, 48, - 50, 49, 50, 50, 42, 43, 46, 48, 50, 49, 50, 50, 45, 44, 47, 48, 51, 51, - 52, 52, 49, 46, 48, 49, 53, 53, 54, 54, 49, 46, 48, 49, 53, 53, 54, 54, - 48, 46, 47, 48, 53, 55, 55, 56, 48, 46, 46, 48, 53, 56, 56, 57, 48, 46, - 46, 48, 53, 56, 56, 57, 49, 45, 46, 47, 53, 57, 58, 60, 49, 45, 45, 47, - 53, 58, 59, 61, 49, 45, 46, 47, 53, 58, 60, 61, 50, 46, 46, 48, 54, 59, - 61, 63, 50, 46, 46, 48, 54, 59, 61, 64, 51, 47, 47, 48, 54, 60, 61, 64, - 52, 48, 47, 48, 54, 61, 63, 66, 52, 48, 47, 48, 54, 61, 63, 66, 53, 48, - 48, 49, 54, 61, 63, 67, 54, 50, 49, 50, 55, 62, 65, 68, 54, 50, 49, 50, - 55, 62, 65, 68, - /* Size 32x8 */ - 32, 31, 31, 31, 30, 30, 31, 33, 33, 34, 37, 37, 39, 42, 42, 45, 49, 49, - 48, 48, 48, 49, 49, 49, 50, 50, 51, 52, 52, 53, 54, 54, 31, 31, 31, 31, - 32, 32, 33, 35, 36, 37, 40, 40, 41, 43, 43, 44, 46, 46, 46, 46, 46, 45, - 45, 45, 46, 46, 47, 48, 48, 48, 50, 50, 35, 36, 37, 37, 38, 38, 38, 41, - 41, 42, 45, 45, 46, 46, 46, 47, 48, 48, 47, 46, 46, 46, 45, 46, 46, 46, - 47, 47, 47, 48, 49, 49, 38, 39, 40, 40, 40, 41, 41, 43, 44, 45, 47, 47, - 47, 48, 48, 48, 49, 49, 48, 48, 48, 47, 47, 47, 48, 48, 48, 48, 48, 49, - 50, 50, 48, 47, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 48, 50, 50, 51, - 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 55, 55, 49, 48, - 47, 47, 45, 45, 45, 45, 46, 45, 45, 45, 47, 49, 49, 51, 53, 53, 55, 56, - 56, 57, 58, 58, 59, 59, 60, 61, 61, 61, 62, 62, 50, 49, 48, 48, 46, 46, - 46, 46, 46, 46, 46, 46, 47, 50, 50, 52, 54, 54, 55, 56, 56, 58, 59, 60, - 61, 61, 61, 63, 63, 63, 65, 65, 52, 50, 50, 50, 48, 48, 48, 47, 47, 47, - 47, 47, 48, 50, 50, 52, 54, 54, 56, 57, 57, 60, 61, 61, 63, 64, 64, 66, - 66, 67, 68, 68 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 32, 35, 43, 32, 34, 37, 43, 35, 37, 48, 54, 43, 43, 54, 65, 
- /* Size 8x8 */ - 31, 31, 32, 32, 34, 37, 43, 47, 31, 32, 32, 32, 34, 36, 41, 44, 32, 32, - 33, 34, 35, 38, 42, 45, 32, 32, 34, 35, 37, 39, 42, 46, 34, 34, 35, 37, - 41, 45, 49, 52, 37, 36, 38, 39, 45, 51, 56, 59, 43, 41, 42, 42, 49, 56, - 63, 67, 47, 44, 45, 46, 52, 59, 67, 71, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 31, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 31, 32, - 32, 32, 32, 32, 32, 33, 34, 35, 35, 38, 40, 42, 45, 46, 31, 32, 32, 32, - 32, 32, 32, 33, 34, 34, 35, 38, 39, 42, 45, 45, 31, 32, 32, 32, 32, 32, - 32, 33, 33, 34, 34, 37, 38, 41, 44, 44, 31, 32, 32, 32, 33, 33, 33, 34, - 35, 36, 36, 39, 40, 42, 44, 45, 31, 32, 32, 32, 33, 33, 34, 34, 35, 36, - 36, 39, 40, 42, 45, 45, 32, 32, 32, 32, 33, 34, 35, 36, 37, 38, 38, 40, - 41, 42, 45, 46, 32, 33, 33, 33, 34, 34, 36, 36, 38, 39, 40, 42, 43, 44, - 47, 47, 34, 34, 34, 33, 35, 35, 37, 38, 39, 42, 42, 45, 46, 47, 50, 51, - 35, 35, 34, 34, 36, 36, 38, 39, 42, 46, 47, 49, 50, 52, 55, 55, 36, 35, - 35, 34, 36, 36, 38, 40, 42, 47, 48, 50, 52, 54, 56, 57, 39, 38, 38, 37, - 39, 39, 40, 42, 45, 49, 50, 54, 55, 58, 60, 61, 41, 40, 39, 38, 40, 40, - 41, 43, 46, 50, 52, 55, 57, 60, 62, 63, 44, 42, 42, 41, 42, 42, 42, 44, - 47, 52, 54, 58, 60, 63, 66, 67, 47, 45, 45, 44, 44, 45, 45, 47, 50, 55, - 56, 60, 62, 66, 69, 70, 48, 46, 45, 44, 45, 45, 46, 47, 51, 55, 57, 61, - 63, 67, 70, 71, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 34, - 35, 36, 36, 38, 39, 39, 41, 44, 44, 45, 47, 48, 48, 51, 31, 31, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 35, 37, - 39, 39, 40, 43, 43, 44, 46, 47, 47, 50, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 35, 37, 38, 38, 40, 42, - 42, 43, 45, 46, 46, 49, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 34, 34, 34, 35, 35, 35, 37, 38, 38, 40, 42, 42, 43, 45, 46, - 46, 49, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, - 34, 34, 34, 35, 35, 36, 38, 38, 
39, 42, 42, 42, 45, 45, 45, 48, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, - 34, 36, 37, 37, 38, 41, 41, 41, 44, 44, 44, 47, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 36, 37, 37, - 38, 41, 41, 41, 44, 44, 44, 47, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 36, 38, 38, 39, 41, 41, 42, - 44, 45, 45, 47, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 34, 35, 35, 35, 36, 36, 36, 37, 39, 39, 40, 42, 42, 42, 44, 45, 45, 48, - 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 35, 35, - 36, 36, 36, 38, 39, 39, 40, 42, 42, 42, 45, 45, 45, 48, 31, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 38, - 39, 39, 40, 42, 42, 42, 45, 45, 45, 48, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 39, 40, 40, 41, 42, - 42, 43, 45, 45, 45, 48, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 35, - 35, 35, 36, 37, 37, 37, 38, 38, 38, 39, 40, 40, 41, 42, 42, 43, 45, 46, - 46, 48, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 35, 35, 35, 36, 37, - 37, 37, 38, 38, 38, 39, 40, 40, 41, 42, 42, 43, 45, 46, 46, 48, 32, 33, - 33, 33, 33, 33, 33, 33, 34, 34, 34, 35, 36, 36, 36, 38, 38, 38, 39, 40, - 40, 41, 42, 42, 43, 44, 44, 45, 47, 47, 47, 50, 34, 34, 34, 34, 34, 33, - 33, 34, 35, 35, 35, 36, 37, 37, 38, 39, 39, 40, 42, 42, 42, 44, 45, 45, - 46, 47, 47, 48, 50, 51, 51, 53, 34, 34, 34, 34, 34, 33, 33, 34, 35, 35, - 35, 36, 37, 37, 38, 39, 39, 40, 42, 42, 42, 44, 45, 45, 46, 47, 47, 48, - 50, 51, 51, 53, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, - 38, 40, 40, 41, 43, 44, 44, 45, 46, 46, 47, 49, 49, 49, 51, 52, 52, 54, - 35, 35, 35, 35, 34, 34, 34, 34, 36, 36, 36, 37, 38, 38, 39, 42, 42, 43, - 46, 47, 47, 48, 49, 49, 50, 52, 52, 53, 55, 55, 55, 57, 36, 35, 35, 35, - 35, 34, 34, 35, 36, 36, 36, 37, 38, 38, 40, 42, 42, 44, 47, 48, 48, 50, - 50, 50, 52, 54, 54, 54, 56, 57, 
57, 58, 36, 35, 35, 35, 35, 34, 34, 35, - 36, 36, 36, 37, 38, 38, 40, 42, 42, 44, 47, 48, 48, 50, 50, 50, 52, 54, - 54, 54, 56, 57, 57, 58, 38, 37, 37, 37, 36, 36, 36, 36, 37, 38, 38, 39, - 39, 39, 41, 44, 44, 45, 48, 50, 50, 51, 52, 52, 54, 56, 56, 57, 58, 59, - 59, 61, 39, 39, 38, 38, 38, 37, 37, 38, 39, 39, 39, 40, 40, 40, 42, 45, - 45, 46, 49, 50, 50, 52, 54, 54, 55, 58, 58, 58, 60, 61, 61, 63, 39, 39, - 38, 38, 38, 37, 37, 38, 39, 39, 39, 40, 40, 40, 42, 45, 45, 46, 49, 50, - 50, 52, 54, 54, 55, 58, 58, 58, 60, 61, 61, 63, 41, 40, 40, 40, 39, 38, - 38, 39, 40, 40, 40, 41, 41, 41, 43, 46, 46, 47, 50, 52, 52, 54, 55, 55, - 57, 60, 60, 60, 62, 63, 63, 66, 44, 43, 42, 42, 42, 41, 41, 41, 42, 42, - 42, 42, 42, 42, 44, 47, 47, 49, 52, 54, 54, 56, 58, 58, 60, 63, 63, 64, - 66, 67, 67, 69, 44, 43, 42, 42, 42, 41, 41, 41, 42, 42, 42, 42, 42, 42, - 44, 47, 47, 49, 52, 54, 54, 56, 58, 58, 60, 63, 63, 64, 66, 67, 67, 69, - 45, 44, 43, 43, 42, 41, 41, 42, 42, 42, 42, 43, 43, 43, 45, 48, 48, 49, - 53, 54, 54, 57, 58, 58, 60, 64, 64, 65, 67, 68, 68, 70, 47, 46, 45, 45, - 45, 44, 44, 44, 44, 45, 45, 45, 45, 45, 47, 50, 50, 51, 55, 56, 56, 58, - 60, 60, 62, 66, 66, 67, 69, 70, 70, 73, 48, 47, 46, 46, 45, 44, 44, 45, - 45, 45, 45, 45, 46, 46, 47, 51, 51, 52, 55, 57, 57, 59, 61, 61, 63, 67, - 67, 68, 70, 71, 71, 74, 48, 47, 46, 46, 45, 44, 44, 45, 45, 45, 45, 45, - 46, 46, 47, 51, 51, 52, 55, 57, 57, 59, 61, 61, 63, 67, 67, 68, 70, 71, - 71, 74, 51, 50, 49, 49, 48, 47, 47, 47, 48, 48, 48, 48, 48, 48, 50, 53, - 53, 54, 57, 58, 58, 61, 63, 63, 66, 69, 69, 70, 73, 74, 74, 77, - /* Size 4x8 */ - 31, 32, 35, 43, 32, 33, 34, 41, 32, 34, 36, 42, 32, 35, 38, 42, 34, 37, - 43, 49, 37, 40, 49, 56, 42, 43, 53, 63, 46, 46, 56, 67, - /* Size 8x4 */ - 31, 32, 32, 32, 34, 37, 42, 46, 32, 33, 34, 35, 37, 40, 43, 46, 35, 34, - 36, 38, 43, 49, 53, 56, 43, 41, 42, 42, 49, 56, 63, 67, - /* Size 8x16 */ - 32, 31, 31, 32, 35, 36, 44, 47, 31, 32, 32, 32, 35, 35, 42, 45, 31, 32, - 32, 32, 34, 35, 41, 
45, 31, 32, 32, 33, 34, 34, 41, 44, 31, 32, 33, 34, - 35, 36, 42, 44, 32, 32, 33, 34, 36, 36, 42, 45, 32, 33, 34, 35, 37, 38, - 42, 45, 32, 33, 34, 36, 39, 40, 44, 47, 34, 34, 35, 37, 41, 42, 48, 50, - 35, 34, 36, 38, 45, 47, 52, 55, 36, 34, 36, 38, 46, 48, 54, 56, 39, 37, - 39, 40, 48, 50, 58, 60, 41, 39, 40, 41, 49, 51, 60, 62, 44, 41, 42, 43, - 51, 53, 63, 66, 47, 44, 44, 45, 53, 56, 66, 69, 48, 45, 45, 46, 54, 56, - 67, 70, - /* Size 16x8 */ - 32, 31, 31, 31, 31, 32, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 31, 32, - 32, 32, 32, 32, 33, 33, 34, 34, 34, 37, 39, 41, 44, 45, 31, 32, 32, 32, - 33, 33, 34, 34, 35, 36, 36, 39, 40, 42, 44, 45, 32, 32, 32, 33, 34, 34, - 35, 36, 37, 38, 38, 40, 41, 43, 45, 46, 35, 35, 34, 34, 35, 36, 37, 39, - 41, 45, 46, 48, 49, 51, 53, 54, 36, 35, 35, 34, 36, 36, 38, 40, 42, 47, - 48, 50, 51, 53, 56, 56, 44, 42, 41, 41, 42, 42, 42, 44, 48, 52, 54, 58, - 60, 63, 66, 67, 47, 45, 45, 44, 44, 45, 45, 47, 50, 55, 56, 60, 62, 66, - 69, 70, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 32, 32, 32, 35, 36, 36, 40, 44, 44, 47, 53, 31, 31, - 32, 32, 32, 32, 32, 33, 35, 35, 35, 39, 43, 43, 46, 52, 31, 32, 32, 32, - 32, 32, 32, 33, 35, 35, 35, 39, 42, 42, 45, 51, 31, 32, 32, 32, 32, 32, - 32, 33, 35, 35, 35, 39, 42, 42, 45, 51, 31, 32, 32, 32, 32, 32, 32, 33, - 34, 35, 35, 39, 41, 41, 45, 50, 31, 32, 32, 32, 32, 33, 33, 33, 34, 34, - 34, 38, 41, 41, 44, 49, 31, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 38, - 41, 41, 44, 49, 31, 32, 32, 32, 32, 33, 33, 33, 34, 35, 35, 38, 41, 41, - 44, 49, 31, 32, 32, 32, 33, 34, 34, 34, 35, 36, 36, 39, 42, 42, 44, 49, - 32, 32, 32, 32, 33, 34, 34, 34, 36, 36, 36, 39, 42, 42, 45, 50, 32, 32, - 32, 32, 33, 34, 34, 34, 36, 36, 36, 39, 42, 42, 45, 50, 32, 32, 32, 32, - 33, 35, 35, 35, 37, 37, 37, 40, 42, 42, 45, 49, 32, 32, 33, 33, 34, 35, - 35, 36, 37, 38, 38, 41, 42, 42, 45, 49, 32, 32, 33, 33, 34, 35, 35, 36, - 37, 38, 38, 41, 42, 42, 45, 49, 32, 33, 33, 33, 34, 36, 36, 36, 39, 40, - 40, 42, 44, 44, 47, 51, 34, 34, 34, 
34, 35, 37, 37, 38, 41, 42, 42, 45, - 48, 48, 50, 54, 34, 34, 34, 34, 35, 37, 37, 38, 41, 42, 42, 45, 48, 48, - 50, 54, 34, 34, 34, 34, 35, 37, 37, 38, 42, 43, 43, 46, 49, 49, 51, 55, - 35, 35, 34, 34, 36, 38, 38, 39, 45, 47, 47, 50, 52, 52, 55, 59, 36, 35, - 34, 34, 36, 38, 38, 40, 46, 48, 48, 51, 54, 54, 56, 60, 36, 35, 34, 34, - 36, 38, 38, 40, 46, 48, 48, 51, 54, 54, 56, 60, 38, 37, 36, 36, 37, 40, - 40, 41, 47, 49, 49, 53, 56, 56, 58, 63, 39, 38, 37, 37, 39, 40, 40, 42, - 48, 50, 50, 54, 58, 58, 60, 65, 39, 38, 37, 37, 39, 40, 40, 42, 48, 50, - 50, 54, 58, 58, 60, 65, 41, 40, 39, 39, 40, 41, 41, 43, 49, 51, 51, 56, - 60, 60, 62, 67, 44, 42, 41, 41, 42, 43, 43, 45, 51, 53, 53, 59, 63, 63, - 66, 71, 44, 42, 41, 41, 42, 43, 43, 45, 51, 53, 53, 59, 63, 63, 66, 71, - 44, 43, 42, 42, 42, 43, 43, 45, 51, 54, 54, 59, 64, 64, 67, 72, 47, 45, - 44, 44, 44, 45, 45, 47, 53, 56, 56, 61, 66, 66, 69, 75, 48, 46, 45, 45, - 45, 46, 46, 48, 54, 56, 56, 62, 67, 67, 70, 76, 48, 46, 45, 45, 45, 46, - 46, 48, 54, 56, 56, 62, 67, 67, 70, 76, 51, 49, 47, 47, 48, 48, 48, 50, - 56, 58, 58, 64, 69, 69, 73, 79, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 34, 34, - 35, 36, 36, 38, 39, 39, 41, 44, 44, 44, 47, 48, 48, 51, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 35, 37, - 38, 38, 40, 42, 42, 43, 45, 46, 46, 49, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 34, 36, 37, 37, 39, 41, - 41, 42, 44, 45, 45, 47, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 34, 34, 34, 34, 34, 34, 36, 37, 37, 39, 41, 41, 42, 44, 45, - 45, 47, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, - 35, 35, 36, 36, 36, 37, 39, 39, 40, 42, 42, 42, 44, 45, 45, 48, 32, 32, - 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 38, 38, - 38, 40, 40, 40, 41, 43, 43, 43, 45, 46, 46, 48, 32, 32, 32, 32, 32, 33, - 33, 33, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 38, 38, 
38, 40, 40, 40, - 41, 43, 43, 43, 45, 46, 46, 48, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, - 34, 35, 36, 36, 36, 38, 38, 38, 39, 40, 40, 41, 42, 42, 43, 45, 45, 45, - 47, 48, 48, 50, 35, 35, 35, 35, 34, 34, 34, 34, 35, 36, 36, 37, 37, 37, - 39, 41, 41, 42, 45, 46, 46, 47, 48, 48, 49, 51, 51, 51, 53, 54, 54, 56, - 36, 35, 35, 35, 35, 34, 34, 35, 36, 36, 36, 37, 38, 38, 40, 42, 42, 43, - 47, 48, 48, 49, 50, 50, 51, 53, 53, 54, 56, 56, 56, 58, 36, 35, 35, 35, - 35, 34, 34, 35, 36, 36, 36, 37, 38, 38, 40, 42, 42, 43, 47, 48, 48, 49, - 50, 50, 51, 53, 53, 54, 56, 56, 56, 58, 40, 39, 39, 39, 39, 38, 38, 38, - 39, 39, 39, 40, 41, 41, 42, 45, 45, 46, 50, 51, 51, 53, 54, 54, 56, 59, - 59, 59, 61, 62, 62, 64, 44, 43, 42, 42, 41, 41, 41, 41, 42, 42, 42, 42, - 42, 42, 44, 48, 48, 49, 52, 54, 54, 56, 58, 58, 60, 63, 63, 64, 66, 67, - 67, 69, 44, 43, 42, 42, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 44, 48, - 48, 49, 52, 54, 54, 56, 58, 58, 60, 63, 63, 64, 66, 67, 67, 69, 47, 46, - 45, 45, 45, 44, 44, 44, 44, 45, 45, 45, 45, 45, 47, 50, 50, 51, 55, 56, - 56, 58, 60, 60, 62, 66, 66, 67, 69, 70, 70, 73, 53, 52, 51, 51, 50, 49, - 49, 49, 49, 50, 50, 49, 49, 49, 51, 54, 54, 55, 59, 60, 60, 63, 65, 65, - 67, 71, 71, 72, 75, 76, 76, 79, - /* Size 4x16 */ - 31, 32, 36, 44, 32, 32, 35, 42, 32, 32, 35, 41, 32, 33, 34, 41, 32, 34, - 36, 42, 32, 34, 36, 42, 32, 35, 38, 42, 33, 36, 40, 44, 34, 37, 42, 48, - 35, 38, 47, 52, 35, 38, 48, 54, 38, 40, 50, 58, 40, 41, 51, 60, 42, 43, - 53, 63, 45, 45, 56, 66, 46, 46, 56, 67, - /* Size 16x4 */ - 31, 32, 32, 32, 32, 32, 32, 33, 34, 35, 35, 38, 40, 42, 45, 46, 32, 32, - 32, 33, 34, 34, 35, 36, 37, 38, 38, 40, 41, 43, 45, 46, 36, 35, 35, 34, - 36, 36, 38, 40, 42, 47, 48, 50, 51, 53, 56, 56, 44, 42, 41, 41, 42, 42, - 42, 44, 48, 52, 54, 58, 60, 63, 66, 67, - /* Size 8x32 */ - 32, 31, 31, 32, 35, 36, 44, 47, 31, 32, 32, 32, 35, 35, 43, 46, 31, 32, - 32, 32, 35, 35, 42, 45, 31, 32, 32, 32, 35, 35, 42, 45, 31, 32, 32, 32, - 34, 35, 41, 45, 31, 32, 32, 33, 
34, 34, 41, 44, 31, 32, 32, 33, 34, 34, - 41, 44, 31, 32, 32, 33, 34, 35, 41, 44, 31, 32, 33, 34, 35, 36, 42, 44, - 32, 32, 33, 34, 36, 36, 42, 45, 32, 32, 33, 34, 36, 36, 42, 45, 32, 32, - 33, 35, 37, 37, 42, 45, 32, 33, 34, 35, 37, 38, 42, 45, 32, 33, 34, 35, - 37, 38, 42, 45, 32, 33, 34, 36, 39, 40, 44, 47, 34, 34, 35, 37, 41, 42, - 48, 50, 34, 34, 35, 37, 41, 42, 48, 50, 34, 34, 35, 37, 42, 43, 49, 51, - 35, 34, 36, 38, 45, 47, 52, 55, 36, 34, 36, 38, 46, 48, 54, 56, 36, 34, - 36, 38, 46, 48, 54, 56, 38, 36, 37, 40, 47, 49, 56, 58, 39, 37, 39, 40, - 48, 50, 58, 60, 39, 37, 39, 40, 48, 50, 58, 60, 41, 39, 40, 41, 49, 51, - 60, 62, 44, 41, 42, 43, 51, 53, 63, 66, 44, 41, 42, 43, 51, 53, 63, 66, - 44, 42, 42, 43, 51, 54, 64, 67, 47, 44, 44, 45, 53, 56, 66, 69, 48, 45, - 45, 46, 54, 56, 67, 70, 48, 45, 45, 46, 54, 56, 67, 70, 51, 47, 48, 48, - 56, 58, 69, 73, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 34, 34, - 35, 36, 36, 38, 39, 39, 41, 44, 44, 44, 47, 48, 48, 51, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 34, 36, - 37, 37, 39, 41, 41, 42, 44, 45, 45, 47, 31, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 37, 39, 39, 40, 42, - 42, 42, 44, 45, 45, 48, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, - 35, 35, 36, 37, 37, 37, 38, 38, 38, 40, 40, 40, 41, 43, 43, 43, 45, 46, - 46, 48, 35, 35, 35, 35, 34, 34, 34, 34, 35, 36, 36, 37, 37, 37, 39, 41, - 41, 42, 45, 46, 46, 47, 48, 48, 49, 51, 51, 51, 53, 54, 54, 56, 36, 35, - 35, 35, 35, 34, 34, 35, 36, 36, 36, 37, 38, 38, 40, 42, 42, 43, 47, 48, - 48, 49, 50, 50, 51, 53, 53, 54, 56, 56, 56, 58, 44, 43, 42, 42, 41, 41, - 41, 41, 42, 42, 42, 42, 42, 42, 44, 48, 48, 49, 52, 54, 54, 56, 58, 58, - 60, 63, 63, 64, 66, 67, 67, 69, 47, 46, 45, 45, 45, 44, 44, 44, 44, 45, - 45, 45, 45, 45, 47, 50, 50, 51, 55, 56, 56, 58, 60, 60, 62, 66, 66, 67, - 69, 70, 70, 73 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 37, 47, 47, 
37, 44, 47, 45, 47, 47, 53, 53, 47, 45, 53, 59, - /* Size 8x8 */ - 31, 31, 34, 37, 43, 48, 47, 49, 31, 32, 35, 40, 43, 46, 45, 46, 34, 35, - 39, 43, 45, 46, 45, 46, 37, 40, 43, 47, 47, 47, 45, 46, 43, 43, 45, 47, - 49, 50, 50, 50, 48, 46, 46, 47, 50, 53, 55, 55, 47, 45, 45, 45, 50, 55, - 58, 60, 49, 46, 46, 46, 50, 55, 60, 61, - /* Size 16x16 */ - 32, 31, 31, 30, 33, 33, 36, 38, 41, 47, 49, 48, 49, 49, 50, 50, 31, 31, - 31, 31, 34, 34, 38, 40, 42, 46, 47, 47, 47, 47, 48, 48, 31, 31, 31, 31, - 34, 35, 39, 40, 42, 46, 47, 46, 46, 46, 47, 47, 30, 31, 31, 32, 34, 35, - 40, 41, 42, 45, 46, 45, 45, 45, 46, 46, 33, 34, 34, 34, 37, 38, 42, 43, - 44, 46, 47, 46, 46, 45, 46, 46, 33, 34, 35, 35, 38, 39, 43, 44, 45, 47, - 47, 46, 46, 45, 46, 46, 36, 38, 39, 40, 42, 43, 47, 47, 47, 47, 48, 46, - 46, 45, 46, 46, 38, 40, 40, 41, 43, 44, 47, 47, 48, 48, 49, 48, 47, 47, - 47, 47, 41, 42, 42, 42, 44, 45, 47, 48, 48, 50, 50, 49, 49, 49, 50, 50, - 47, 46, 46, 45, 46, 47, 47, 48, 50, 52, 52, 52, 52, 52, 53, 53, 49, 47, - 47, 46, 47, 47, 48, 49, 50, 52, 53, 53, 53, 53, 54, 54, 48, 47, 46, 45, - 46, 46, 46, 48, 49, 52, 53, 54, 55, 55, 56, 56, 49, 47, 46, 45, 46, 46, - 46, 47, 49, 52, 53, 55, 55, 57, 57, 58, 49, 47, 46, 45, 45, 45, 45, 47, - 49, 52, 53, 55, 57, 58, 59, 60, 50, 48, 47, 46, 46, 46, 46, 47, 50, 53, - 54, 56, 57, 59, 61, 61, 50, 48, 47, 46, 46, 46, 46, 47, 50, 53, 54, 56, - 58, 60, 61, 61, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 36, 36, 38, 41, 41, 43, - 47, 49, 49, 49, 48, 48, 49, 49, 49, 49, 50, 50, 50, 51, 31, 31, 31, 31, - 31, 31, 31, 31, 33, 34, 34, 36, 37, 37, 39, 42, 42, 43, 47, 48, 48, 48, - 47, 47, 47, 47, 47, 48, 49, 49, 49, 50, 31, 31, 31, 31, 31, 31, 31, 32, - 34, 34, 34, 37, 38, 38, 40, 42, 42, 43, 46, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 48, 48, 48, 49, 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 37, - 38, 38, 40, 42, 42, 43, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, - 48, 49, 31, 31, 31, 31, 31, 31, 31, 32, 34, 35, 35, 37, 
39, 39, 40, 42, - 42, 43, 46, 47, 47, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 48, 30, 31, - 31, 31, 31, 32, 32, 32, 34, 35, 35, 38, 40, 40, 41, 42, 42, 43, 45, 46, - 46, 46, 45, 45, 45, 45, 45, 45, 46, 46, 46, 47, 30, 31, 31, 31, 31, 32, - 32, 32, 34, 35, 35, 38, 40, 40, 41, 42, 42, 43, 45, 46, 46, 46, 45, 45, - 45, 45, 45, 45, 46, 46, 46, 47, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, - 36, 38, 40, 40, 41, 43, 43, 43, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, - 46, 46, 46, 47, 33, 33, 34, 34, 34, 34, 34, 35, 37, 38, 38, 41, 42, 42, - 43, 44, 44, 45, 46, 47, 47, 46, 46, 46, 46, 45, 45, 45, 46, 46, 46, 47, - 33, 34, 34, 34, 35, 35, 35, 36, 38, 39, 39, 41, 43, 43, 44, 45, 45, 45, - 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 46, 46, 46, 47, 33, 34, 34, 34, - 35, 35, 35, 36, 38, 39, 39, 41, 43, 43, 44, 45, 45, 45, 47, 47, 47, 46, - 46, 46, 46, 45, 45, 45, 46, 46, 46, 47, 35, 36, 37, 37, 37, 38, 38, 38, - 41, 41, 41, 44, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 46, 46, 46, 45, - 45, 45, 46, 46, 46, 47, 36, 37, 38, 38, 39, 40, 40, 40, 42, 43, 43, 46, - 47, 47, 47, 47, 47, 47, 47, 48, 48, 47, 46, 46, 46, 45, 45, 45, 46, 46, - 46, 46, 36, 37, 38, 38, 39, 40, 40, 40, 42, 43, 43, 46, 47, 47, 47, 47, - 47, 47, 47, 48, 48, 47, 46, 46, 46, 45, 45, 45, 46, 46, 46, 46, 38, 39, - 40, 40, 40, 41, 41, 41, 43, 44, 44, 46, 47, 47, 47, 48, 48, 48, 48, 49, - 49, 48, 48, 48, 47, 47, 47, 47, 47, 47, 47, 48, 41, 42, 42, 42, 42, 42, - 42, 43, 44, 45, 45, 46, 47, 47, 48, 48, 48, 49, 50, 50, 50, 50, 49, 49, - 49, 49, 49, 49, 50, 50, 50, 50, 41, 42, 42, 42, 42, 42, 42, 43, 44, 45, - 45, 46, 47, 47, 48, 48, 48, 49, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, - 50, 50, 50, 50, 43, 43, 43, 43, 43, 43, 43, 43, 45, 45, 45, 46, 47, 47, - 48, 49, 49, 49, 50, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, - 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 47, 47, 47, 48, 50, 50, 50, - 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 49, 48, 47, 47, - 47, 46, 46, 46, 47, 47, 47, 47, 48, 48, 49, 50, 50, 51, 
52, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 49, 48, 47, 47, 47, 46, 46, 46, - 47, 47, 47, 47, 48, 48, 49, 50, 50, 51, 52, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 54, 54, 54, 54, 49, 48, 47, 47, 46, 46, 46, 46, 46, 46, 46, 47, - 47, 47, 48, 50, 50, 50, 52, 53, 53, 53, 54, 54, 54, 55, 55, 55, 55, 55, - 55, 56, 48, 47, 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 48, 49, - 49, 50, 52, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 56, 57, 48, 47, - 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 48, 49, 49, 50, 52, 53, - 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 56, 57, 49, 47, 47, 47, 46, 45, - 45, 45, 46, 46, 46, 46, 46, 46, 47, 49, 49, 50, 52, 53, 53, 54, 55, 55, - 55, 57, 57, 57, 57, 58, 58, 58, 49, 47, 47, 47, 46, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 47, 49, 49, 50, 52, 53, 53, 55, 55, 55, 57, 58, 58, 59, - 59, 60, 60, 60, 49, 47, 47, 47, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 47, 49, 49, 50, 52, 53, 53, 55, 55, 55, 57, 58, 58, 59, 59, 60, 60, 60, - 49, 48, 47, 47, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45, 47, 49, 49, 50, - 52, 53, 53, 55, 56, 56, 57, 59, 59, 59, 60, 60, 60, 61, 50, 49, 48, 48, - 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 50, 50, 50, 53, 54, 54, 55, - 56, 56, 57, 59, 59, 60, 61, 61, 61, 62, 50, 49, 48, 48, 47, 46, 46, 46, - 46, 46, 46, 46, 46, 46, 47, 50, 50, 50, 53, 54, 54, 55, 56, 56, 58, 60, - 60, 60, 61, 61, 61, 63, 50, 49, 48, 48, 47, 46, 46, 46, 46, 46, 46, 46, - 46, 46, 47, 50, 50, 50, 53, 54, 54, 55, 56, 56, 58, 60, 60, 60, 61, 61, - 61, 63, 51, 50, 49, 49, 48, 47, 47, 47, 47, 47, 47, 47, 46, 46, 48, 50, - 50, 51, 53, 54, 54, 56, 57, 57, 58, 60, 60, 61, 62, 63, 63, 64, - /* Size 4x8 */ - 31, 38, 47, 48, 31, 40, 46, 45, 35, 43, 47, 46, 39, 47, 47, 45, 43, 47, - 50, 50, 47, 47, 53, 55, 46, 46, 53, 58, 48, 46, 54, 59, - /* Size 8x4 */ - 31, 31, 35, 39, 43, 47, 46, 48, 38, 40, 43, 47, 47, 47, 46, 46, 47, 46, - 47, 47, 50, 53, 53, 54, 48, 45, 46, 45, 50, 55, 58, 59, - /* Size 8x16 */ - 32, 31, 33, 37, 45, 48, 49, 50, 31, 31, 34, 
38, 45, 47, 47, 48, 31, 32, - 34, 39, 45, 46, 46, 47, 30, 32, 35, 40, 44, 46, 45, 46, 33, 35, 37, 42, - 46, 47, 45, 46, 33, 36, 38, 43, 46, 47, 46, 46, 37, 40, 43, 47, 47, 47, - 45, 46, 39, 41, 43, 47, 48, 48, 47, 47, 42, 43, 44, 47, 49, 50, 49, 50, - 47, 46, 46, 48, 51, 52, 53, 53, 49, 46, 47, 48, 52, 53, 53, 54, 48, 46, - 46, 47, 51, 53, 56, 56, 48, 45, 46, 46, 51, 53, 57, 57, 49, 45, 45, 46, - 51, 53, 58, 59, 50, 46, 46, 46, 52, 54, 59, 61, 50, 46, 46, 46, 52, 54, - 59, 61, - /* Size 16x8 */ - 32, 31, 31, 30, 33, 33, 37, 39, 42, 47, 49, 48, 48, 49, 50, 50, 31, 31, - 32, 32, 35, 36, 40, 41, 43, 46, 46, 46, 45, 45, 46, 46, 33, 34, 34, 35, - 37, 38, 43, 43, 44, 46, 47, 46, 46, 45, 46, 46, 37, 38, 39, 40, 42, 43, - 47, 47, 47, 48, 48, 47, 46, 46, 46, 46, 45, 45, 45, 44, 46, 46, 47, 48, - 49, 51, 52, 51, 51, 51, 52, 52, 48, 47, 46, 46, 47, 47, 47, 48, 50, 52, - 53, 53, 53, 53, 54, 54, 49, 47, 46, 45, 45, 46, 45, 47, 49, 53, 53, 56, - 57, 58, 59, 59, 50, 48, 47, 46, 46, 46, 46, 47, 50, 53, 54, 56, 57, 59, - 61, 61, - /* Size 16x32 */ - 32, 31, 31, 31, 33, 37, 37, 38, 45, 48, 48, 49, 49, 49, 50, 52, 31, 31, - 31, 31, 33, 38, 38, 39, 45, 47, 47, 48, 48, 48, 49, 51, 31, 31, 31, 31, - 34, 38, 38, 40, 45, 47, 47, 47, 47, 47, 48, 50, 31, 31, 31, 31, 34, 38, - 38, 40, 45, 47, 47, 47, 47, 47, 48, 50, 31, 31, 32, 32, 34, 39, 39, 40, - 45, 46, 46, 46, 46, 46, 47, 49, 30, 31, 32, 32, 35, 40, 40, 41, 44, 46, - 46, 45, 45, 45, 46, 48, 30, 31, 32, 32, 35, 40, 40, 41, 44, 46, 46, 45, - 45, 45, 46, 48, 31, 32, 33, 33, 35, 40, 40, 41, 45, 46, 46, 45, 45, 45, - 46, 48, 33, 34, 35, 35, 37, 42, 42, 43, 46, 47, 47, 46, 45, 45, 46, 47, - 33, 35, 36, 36, 38, 43, 43, 44, 46, 47, 47, 46, 46, 46, 46, 47, 33, 35, - 36, 36, 38, 43, 43, 44, 46, 47, 47, 46, 46, 46, 46, 47, 35, 37, 38, 38, - 41, 45, 45, 46, 47, 47, 47, 46, 45, 45, 46, 47, 37, 39, 40, 40, 43, 47, - 47, 47, 47, 47, 47, 46, 45, 45, 46, 47, 37, 39, 40, 40, 43, 47, 47, 47, - 47, 47, 47, 46, 45, 45, 46, 47, 39, 40, 41, 41, 43, 47, 47, 
47, 48, 48, - 48, 47, 47, 47, 47, 48, 42, 42, 43, 43, 44, 47, 47, 48, 49, 50, 50, 49, - 49, 49, 50, 50, 42, 42, 43, 43, 44, 47, 47, 48, 49, 50, 50, 49, 49, 49, - 50, 50, 43, 43, 43, 43, 45, 47, 47, 48, 50, 50, 50, 50, 50, 50, 50, 51, - 47, 46, 46, 46, 46, 48, 48, 48, 51, 52, 52, 52, 53, 53, 53, 53, 49, 47, - 46, 46, 47, 48, 48, 49, 52, 53, 53, 53, 53, 53, 54, 54, 49, 47, 46, 46, - 47, 48, 48, 49, 52, 53, 53, 53, 53, 53, 54, 54, 48, 47, 46, 46, 46, 47, - 47, 48, 52, 53, 53, 54, 55, 55, 55, 56, 48, 47, 46, 46, 46, 47, 47, 48, - 51, 53, 53, 54, 56, 56, 56, 57, 48, 47, 46, 46, 46, 47, 47, 48, 51, 53, - 53, 54, 56, 56, 56, 57, 48, 47, 45, 45, 46, 46, 46, 47, 51, 53, 53, 55, - 57, 57, 57, 59, 49, 46, 45, 45, 45, 46, 46, 47, 51, 53, 53, 56, 58, 58, - 59, 61, 49, 46, 45, 45, 45, 46, 46, 47, 51, 53, 53, 56, 58, 58, 59, 61, - 49, 47, 45, 45, 45, 46, 46, 47, 52, 53, 53, 56, 58, 58, 60, 62, 50, 48, - 46, 46, 46, 46, 46, 48, 52, 54, 54, 57, 59, 59, 61, 63, 50, 48, 46, 46, - 46, 46, 46, 48, 52, 54, 54, 57, 59, 59, 61, 64, 50, 48, 46, 46, 46, 46, - 46, 48, 52, 54, 54, 57, 59, 59, 61, 64, 51, 49, 47, 47, 47, 47, 47, 48, - 52, 54, 54, 58, 60, 60, 62, 65, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 37, 37, 39, 42, 42, 43, - 47, 49, 49, 48, 48, 48, 48, 49, 49, 49, 50, 50, 50, 51, 31, 31, 31, 31, - 31, 31, 31, 32, 34, 35, 35, 37, 39, 39, 40, 42, 42, 43, 46, 47, 47, 47, - 47, 47, 47, 46, 46, 47, 48, 48, 48, 49, 31, 31, 31, 31, 32, 32, 32, 33, - 35, 36, 36, 38, 40, 40, 41, 43, 43, 43, 46, 46, 46, 46, 46, 46, 45, 45, - 45, 45, 46, 46, 46, 47, 31, 31, 31, 31, 32, 32, 32, 33, 35, 36, 36, 38, - 40, 40, 41, 43, 43, 43, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 46, 46, - 46, 47, 33, 33, 34, 34, 34, 35, 35, 35, 37, 38, 38, 41, 43, 43, 43, 44, - 44, 45, 46, 47, 47, 46, 46, 46, 46, 45, 45, 45, 46, 46, 46, 47, 37, 38, - 38, 38, 39, 40, 40, 40, 42, 43, 43, 45, 47, 47, 47, 47, 47, 47, 48, 48, - 48, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 47, 37, 38, 38, 38, 39, 40, - 40, 40, 
42, 43, 43, 45, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, - 46, 46, 46, 46, 46, 46, 46, 47, 38, 39, 40, 40, 40, 41, 41, 41, 43, 44, - 44, 46, 47, 47, 47, 48, 48, 48, 48, 49, 49, 48, 48, 48, 47, 47, 47, 47, - 48, 48, 48, 48, 45, 45, 45, 45, 45, 44, 44, 45, 46, 46, 46, 47, 47, 47, - 48, 49, 49, 50, 51, 52, 52, 52, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, - 48, 47, 47, 47, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 50, 50, 50, - 52, 53, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 48, 47, 47, 47, - 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 50, 50, 50, 52, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 49, 48, 47, 47, 46, 45, 45, 45, - 46, 46, 46, 46, 46, 46, 47, 49, 49, 50, 52, 53, 53, 54, 54, 54, 55, 56, - 56, 56, 57, 57, 57, 58, 49, 48, 47, 47, 46, 45, 45, 45, 45, 46, 46, 45, - 45, 45, 47, 49, 49, 50, 53, 53, 53, 55, 56, 56, 57, 58, 58, 58, 59, 59, - 59, 60, 49, 48, 47, 47, 46, 45, 45, 45, 45, 46, 46, 45, 45, 45, 47, 49, - 49, 50, 53, 53, 53, 55, 56, 56, 57, 58, 58, 58, 59, 59, 59, 60, 50, 49, - 48, 48, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 50, 50, 50, 53, 54, - 54, 55, 56, 56, 57, 59, 59, 60, 61, 61, 61, 62, 52, 51, 50, 50, 49, 48, - 48, 48, 47, 47, 47, 47, 47, 47, 48, 50, 50, 51, 53, 54, 54, 56, 57, 57, - 59, 61, 61, 62, 63, 64, 64, 65, - /* Size 4x16 */ - 31, 37, 48, 49, 31, 38, 47, 47, 31, 39, 46, 46, 31, 40, 46, 45, 34, 42, - 47, 45, 35, 43, 47, 46, 39, 47, 47, 45, 40, 47, 48, 47, 42, 47, 50, 49, - 46, 48, 52, 53, 47, 48, 53, 53, 47, 47, 53, 56, 47, 46, 53, 57, 46, 46, - 53, 58, 48, 46, 54, 59, 48, 46, 54, 59, - /* Size 16x4 */ - 31, 31, 31, 31, 34, 35, 39, 40, 42, 46, 47, 47, 47, 46, 48, 48, 37, 38, - 39, 40, 42, 43, 47, 47, 47, 48, 48, 47, 46, 46, 46, 46, 48, 47, 46, 46, - 47, 47, 47, 48, 50, 52, 53, 53, 53, 53, 54, 54, 49, 47, 46, 45, 45, 46, - 45, 47, 49, 53, 53, 56, 57, 58, 59, 59, - /* Size 8x32 */ - 32, 31, 33, 37, 45, 48, 49, 50, 31, 31, 33, 38, 45, 47, 48, 49, 31, 31, - 34, 38, 45, 47, 47, 48, 31, 31, 34, 38, 45, 47, 47, 48, 31, 
32, 34, 39, - 45, 46, 46, 47, 30, 32, 35, 40, 44, 46, 45, 46, 30, 32, 35, 40, 44, 46, - 45, 46, 31, 33, 35, 40, 45, 46, 45, 46, 33, 35, 37, 42, 46, 47, 45, 46, - 33, 36, 38, 43, 46, 47, 46, 46, 33, 36, 38, 43, 46, 47, 46, 46, 35, 38, - 41, 45, 47, 47, 45, 46, 37, 40, 43, 47, 47, 47, 45, 46, 37, 40, 43, 47, - 47, 47, 45, 46, 39, 41, 43, 47, 48, 48, 47, 47, 42, 43, 44, 47, 49, 50, - 49, 50, 42, 43, 44, 47, 49, 50, 49, 50, 43, 43, 45, 47, 50, 50, 50, 50, - 47, 46, 46, 48, 51, 52, 53, 53, 49, 46, 47, 48, 52, 53, 53, 54, 49, 46, - 47, 48, 52, 53, 53, 54, 48, 46, 46, 47, 52, 53, 55, 55, 48, 46, 46, 47, - 51, 53, 56, 56, 48, 46, 46, 47, 51, 53, 56, 56, 48, 45, 46, 46, 51, 53, - 57, 57, 49, 45, 45, 46, 51, 53, 58, 59, 49, 45, 45, 46, 51, 53, 58, 59, - 49, 45, 45, 46, 52, 53, 58, 60, 50, 46, 46, 46, 52, 54, 59, 61, 50, 46, - 46, 46, 52, 54, 59, 61, 50, 46, 46, 46, 52, 54, 59, 61, 51, 47, 47, 47, - 52, 54, 60, 62, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 37, 37, 39, 42, 42, 43, - 47, 49, 49, 48, 48, 48, 48, 49, 49, 49, 50, 50, 50, 51, 31, 31, 31, 31, - 32, 32, 32, 33, 35, 36, 36, 38, 40, 40, 41, 43, 43, 43, 46, 46, 46, 46, - 46, 46, 45, 45, 45, 45, 46, 46, 46, 47, 33, 33, 34, 34, 34, 35, 35, 35, - 37, 38, 38, 41, 43, 43, 43, 44, 44, 45, 46, 47, 47, 46, 46, 46, 46, 45, - 45, 45, 46, 46, 46, 47, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43, 43, 45, - 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 46, - 46, 47, 45, 45, 45, 45, 45, 44, 44, 45, 46, 46, 46, 47, 47, 47, 48, 49, - 49, 50, 51, 52, 52, 52, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 48, 47, - 47, 47, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 50, 50, 50, 52, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 49, 48, 47, 47, 46, 45, - 45, 45, 45, 46, 46, 45, 45, 45, 47, 49, 49, 50, 53, 53, 53, 55, 56, 56, - 57, 58, 58, 58, 59, 59, 59, 60, 50, 49, 48, 48, 47, 46, 46, 46, 46, 46, - 46, 46, 46, 46, 47, 50, 50, 50, 53, 54, 54, 55, 56, 56, 57, 59, 59, 60, - 61, 61, 61, 62 }, - }, - 
{ - { /* Luma */ - /* Size 4x4 */ - 32, 32, 34, 38, 32, 33, 35, 39, 34, 35, 39, 45, 38, 39, 45, 54, - /* Size 8x8 */ - 31, 31, 32, 32, 33, 34, 37, 41, 31, 32, 32, 32, 33, 34, 36, 39, 32, 32, - 32, 33, 34, 35, 37, 40, 32, 32, 33, 34, 35, 36, 38, 41, 33, 33, 34, 35, - 37, 39, 41, 44, 34, 34, 35, 36, 39, 43, 46, 49, 37, 36, 37, 38, 41, 46, - 51, 54, 41, 39, 40, 41, 44, 49, 54, 58, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 31, 31, 32, 32, 34, 34, 36, 36, 39, 39, 44, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 34, 34, 35, 35, 38, 38, 42, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 34, 34, 35, 35, 38, 38, 42, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 34, 34, 37, 37, 41, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 34, 34, 37, 37, 41, 31, 32, 32, 32, 32, 33, 33, 34, 34, 35, - 35, 36, 36, 39, 39, 42, 31, 32, 32, 32, 32, 33, 33, 34, 34, 35, 35, 36, - 36, 39, 39, 42, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37, 37, 38, 38, 40, - 40, 42, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37, 37, 38, 38, 40, 40, 42, - 34, 34, 34, 33, 33, 35, 35, 37, 37, 39, 39, 42, 42, 45, 45, 47, 34, 34, - 34, 33, 33, 35, 35, 37, 37, 39, 39, 42, 42, 45, 45, 47, 36, 35, 35, 34, - 34, 36, 36, 38, 38, 42, 42, 48, 48, 50, 50, 54, 36, 35, 35, 34, 34, 36, - 36, 38, 38, 42, 42, 48, 48, 50, 50, 54, 39, 38, 38, 37, 37, 39, 39, 40, - 40, 45, 45, 50, 50, 54, 54, 58, 39, 38, 38, 37, 37, 39, 39, 40, 40, 45, - 45, 50, 50, 54, 54, 58, 44, 42, 42, 41, 41, 42, 42, 42, 42, 47, 47, 54, - 54, 58, 58, 63, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33, - 34, 34, 34, 35, 36, 36, 36, 37, 39, 39, 39, 41, 44, 44, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, - 35, 35, 35, 37, 39, 39, 39, 41, 43, 43, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 35, 35, 37, - 38, 38, 38, 40, 42, 42, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 35, 35, 37, 38, 38, 38, 40, - 42, 
42, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 34, 34, 34, 34, 35, 35, 35, 37, 38, 38, 38, 40, 42, 42, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, - 34, 34, 35, 35, 35, 36, 38, 38, 38, 39, 41, 41, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, - 34, 36, 37, 37, 37, 39, 41, 41, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 36, 37, 37, - 37, 39, 41, 41, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 36, 37, 37, 37, 39, 41, 41, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 34, - 34, 34, 34, 35, 35, 35, 35, 37, 38, 38, 38, 40, 41, 41, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 36, - 36, 36, 36, 38, 39, 39, 39, 40, 42, 42, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 36, 36, 36, 36, 38, - 39, 39, 39, 40, 42, 42, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, - 33, 33, 34, 34, 34, 34, 35, 35, 35, 36, 36, 36, 36, 38, 39, 39, 39, 40, - 42, 42, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, - 34, 35, 36, 36, 36, 36, 37, 37, 37, 38, 40, 40, 40, 41, 42, 42, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, - 37, 37, 38, 38, 38, 39, 40, 40, 40, 41, 42, 42, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, - 38, 39, 40, 40, 40, 41, 42, 42, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40, - 40, 41, 42, 42, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, - 36, 36, 36, 37, 38, 38, 38, 39, 40, 40, 40, 41, 42, 42, 42, 44, 45, 45, - 34, 34, 34, 34, 34, 34, 33, 33, 33, 34, 35, 35, 35, 36, 37, 37, 37, 38, - 39, 39, 39, 41, 42, 42, 42, 44, 45, 45, 45, 46, 47, 47, 34, 34, 34, 34, - 34, 
34, 33, 33, 33, 34, 35, 35, 35, 36, 37, 37, 37, 38, 39, 39, 39, 41, - 42, 42, 42, 44, 45, 45, 45, 46, 47, 47, 34, 34, 34, 34, 34, 34, 33, 33, - 33, 34, 35, 35, 35, 36, 37, 37, 37, 38, 39, 39, 39, 41, 42, 42, 42, 44, - 45, 45, 45, 46, 47, 47, 35, 34, 34, 34, 34, 34, 34, 34, 34, 35, 36, 36, - 36, 36, 37, 37, 37, 39, 41, 41, 41, 43, 45, 45, 45, 46, 47, 47, 47, 49, - 50, 50, 36, 35, 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, - 38, 40, 42, 42, 42, 45, 48, 48, 48, 49, 50, 50, 50, 52, 54, 54, 36, 35, - 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, - 42, 45, 48, 48, 48, 49, 50, 50, 50, 52, 54, 54, 36, 35, 35, 35, 35, 35, - 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, 42, 45, 48, 48, - 48, 49, 50, 50, 50, 52, 54, 54, 37, 37, 37, 37, 37, 36, 36, 36, 36, 37, - 38, 38, 38, 38, 39, 39, 39, 41, 44, 44, 44, 46, 49, 49, 49, 51, 52, 52, - 52, 54, 56, 56, 39, 39, 38, 38, 38, 38, 37, 37, 37, 38, 39, 39, 39, 40, - 40, 40, 40, 42, 45, 45, 45, 47, 50, 50, 50, 52, 54, 54, 54, 56, 58, 58, - 39, 39, 38, 38, 38, 38, 37, 37, 37, 38, 39, 39, 39, 40, 40, 40, 40, 42, - 45, 45, 45, 47, 50, 50, 50, 52, 54, 54, 54, 56, 58, 58, 39, 39, 38, 38, - 38, 38, 37, 37, 37, 38, 39, 39, 39, 40, 40, 40, 40, 42, 45, 45, 45, 47, - 50, 50, 50, 52, 54, 54, 54, 56, 58, 58, 41, 41, 40, 40, 40, 39, 39, 39, - 39, 40, 40, 40, 40, 41, 41, 41, 41, 44, 46, 46, 46, 49, 52, 52, 52, 54, - 56, 56, 56, 58, 60, 60, 44, 43, 42, 42, 42, 41, 41, 41, 41, 41, 42, 42, - 42, 42, 42, 42, 42, 45, 47, 47, 47, 50, 54, 54, 54, 56, 58, 58, 58, 60, - 63, 63, 44, 43, 42, 42, 42, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, - 42, 45, 47, 47, 47, 50, 54, 54, 54, 56, 58, 58, 58, 60, 63, 63, - /* Size 4x8 */ - 31, 32, 34, 39, 32, 32, 34, 38, 32, 33, 34, 38, 32, 33, 36, 40, 33, 34, - 38, 42, 34, 36, 41, 47, 37, 38, 44, 52, 40, 40, 46, 56, - /* Size 8x4 */ - 31, 32, 32, 32, 33, 34, 37, 40, 32, 32, 33, 33, 34, 36, 38, 40, 34, 34, - 34, 36, 38, 41, 44, 46, 39, 38, 38, 40, 42, 47, 52, 56, - /* Size 
8x16 */ - 32, 31, 31, 32, 32, 36, 36, 44, 31, 32, 32, 32, 32, 35, 35, 42, 31, 32, - 32, 32, 32, 35, 35, 42, 31, 32, 32, 33, 33, 34, 34, 41, 31, 32, 32, 33, - 33, 34, 34, 41, 32, 32, 32, 34, 34, 36, 36, 42, 32, 32, 32, 34, 34, 36, - 36, 42, 32, 33, 33, 35, 35, 38, 38, 42, 32, 33, 33, 35, 35, 38, 38, 42, - 34, 34, 34, 37, 37, 42, 42, 48, 34, 34, 34, 37, 37, 42, 42, 48, 36, 34, - 34, 38, 38, 48, 48, 54, 36, 34, 34, 38, 38, 48, 48, 54, 39, 37, 37, 40, - 40, 50, 50, 58, 39, 37, 37, 40, 40, 50, 50, 58, 44, 41, 41, 43, 43, 53, - 53, 63, - /* Size 16x8 */ - 32, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 36, 36, 39, 39, 44, 31, 32, - 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 37, 37, 41, 31, 32, 32, 32, - 32, 32, 32, 33, 33, 34, 34, 34, 34, 37, 37, 41, 32, 32, 32, 33, 33, 34, - 34, 35, 35, 37, 37, 38, 38, 40, 40, 43, 32, 32, 32, 33, 33, 34, 34, 35, - 35, 37, 37, 38, 38, 40, 40, 43, 36, 35, 35, 34, 34, 36, 36, 38, 38, 42, - 42, 48, 48, 50, 50, 53, 36, 35, 35, 34, 34, 36, 36, 38, 38, 42, 42, 48, - 48, 50, 50, 53, 44, 42, 42, 41, 41, 42, 42, 42, 42, 48, 48, 54, 54, 58, - 58, 63, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 32, 32, 32, 32, 34, 36, 36, 36, 39, 44, 44, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 34, 35, 35, 35, 39, 43, 43, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 34, 35, 35, 35, 38, 42, 42, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 34, 35, 35, 35, 38, 42, 42, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 34, 35, 35, 35, 38, 42, 42, 31, 32, 32, 32, 32, 32, 32, 32, 32, 34, - 35, 35, 35, 38, 41, 41, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, - 34, 37, 41, 41, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 37, - 41, 41, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 37, 41, 41, - 31, 32, 32, 32, 32, 33, 33, 33, 33, 34, 35, 35, 35, 38, 41, 41, 32, 32, - 32, 32, 32, 33, 34, 34, 34, 35, 36, 36, 36, 39, 42, 42, 32, 32, 32, 32, - 32, 33, 34, 34, 34, 35, 36, 36, 36, 39, 42, 42, 32, 32, 32, 32, 32, 33, - 34, 34, 34, 35, 36, 36, 36, 39, 42, 42, 32, 32, 32, 32, 32, 33, 34, 34, - 34, 36, 
37, 37, 37, 40, 42, 42, 32, 32, 33, 33, 33, 34, 35, 35, 35, 37, - 38, 38, 38, 40, 42, 42, 32, 32, 33, 33, 33, 34, 35, 35, 35, 37, 38, 38, - 38, 40, 42, 42, 32, 32, 33, 33, 33, 34, 35, 35, 35, 37, 38, 38, 38, 40, - 42, 42, 33, 33, 33, 33, 33, 34, 36, 36, 36, 38, 40, 40, 40, 42, 45, 45, - 34, 34, 34, 34, 34, 35, 37, 37, 37, 39, 42, 42, 42, 45, 48, 48, 34, 34, - 34, 34, 34, 35, 37, 37, 37, 39, 42, 42, 42, 45, 48, 48, 34, 34, 34, 34, - 34, 35, 37, 37, 37, 39, 42, 42, 42, 45, 48, 48, 35, 34, 34, 34, 34, 36, - 37, 37, 37, 41, 45, 45, 45, 47, 50, 50, 36, 35, 34, 34, 34, 36, 38, 38, - 38, 43, 48, 48, 48, 51, 54, 54, 36, 35, 34, 34, 34, 36, 38, 38, 38, 43, - 48, 48, 48, 51, 54, 54, 36, 35, 34, 34, 34, 36, 38, 38, 38, 43, 48, 48, - 48, 51, 54, 54, 37, 37, 36, 36, 36, 38, 39, 39, 39, 44, 49, 49, 49, 52, - 56, 56, 39, 38, 37, 37, 37, 39, 40, 40, 40, 45, 50, 50, 50, 54, 58, 58, - 39, 38, 37, 37, 37, 39, 40, 40, 40, 45, 50, 50, 50, 54, 58, 58, 39, 38, - 37, 37, 37, 39, 40, 40, 40, 45, 50, 50, 50, 54, 58, 58, 41, 40, 39, 39, - 39, 40, 42, 42, 42, 46, 52, 52, 52, 56, 60, 60, 44, 42, 41, 41, 41, 42, - 43, 43, 43, 48, 53, 53, 53, 58, 63, 63, 44, 42, 41, 41, 41, 42, 43, 43, - 43, 48, 53, 53, 53, 58, 63, 63, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, - 34, 34, 34, 35, 36, 36, 36, 37, 39, 39, 39, 41, 44, 44, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, - 35, 35, 35, 37, 38, 38, 38, 40, 42, 42, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 36, - 37, 37, 37, 39, 41, 41, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 36, 37, 37, 37, 39, - 41, 41, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 34, 34, 34, 34, 34, 34, 34, 36, 37, 37, 37, 39, 41, 41, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, - 35, 36, 36, 36, 36, 38, 39, 
39, 39, 40, 42, 42, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, - 38, 39, 40, 40, 40, 42, 43, 43, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40, - 40, 42, 43, 43, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, - 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43, 43, - 34, 34, 34, 34, 34, 34, 33, 33, 33, 34, 35, 35, 35, 36, 37, 37, 37, 38, - 39, 39, 39, 41, 43, 43, 43, 44, 45, 45, 45, 46, 48, 48, 36, 35, 35, 35, - 35, 35, 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, 42, 45, - 48, 48, 48, 49, 50, 50, 50, 52, 53, 53, 36, 35, 35, 35, 35, 35, 34, 34, - 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, 42, 45, 48, 48, 48, 49, - 50, 50, 50, 52, 53, 53, 36, 35, 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, - 36, 37, 38, 38, 38, 40, 42, 42, 42, 45, 48, 48, 48, 49, 50, 50, 50, 52, - 53, 53, 39, 39, 38, 38, 38, 38, 37, 37, 37, 38, 39, 39, 39, 40, 40, 40, - 40, 42, 45, 45, 45, 47, 51, 51, 51, 52, 54, 54, 54, 56, 58, 58, 44, 43, - 42, 42, 42, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 45, 48, 48, - 48, 50, 54, 54, 54, 56, 58, 58, 58, 60, 63, 63, 44, 43, 42, 42, 42, 41, - 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 45, 48, 48, 48, 50, 54, 54, - 54, 56, 58, 58, 58, 60, 63, 63, - /* Size 4x16 */ - 31, 32, 34, 39, 32, 32, 34, 38, 32, 32, 34, 38, 32, 32, 33, 37, 32, 32, - 33, 37, 32, 33, 35, 39, 32, 33, 35, 39, 32, 34, 37, 40, 32, 34, 37, 40, - 34, 35, 39, 45, 34, 35, 39, 45, 35, 36, 43, 51, 35, 36, 43, 51, 38, 39, - 45, 54, 38, 39, 45, 54, 42, 42, 48, 58, - /* Size 16x4 */ - 31, 32, 32, 32, 32, 32, 32, 32, 32, 34, 34, 35, 35, 38, 38, 42, 32, 32, - 32, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 39, 39, 42, 34, 34, 34, 33, - 33, 35, 35, 37, 37, 39, 39, 43, 43, 45, 45, 48, 39, 38, 38, 37, 37, 39, - 39, 40, 40, 45, 45, 51, 51, 54, 54, 58, - /* Size 8x32 */ - 32, 31, 31, 32, 32, 36, 36, 44, 31, 31, 31, 32, 32, 35, 35, 43, 31, 32, - 32, 
32, 32, 35, 35, 42, 31, 32, 32, 32, 32, 35, 35, 42, 31, 32, 32, 32, - 32, 35, 35, 42, 31, 32, 32, 32, 32, 35, 35, 41, 31, 32, 32, 33, 33, 34, - 34, 41, 31, 32, 32, 33, 33, 34, 34, 41, 31, 32, 32, 33, 33, 34, 34, 41, - 31, 32, 32, 33, 33, 35, 35, 41, 32, 32, 32, 34, 34, 36, 36, 42, 32, 32, - 32, 34, 34, 36, 36, 42, 32, 32, 32, 34, 34, 36, 36, 42, 32, 32, 32, 34, - 34, 37, 37, 42, 32, 33, 33, 35, 35, 38, 38, 42, 32, 33, 33, 35, 35, 38, - 38, 42, 32, 33, 33, 35, 35, 38, 38, 42, 33, 33, 33, 36, 36, 40, 40, 45, - 34, 34, 34, 37, 37, 42, 42, 48, 34, 34, 34, 37, 37, 42, 42, 48, 34, 34, - 34, 37, 37, 42, 42, 48, 35, 34, 34, 37, 37, 45, 45, 50, 36, 34, 34, 38, - 38, 48, 48, 54, 36, 34, 34, 38, 38, 48, 48, 54, 36, 34, 34, 38, 38, 48, - 48, 54, 37, 36, 36, 39, 39, 49, 49, 56, 39, 37, 37, 40, 40, 50, 50, 58, - 39, 37, 37, 40, 40, 50, 50, 58, 39, 37, 37, 40, 40, 50, 50, 58, 41, 39, - 39, 42, 42, 52, 52, 60, 44, 41, 41, 43, 43, 53, 53, 63, 44, 41, 41, 43, - 43, 53, 53, 63, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, - 34, 34, 34, 35, 36, 36, 36, 37, 39, 39, 39, 41, 44, 44, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, - 34, 34, 34, 36, 37, 37, 37, 39, 41, 41, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 36, - 37, 37, 37, 39, 41, 41, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, - 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40, 40, 42, - 43, 43, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, - 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43, 43, 36, 35, - 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, - 42, 45, 48, 48, 48, 49, 50, 50, 50, 52, 53, 53, 36, 35, 35, 35, 35, 35, - 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, 42, 45, 48, 48, - 48, 49, 50, 50, 50, 52, 53, 53, 44, 43, 42, 42, 42, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 45, 48, 48, 48, 
50, 54, 54, 54, 56, 58, 58, - 58, 60, 63, 63 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 34, 42, 47, 34, 39, 45, 46, 42, 45, 48, 49, 47, 46, 49, 54, - /* Size 8x8 */ - 31, 31, 32, 35, 39, 45, 48, 48, 31, 31, 33, 37, 41, 44, 46, 46, 32, 33, - 35, 39, 42, 45, 46, 45, 35, 37, 39, 43, 45, 47, 47, 46, 39, 41, 42, 45, - 47, 48, 48, 47, 45, 44, 45, 47, 48, 50, 51, 51, 48, 46, 46, 47, 48, 51, - 53, 54, 48, 46, 45, 46, 47, 51, 54, 56, - /* Size 16x16 */ - 32, 31, 31, 30, 30, 33, 33, 36, 36, 41, 41, 49, 49, 48, 48, 49, 31, 31, - 31, 31, 31, 34, 34, 38, 38, 42, 42, 47, 47, 47, 47, 47, 31, 31, 31, 31, - 31, 34, 34, 38, 38, 42, 42, 47, 47, 47, 47, 47, 30, 31, 31, 32, 32, 35, - 35, 40, 40, 42, 42, 46, 46, 45, 45, 45, 30, 31, 31, 32, 32, 35, 35, 40, - 40, 42, 42, 46, 46, 45, 45, 45, 33, 34, 34, 35, 35, 39, 39, 43, 43, 45, - 45, 47, 47, 46, 46, 45, 33, 34, 34, 35, 35, 39, 39, 43, 43, 45, 45, 47, - 47, 46, 46, 45, 36, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48, 48, 46, - 46, 45, 36, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48, 48, 46, 46, 45, - 41, 42, 42, 42, 42, 45, 45, 47, 47, 48, 48, 50, 50, 49, 49, 49, 41, 42, - 42, 42, 42, 45, 45, 47, 47, 48, 48, 50, 50, 49, 49, 49, 49, 47, 47, 46, - 46, 47, 47, 48, 48, 50, 50, 53, 53, 53, 53, 53, 49, 47, 47, 46, 46, 47, - 47, 48, 48, 50, 50, 53, 53, 53, 53, 53, 48, 47, 47, 45, 45, 46, 46, 46, - 46, 49, 49, 53, 53, 54, 54, 55, 48, 47, 47, 45, 45, 46, 46, 46, 46, 49, - 49, 53, 53, 54, 54, 55, 49, 47, 47, 45, 45, 45, 45, 45, 45, 49, 49, 53, - 53, 55, 55, 58, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 30, 30, 30, 32, 33, 33, 33, 35, 36, 36, 36, 39, - 41, 41, 41, 45, 49, 49, 49, 49, 48, 48, 48, 49, 49, 49, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 32, 34, 34, 34, 35, 37, 37, 37, 39, 42, 42, 42, 45, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 33, 34, 34, 34, 36, 38, 38, 38, 40, 42, 42, 42, 45, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 34, - 34, 36, 38, 38, 38, 40, 42, 42, 
42, 45, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 34, 34, 36, 38, 38, - 38, 40, 42, 42, 42, 45, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 33, 35, 35, 35, 37, 39, 39, 39, 41, 42, 42, - 42, 44, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 30, 31, 31, 31, 31, 31, - 32, 32, 32, 33, 35, 35, 35, 37, 40, 40, 40, 41, 42, 42, 42, 44, 46, 46, - 46, 46, 45, 45, 45, 45, 45, 45, 30, 31, 31, 31, 31, 31, 32, 32, 32, 33, - 35, 35, 35, 37, 40, 40, 40, 41, 42, 42, 42, 44, 46, 46, 46, 46, 45, 45, - 45, 45, 45, 45, 30, 31, 31, 31, 31, 31, 32, 32, 32, 33, 35, 35, 35, 37, - 40, 40, 40, 41, 42, 42, 42, 44, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, - 32, 32, 33, 33, 33, 33, 33, 33, 33, 35, 37, 37, 37, 39, 41, 41, 41, 42, - 43, 43, 43, 45, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 33, 34, 34, 34, - 34, 35, 35, 35, 35, 37, 39, 39, 39, 41, 43, 43, 43, 44, 45, 45, 45, 46, - 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 33, 34, 34, 34, 34, 35, 35, 35, - 35, 37, 39, 39, 39, 41, 43, 43, 43, 44, 45, 45, 45, 46, 47, 47, 47, 47, - 46, 46, 46, 46, 45, 45, 33, 34, 34, 34, 34, 35, 35, 35, 35, 37, 39, 39, - 39, 41, 43, 43, 43, 44, 45, 45, 45, 46, 47, 47, 47, 47, 46, 46, 46, 46, - 45, 45, 35, 35, 36, 36, 36, 37, 37, 37, 37, 39, 41, 41, 41, 43, 45, 45, - 45, 45, 46, 46, 46, 47, 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 36, 37, - 38, 38, 38, 39, 40, 40, 40, 41, 43, 43, 43, 45, 47, 47, 47, 47, 47, 47, - 47, 47, 48, 48, 48, 47, 46, 46, 46, 46, 45, 45, 36, 37, 38, 38, 38, 39, - 40, 40, 40, 41, 43, 43, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, - 48, 47, 46, 46, 46, 46, 45, 45, 36, 37, 38, 38, 38, 39, 40, 40, 40, 41, - 43, 43, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 46, 46, - 46, 46, 45, 45, 39, 39, 40, 40, 40, 41, 41, 41, 41, 42, 44, 44, 44, 45, - 47, 47, 47, 47, 48, 48, 48, 48, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, - 41, 42, 42, 42, 42, 42, 42, 42, 42, 43, 45, 45, 45, 46, 47, 47, 47, 48, - 48, 48, 48, 49, 50, 50, 50, 50, 
49, 49, 49, 49, 49, 49, 41, 42, 42, 42, - 42, 42, 42, 42, 42, 43, 45, 45, 45, 46, 47, 47, 47, 48, 48, 48, 48, 49, - 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 41, 42, 42, 42, 42, 42, 42, 42, - 42, 43, 45, 45, 45, 46, 47, 47, 47, 48, 48, 48, 48, 49, 50, 50, 50, 50, - 49, 49, 49, 49, 49, 49, 45, 45, 45, 45, 45, 44, 44, 44, 44, 45, 46, 46, - 46, 47, 47, 47, 47, 48, 49, 49, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, - 51, 51, 49, 48, 47, 47, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 48, 48, - 48, 49, 50, 50, 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 49, 48, - 47, 47, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 48, 48, 48, 49, 50, 50, - 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 49, 48, 47, 47, 47, 47, - 46, 46, 46, 47, 47, 47, 47, 47, 48, 48, 48, 49, 50, 50, 50, 51, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 49, 48, 47, 47, 47, 46, 46, 46, 46, 46, - 47, 47, 47, 47, 47, 47, 47, 48, 50, 50, 50, 51, 53, 53, 53, 53, 53, 53, - 53, 54, 54, 54, 48, 48, 47, 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, - 46, 46, 46, 48, 49, 49, 49, 51, 53, 53, 53, 53, 54, 54, 54, 55, 55, 55, - 48, 48, 47, 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 48, - 49, 49, 49, 51, 53, 53, 53, 53, 54, 54, 54, 55, 55, 55, 48, 48, 47, 47, - 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 48, 49, 49, 49, 51, - 53, 53, 53, 53, 54, 54, 54, 55, 55, 55, 49, 48, 47, 47, 47, 46, 45, 45, - 45, 45, 46, 46, 46, 46, 46, 46, 46, 47, 49, 49, 49, 51, 53, 53, 53, 54, - 55, 55, 55, 56, 57, 57, 49, 48, 47, 47, 47, 46, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 47, 49, 49, 49, 51, 53, 53, 53, 54, 55, 55, 55, 57, - 58, 58, 49, 48, 47, 47, 47, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 47, 49, 49, 49, 51, 53, 53, 53, 54, 55, 55, 55, 57, 58, 58, - /* Size 4x8 */ - 31, 34, 42, 48, 31, 35, 42, 46, 33, 37, 44, 46, 36, 41, 46, 46, 40, 44, - 48, 48, 45, 46, 49, 51, 47, 47, 50, 54, 47, 46, 49, 55, - /* Size 8x4 */ - 31, 31, 33, 36, 40, 45, 47, 47, 34, 35, 37, 41, 44, 46, 47, 46, 42, 42, - 44, 46, 48, 49, 50, 49, 
48, 46, 46, 46, 48, 51, 54, 55, - /* Size 8x16 */ - 32, 31, 31, 37, 37, 48, 48, 49, 31, 31, 31, 38, 38, 47, 47, 47, 31, 31, - 31, 38, 38, 47, 47, 47, 30, 32, 32, 40, 40, 46, 46, 45, 30, 32, 32, 40, - 40, 46, 46, 45, 33, 36, 36, 43, 43, 47, 47, 46, 33, 36, 36, 43, 43, 47, - 47, 46, 37, 40, 40, 47, 47, 47, 47, 45, 37, 40, 40, 47, 47, 47, 47, 45, - 42, 43, 43, 47, 47, 50, 50, 49, 42, 43, 43, 47, 47, 50, 50, 49, 49, 46, - 46, 48, 48, 53, 53, 53, 49, 46, 46, 48, 48, 53, 53, 53, 48, 46, 46, 47, - 47, 53, 53, 56, 48, 46, 46, 47, 47, 53, 53, 56, 49, 45, 45, 46, 46, 53, - 53, 58, - /* Size 16x8 */ - 32, 31, 31, 30, 30, 33, 33, 37, 37, 42, 42, 49, 49, 48, 48, 49, 31, 31, - 31, 32, 32, 36, 36, 40, 40, 43, 43, 46, 46, 46, 46, 45, 31, 31, 31, 32, - 32, 36, 36, 40, 40, 43, 43, 46, 46, 46, 46, 45, 37, 38, 38, 40, 40, 43, - 43, 47, 47, 47, 47, 48, 48, 47, 47, 46, 37, 38, 38, 40, 40, 43, 43, 47, - 47, 47, 47, 48, 48, 47, 47, 46, 48, 47, 47, 46, 46, 47, 47, 47, 47, 50, - 50, 53, 53, 53, 53, 53, 48, 47, 47, 46, 46, 47, 47, 47, 47, 50, 50, 53, - 53, 53, 53, 53, 49, 47, 47, 45, 45, 46, 46, 45, 45, 49, 49, 53, 53, 56, - 56, 58, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 33, 37, 37, 37, 42, 48, 48, 48, 48, 49, 49, 31, 31, - 31, 31, 31, 34, 37, 37, 37, 42, 47, 47, 47, 48, 48, 48, 31, 31, 31, 31, - 31, 34, 38, 38, 38, 42, 47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 34, - 38, 38, 38, 42, 47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 34, 38, 38, - 38, 42, 47, 47, 47, 47, 47, 47, 31, 31, 32, 32, 32, 35, 39, 39, 39, 42, - 46, 46, 46, 46, 46, 46, 30, 31, 32, 32, 32, 35, 40, 40, 40, 42, 46, 46, - 46, 45, 45, 45, 30, 31, 32, 32, 32, 35, 40, 40, 40, 42, 46, 46, 46, 45, - 45, 45, 30, 31, 32, 32, 32, 35, 40, 40, 40, 42, 46, 46, 46, 45, 45, 45, - 32, 33, 34, 34, 34, 37, 41, 41, 41, 44, 46, 46, 46, 46, 45, 45, 33, 34, - 36, 36, 36, 39, 43, 43, 43, 45, 47, 47, 47, 46, 46, 46, 33, 34, 36, 36, - 36, 39, 43, 43, 43, 45, 47, 47, 47, 46, 46, 46, 33, 34, 36, 36, 36, 39, - 43, 43, 43, 45, 47, 47, 47, 46, 46, 46, 
35, 36, 38, 38, 38, 41, 45, 45, - 45, 46, 47, 47, 47, 46, 45, 45, 37, 38, 40, 40, 40, 43, 47, 47, 47, 47, - 47, 47, 47, 46, 45, 45, 37, 38, 40, 40, 40, 43, 47, 47, 47, 47, 47, 47, - 47, 46, 45, 45, 37, 38, 40, 40, 40, 43, 47, 47, 47, 47, 47, 47, 47, 46, - 45, 45, 39, 40, 41, 41, 41, 44, 47, 47, 47, 48, 49, 49, 49, 48, 47, 47, - 42, 42, 43, 43, 43, 45, 47, 47, 47, 48, 50, 50, 50, 50, 49, 49, 42, 42, - 43, 43, 43, 45, 47, 47, 47, 48, 50, 50, 50, 50, 49, 49, 42, 42, 43, 43, - 43, 45, 47, 47, 47, 48, 50, 50, 50, 50, 49, 49, 45, 45, 44, 44, 44, 46, - 47, 47, 47, 49, 51, 51, 51, 51, 51, 51, 49, 48, 46, 46, 46, 47, 48, 48, - 48, 50, 53, 53, 53, 53, 53, 53, 49, 48, 46, 46, 46, 47, 48, 48, 48, 50, - 53, 53, 53, 53, 53, 53, 49, 48, 46, 46, 46, 47, 48, 48, 48, 50, 53, 53, - 53, 53, 53, 53, 48, 47, 46, 46, 46, 47, 47, 47, 47, 50, 53, 53, 53, 54, - 54, 54, 48, 47, 46, 46, 46, 46, 47, 47, 47, 50, 53, 53, 53, 54, 56, 56, - 48, 47, 46, 46, 46, 46, 47, 47, 47, 50, 53, 53, 53, 54, 56, 56, 48, 47, - 46, 46, 46, 46, 47, 47, 47, 50, 53, 53, 53, 54, 56, 56, 48, 47, 45, 45, - 45, 46, 46, 46, 46, 49, 53, 53, 53, 55, 57, 57, 49, 47, 45, 45, 45, 45, - 46, 46, 46, 49, 53, 53, 53, 56, 58, 58, 49, 47, 45, 45, 45, 45, 46, 46, - 46, 49, 53, 53, 53, 56, 58, 58, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 30, 30, 30, 32, 33, 33, 33, 35, 37, 37, 37, 39, - 42, 42, 42, 45, 49, 49, 49, 48, 48, 48, 48, 48, 49, 49, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 33, 34, 34, 34, 36, 38, 38, 38, 40, 42, 42, 42, 45, - 48, 48, 48, 47, 47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 34, 36, 36, 36, 38, 40, 40, 40, 41, 43, 43, 43, 44, 46, 46, 46, 46, - 46, 46, 46, 45, 45, 45, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 36, 36, - 36, 38, 40, 40, 40, 41, 43, 43, 43, 44, 46, 46, 46, 46, 46, 46, 46, 45, - 45, 45, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 36, 36, 36, 38, 40, 40, - 40, 41, 43, 43, 43, 44, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 33, 34, - 34, 34, 34, 35, 35, 35, 35, 37, 39, 39, 39, 41, 43, 43, 43, 
44, 45, 45, - 45, 46, 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 37, 37, 38, 38, 38, 39, - 40, 40, 40, 41, 43, 43, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, - 48, 47, 47, 47, 47, 46, 46, 46, 37, 37, 38, 38, 38, 39, 40, 40, 40, 41, - 43, 43, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, - 47, 46, 46, 46, 37, 37, 38, 38, 38, 39, 40, 40, 40, 41, 43, 43, 43, 45, - 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, 47, 46, 46, 46, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 44, 45, 45, 45, 46, 47, 47, 47, 48, - 48, 48, 48, 49, 50, 50, 50, 50, 50, 50, 50, 49, 49, 49, 48, 47, 47, 47, - 47, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 49, 50, 50, 50, 51, - 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 48, 47, 47, 47, 47, 46, 46, 46, - 46, 46, 47, 47, 47, 47, 47, 47, 47, 49, 50, 50, 50, 51, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 47, 47, - 47, 47, 47, 47, 47, 49, 50, 50, 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 48, 48, 47, 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, - 46, 48, 50, 50, 50, 51, 53, 53, 53, 54, 54, 54, 54, 55, 56, 56, 49, 48, - 47, 47, 47, 46, 45, 45, 45, 45, 46, 46, 46, 45, 45, 45, 45, 47, 49, 49, - 49, 51, 53, 53, 53, 54, 56, 56, 56, 57, 58, 58, 49, 48, 47, 47, 47, 46, - 45, 45, 45, 45, 46, 46, 46, 45, 45, 45, 45, 47, 49, 49, 49, 51, 53, 53, - 53, 54, 56, 56, 56, 57, 58, 58, - /* Size 4x16 */ - 31, 33, 42, 48, 31, 34, 42, 47, 31, 34, 42, 47, 31, 35, 42, 45, 31, 35, - 42, 45, 34, 39, 45, 46, 34, 39, 45, 46, 38, 43, 47, 46, 38, 43, 47, 46, - 42, 45, 48, 50, 42, 45, 48, 50, 48, 47, 50, 53, 48, 47, 50, 53, 47, 46, - 50, 54, 47, 46, 50, 54, 47, 45, 49, 56, - /* Size 16x4 */ - 31, 31, 31, 31, 31, 34, 34, 38, 38, 42, 42, 48, 48, 47, 47, 47, 33, 34, - 34, 35, 35, 39, 39, 43, 43, 45, 45, 47, 47, 46, 46, 45, 42, 42, 42, 42, - 42, 45, 45, 47, 47, 48, 48, 50, 50, 50, 50, 49, 48, 47, 47, 45, 45, 46, - 46, 46, 46, 50, 50, 53, 53, 54, 54, 56, - /* Size 8x32 */ - 32, 31, 31, 37, 37, 48, 48, 49, 31, 
31, 31, 37, 37, 47, 47, 48, 31, 31, - 31, 38, 38, 47, 47, 47, 31, 31, 31, 38, 38, 47, 47, 47, 31, 31, 31, 38, - 38, 47, 47, 47, 31, 32, 32, 39, 39, 46, 46, 46, 30, 32, 32, 40, 40, 46, - 46, 45, 30, 32, 32, 40, 40, 46, 46, 45, 30, 32, 32, 40, 40, 46, 46, 45, - 32, 34, 34, 41, 41, 46, 46, 45, 33, 36, 36, 43, 43, 47, 47, 46, 33, 36, - 36, 43, 43, 47, 47, 46, 33, 36, 36, 43, 43, 47, 47, 46, 35, 38, 38, 45, - 45, 47, 47, 45, 37, 40, 40, 47, 47, 47, 47, 45, 37, 40, 40, 47, 47, 47, - 47, 45, 37, 40, 40, 47, 47, 47, 47, 45, 39, 41, 41, 47, 47, 49, 49, 47, - 42, 43, 43, 47, 47, 50, 50, 49, 42, 43, 43, 47, 47, 50, 50, 49, 42, 43, - 43, 47, 47, 50, 50, 49, 45, 44, 44, 47, 47, 51, 51, 51, 49, 46, 46, 48, - 48, 53, 53, 53, 49, 46, 46, 48, 48, 53, 53, 53, 49, 46, 46, 48, 48, 53, - 53, 53, 48, 46, 46, 47, 47, 53, 53, 54, 48, 46, 46, 47, 47, 53, 53, 56, - 48, 46, 46, 47, 47, 53, 53, 56, 48, 46, 46, 47, 47, 53, 53, 56, 48, 45, - 45, 46, 46, 53, 53, 57, 49, 45, 45, 46, 46, 53, 53, 58, 49, 45, 45, 46, - 46, 53, 53, 58, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 30, 30, 30, 32, 33, 33, 33, 35, 37, 37, 37, 39, - 42, 42, 42, 45, 49, 49, 49, 48, 48, 48, 48, 48, 49, 49, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 34, 36, 36, 36, 38, 40, 40, 40, 41, 43, 43, 43, 44, - 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 34, 36, 36, 36, 38, 40, 40, 40, 41, 43, 43, 43, 44, 46, 46, 46, 46, - 46, 46, 46, 45, 45, 45, 37, 37, 38, 38, 38, 39, 40, 40, 40, 41, 43, 43, - 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, 47, 46, - 46, 46, 37, 37, 38, 38, 38, 39, 40, 40, 40, 41, 43, 43, 43, 45, 47, 47, - 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, 47, 46, 46, 46, 48, 47, - 47, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 49, 50, 50, - 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 48, 47, 47, 47, 47, 46, - 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 49, 50, 50, 50, 51, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 49, 48, 47, 47, 47, 46, 45, 45, 45, 45, - 
46, 46, 46, 45, 45, 45, 45, 47, 49, 49, 49, 51, 53, 53, 53, 54, 56, 56, - 56, 57, 58, 58 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 32, 32, 35, 32, 32, 33, 35, 32, 33, 35, 38, 35, 35, 38, 46, - /* Size 8x8 */ - 31, 31, 31, 32, 32, 32, 34, 35, 31, 32, 32, 32, 32, 33, 34, 35, 31, 32, - 32, 32, 32, 33, 33, 34, 32, 32, 32, 33, 34, 34, 35, 36, 32, 32, 32, 34, - 35, 35, 36, 38, 32, 33, 33, 34, 35, 36, 38, 40, 34, 34, 33, 35, 36, 38, - 39, 42, 35, 35, 34, 36, 38, 40, 42, 48, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 34, 36, 36, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 34, 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 34, 34, 34, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, - 35, 35, 36, 36, 31, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 36, - 36, 36, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 35, 36, 36, 37, 37, - 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 36, 37, 37, 38, 38, 32, 32, - 32, 32, 32, 32, 33, 34, 34, 35, 35, 36, 37, 37, 38, 38, 33, 33, 33, 33, - 33, 33, 34, 35, 35, 36, 36, 38, 39, 40, 42, 42, 34, 34, 34, 34, 33, 33, - 35, 35, 36, 37, 37, 39, 39, 41, 42, 42, 34, 34, 34, 34, 34, 34, 35, 36, - 36, 37, 37, 40, 41, 42, 45, 45, 36, 35, 35, 35, 34, 34, 36, 36, 37, 38, - 38, 42, 42, 45, 48, 48, 36, 35, 35, 35, 34, 34, 36, 36, 37, 38, 38, 42, - 42, 45, 48, 48, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 36, 36, 36, 37, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 34, 34, 34, 34, 35, 35, 35, 35, 37, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, - 34, 35, 35, 35, 35, 36, 31, 31, 31, 32, 32, 32, 32, 32, 
32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, - 35, 36, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, - 34, 34, 34, 34, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, - 34, 34, 34, 35, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, - 34, 35, 35, 35, 35, 36, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 36, 36, 36, - 36, 37, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, - 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 36, 37, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, - 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 36, 37, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, - 35, 35, 36, 36, 36, 36, 36, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 36, 36, 36, 36, 37, - 37, 37, 37, 38, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, - 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 38, 38, 38, 39, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 
34, 34, 34, 35, - 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, - 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 37, 37, 37, - 37, 38, 38, 38, 38, 39, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 34, 34, 34, 34, 35, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 39, 40, 40, - 40, 41, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35, - 35, 36, 36, 36, 36, 37, 38, 39, 39, 39, 40, 41, 42, 42, 42, 42, 34, 34, - 34, 34, 34, 34, 34, 33, 33, 33, 33, 34, 35, 35, 35, 35, 36, 36, 37, 37, - 37, 38, 39, 39, 39, 39, 41, 42, 42, 42, 42, 43, 34, 34, 34, 34, 34, 34, - 34, 33, 33, 33, 33, 34, 35, 35, 35, 35, 36, 36, 37, 37, 37, 38, 39, 39, - 39, 39, 41, 42, 42, 42, 42, 43, 34, 34, 34, 34, 34, 34, 34, 33, 33, 33, - 33, 34, 35, 35, 35, 35, 36, 36, 37, 37, 37, 38, 39, 39, 39, 39, 41, 42, - 42, 42, 42, 43, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 36, - 36, 36, 36, 37, 37, 37, 37, 38, 40, 41, 41, 41, 42, 44, 45, 45, 45, 45, - 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 35, 36, 36, 36, 36, 37, 37, - 38, 38, 38, 39, 41, 42, 42, 42, 44, 46, 47, 47, 47, 48, 36, 35, 35, 35, - 35, 35, 35, 34, 34, 34, 34, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, - 42, 42, 42, 42, 45, 47, 48, 48, 48, 49, 36, 35, 35, 35, 35, 35, 35, 34, - 34, 34, 34, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, 42, 42, 42, 42, - 45, 47, 48, 48, 48, 49, 36, 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, 35, - 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, 42, 42, 42, 42, 45, 47, 48, 48, - 48, 49, 37, 37, 36, 36, 36, 36, 36, 35, 35, 35, 35, 36, 37, 37, 37, 37, - 38, 39, 39, 39, 39, 41, 42, 43, 43, 43, 45, 48, 49, 49, 49, 50, - /* Size 4x8 */ - 31, 31, 32, 35, 32, 32, 32, 35, 32, 32, 33, 34, 32, 32, 34, 36, 32, 33, - 35, 38, 33, 33, 36, 40, 34, 34, 37, 42, 35, 34, 38, 48, - /* Size 8x4 */ - 31, 32, 32, 32, 32, 33, 34, 35, 31, 32, 32, 32, 
33, 33, 34, 34, 32, 32, - 33, 34, 35, 36, 37, 38, 35, 35, 34, 36, 38, 40, 42, 48, - /* Size 8x16 */ - 32, 31, 31, 31, 32, 32, 35, 36, 31, 32, 32, 32, 32, 32, 35, 35, 31, 32, - 32, 32, 32, 32, 35, 35, 31, 32, 32, 32, 32, 32, 34, 35, 31, 32, 32, 32, - 33, 33, 34, 34, 31, 32, 32, 32, 33, 33, 34, 34, 31, 32, 32, 33, 34, 34, - 35, 36, 32, 32, 32, 33, 34, 34, 36, 36, 32, 32, 32, 33, 34, 34, 36, 37, - 32, 32, 33, 34, 35, 35, 37, 38, 32, 32, 33, 34, 35, 35, 37, 38, 33, 33, - 33, 35, 36, 36, 40, 41, 34, 34, 34, 35, 37, 37, 41, 42, 34, 34, 34, 35, - 37, 37, 43, 44, 36, 35, 34, 36, 38, 38, 46, 48, 36, 35, 34, 36, 38, 38, - 46, 48, - /* Size 16x8 */ - 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33, 34, 34, 36, 36, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 31, 32, 32, 32, 32, 32, - 33, 33, 33, 34, 34, 35, 35, 35, 36, 36, 32, 32, 32, 32, 33, 33, 34, 34, - 34, 35, 35, 36, 37, 37, 38, 38, 32, 32, 32, 32, 33, 33, 34, 34, 34, 35, - 35, 36, 37, 37, 38, 38, 35, 35, 35, 34, 34, 34, 35, 36, 36, 37, 37, 40, - 41, 43, 46, 46, 36, 35, 35, 35, 34, 34, 36, 36, 37, 38, 38, 41, 42, 44, - 48, 48, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33, 35, 36, 36, 36, 31, 31, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 35, 35, 35, 35, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 35, 35, 35, 35, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 35, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 35, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 35, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 34, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, - 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, - 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 31, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 31, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 34, 35, 35, 35, 35, 31, 32, 32, 32, 
32, 32, - 33, 33, 34, 34, 34, 34, 35, 36, 36, 36, 32, 32, 32, 32, 32, 32, 33, 34, - 34, 34, 34, 35, 36, 36, 36, 36, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, - 34, 35, 36, 36, 36, 36, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, - 36, 36, 36, 36, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 36, 37, - 37, 37, 32, 32, 32, 33, 33, 33, 33, 34, 35, 35, 35, 36, 37, 38, 38, 38, - 32, 32, 32, 33, 33, 33, 34, 35, 35, 35, 35, 36, 37, 38, 38, 38, 32, 32, - 32, 33, 33, 33, 34, 35, 35, 35, 35, 36, 37, 38, 38, 38, 32, 32, 32, 33, - 33, 33, 34, 35, 35, 35, 35, 36, 37, 38, 38, 38, 32, 33, 33, 33, 33, 33, - 34, 35, 36, 36, 36, 37, 39, 40, 40, 40, 33, 33, 33, 33, 33, 33, 35, 36, - 36, 36, 36, 38, 40, 41, 41, 41, 34, 34, 34, 34, 34, 34, 35, 36, 37, 37, - 37, 39, 41, 42, 42, 42, 34, 34, 34, 34, 34, 34, 35, 36, 37, 37, 37, 39, - 41, 42, 42, 42, 34, 34, 34, 34, 34, 34, 35, 36, 37, 37, 37, 39, 41, 42, - 42, 42, 34, 34, 34, 34, 34, 34, 35, 37, 37, 37, 37, 40, 43, 44, 44, 44, - 35, 35, 34, 34, 34, 34, 36, 37, 38, 38, 38, 41, 45, 47, 47, 47, 36, 35, - 35, 34, 34, 34, 36, 37, 38, 38, 38, 42, 46, 48, 48, 48, 36, 35, 35, 34, - 34, 34, 36, 37, 38, 38, 38, 42, 46, 48, 48, 48, 36, 35, 35, 34, 34, 34, - 36, 37, 38, 38, 38, 42, 46, 48, 48, 48, 37, 36, 36, 36, 36, 36, 37, 38, - 39, 39, 39, 42, 46, 49, 49, 49, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 36, 36, 36, 37, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, - 34, 34, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, - 34, 36, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 36, 31, 31, - 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, - 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 36, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, - 35, 35, 35, 36, 36, 36, 36, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, - 37, 37, 37, 38, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, - 34, 34, 34, 35, 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, - 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 36, - 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, 38, 39, 39, 39, - 40, 41, 42, 42, 42, 42, 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 35, - 35, 36, 36, 36, 36, 37, 37, 37, 37, 39, 40, 41, 41, 41, 43, 45, 46, 46, - 46, 46, 36, 35, 35, 35, 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 36, - 37, 38, 38, 38, 38, 40, 41, 42, 42, 42, 44, 47, 48, 48, 48, 49, 36, 35, - 35, 35, 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 36, 37, 38, 38, 38, - 38, 40, 41, 42, 42, 42, 44, 47, 48, 48, 48, 49, 36, 35, 35, 35, 35, 35, - 35, 35, 34, 34, 34, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, 41, 42, - 42, 42, 44, 47, 48, 48, 48, 49, - /* Size 4x16 */ - 31, 31, 32, 36, 31, 32, 32, 35, 32, 32, 32, 35, 32, 32, 32, 35, 32, 32, - 33, 34, 32, 32, 33, 34, 32, 32, 34, 36, 32, 32, 34, 36, 32, 32, 34, 37, - 32, 33, 35, 38, 32, 33, 35, 38, 33, 33, 36, 41, 34, 34, 37, 42, 34, 34, - 37, 44, 35, 34, 38, 48, 35, 34, 38, 48, - /* Size 16x4 */ - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 32, 32, 32, 32, - 33, 33, 34, 34, 34, 35, 35, 36, 37, 37, 38, 38, 36, 35, 35, 35, 34, 34, - 36, 36, 37, 38, 38, 41, 42, 44, 48, 48, - /* Size 
8x32 */ - 32, 31, 31, 31, 32, 32, 35, 36, 31, 31, 31, 32, 32, 32, 35, 35, 31, 32, - 32, 32, 32, 32, 35, 35, 31, 32, 32, 32, 32, 32, 35, 35, 31, 32, 32, 32, - 32, 32, 35, 35, 31, 32, 32, 32, 32, 32, 35, 35, 31, 32, 32, 32, 32, 32, - 34, 35, 31, 32, 32, 32, 32, 32, 34, 35, 31, 32, 32, 32, 33, 33, 34, 34, - 31, 32, 32, 32, 33, 33, 34, 34, 31, 32, 32, 32, 33, 33, 34, 34, 31, 32, - 32, 33, 33, 33, 35, 35, 31, 32, 32, 33, 34, 34, 35, 36, 32, 32, 32, 33, - 34, 34, 36, 36, 32, 32, 32, 33, 34, 34, 36, 36, 32, 32, 32, 33, 34, 34, - 36, 36, 32, 32, 32, 33, 34, 34, 36, 37, 32, 32, 33, 33, 35, 35, 37, 38, - 32, 32, 33, 34, 35, 35, 37, 38, 32, 32, 33, 34, 35, 35, 37, 38, 32, 32, - 33, 34, 35, 35, 37, 38, 32, 33, 33, 34, 36, 36, 39, 40, 33, 33, 33, 35, - 36, 36, 40, 41, 34, 34, 34, 35, 37, 37, 41, 42, 34, 34, 34, 35, 37, 37, - 41, 42, 34, 34, 34, 35, 37, 37, 41, 42, 34, 34, 34, 35, 37, 37, 43, 44, - 35, 34, 34, 36, 38, 38, 45, 47, 36, 35, 34, 36, 38, 38, 46, 48, 36, 35, - 34, 36, 38, 38, 46, 48, 36, 35, 34, 36, 38, 38, 46, 48, 37, 36, 36, 37, - 39, 39, 46, 49, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 36, 36, 36, 37, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 34, 34, 34, 34, 34, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, - 34, 34, 34, 34, 34, 36, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, - 36, 37, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, - 34, 35, 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, - 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 35, 35, 35, 35, 35, 35, - 34, 34, 34, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 39, 40, 41, - 41, 41, 43, 45, 46, 46, 46, 
46, 36, 35, 35, 35, 35, 35, 35, 35, 34, 34, - 34, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, 41, 42, 42, 42, 44, 47, - 48, 48, 48, 49 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 32, 38, 46, 32, 34, 41, 46, 38, 41, 47, 47, 46, 46, 47, 52, - /* Size 8x8 */ - 31, 31, 30, 34, 36, 39, 42, 48, 31, 31, 31, 34, 37, 40, 42, 47, 30, 31, - 32, 35, 39, 41, 42, 46, 34, 34, 35, 39, 42, 44, 45, 47, 36, 37, 39, 42, - 46, 47, 47, 47, 39, 40, 41, 44, 47, 47, 48, 49, 42, 42, 42, 45, 47, 48, - 48, 50, 48, 47, 46, 47, 47, 49, 50, 53, - /* Size 16x16 */ - 32, 31, 31, 31, 30, 30, 33, 33, 34, 36, 36, 40, 41, 44, 49, 49, 31, 31, - 31, 31, 31, 31, 33, 34, 36, 38, 38, 41, 42, 44, 48, 48, 31, 31, 31, 31, - 31, 31, 34, 34, 36, 38, 38, 41, 42, 44, 47, 47, 31, 31, 31, 31, 31, 31, - 34, 35, 36, 39, 39, 41, 42, 44, 47, 47, 30, 31, 31, 31, 32, 32, 34, 35, - 37, 40, 40, 42, 42, 44, 46, 46, 30, 31, 31, 31, 32, 32, 34, 35, 37, 40, - 40, 42, 42, 44, 46, 46, 33, 33, 34, 34, 34, 34, 37, 38, 40, 42, 42, 44, - 44, 45, 47, 47, 33, 34, 34, 35, 35, 35, 38, 39, 40, 43, 43, 44, 45, 46, - 47, 47, 34, 36, 36, 36, 37, 37, 40, 40, 42, 45, 45, 45, 46, 46, 47, 47, - 36, 38, 38, 39, 40, 40, 42, 43, 45, 47, 47, 47, 47, 47, 48, 48, 36, 38, - 38, 39, 40, 40, 42, 43, 45, 47, 47, 47, 47, 47, 48, 48, 40, 41, 41, 41, - 42, 42, 44, 44, 45, 47, 47, 48, 48, 49, 50, 50, 41, 42, 42, 42, 42, 42, - 44, 45, 46, 47, 47, 48, 48, 49, 50, 50, 44, 44, 44, 44, 44, 44, 45, 46, - 46, 47, 47, 49, 49, 50, 51, 51, 49, 48, 47, 47, 46, 46, 47, 47, 47, 48, - 48, 50, 50, 51, 53, 53, 49, 48, 47, 47, 46, 46, 47, 47, 47, 48, 48, 50, - 50, 51, 53, 53, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 34, 36, - 36, 36, 36, 38, 40, 41, 41, 41, 44, 47, 49, 49, 49, 49, 31, 31, 31, 31, - 31, 31, 31, 31, 30, 30, 30, 32, 33, 34, 34, 34, 35, 36, 37, 37, 37, 39, - 41, 42, 42, 42, 44, 47, 48, 48, 48, 48, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, 33, 34, 34, 34, 36, 37, 38, 38, 38, 39, 41, 42, 42, 42, - 44, 46, 48, 48, 
48, 47, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, - 34, 34, 34, 34, 36, 37, 38, 38, 38, 40, 41, 42, 42, 42, 44, 46, 47, 47, - 47, 47, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 34, - 36, 37, 38, 38, 38, 40, 41, 42, 42, 42, 44, 46, 47, 47, 47, 47, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 34, 36, 37, 38, 38, - 38, 40, 41, 42, 42, 42, 44, 46, 47, 47, 47, 47, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 33, 34, 35, 35, 35, 36, 38, 39, 39, 39, 40, 41, 42, - 42, 42, 44, 46, 47, 47, 47, 47, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 33, 34, 35, 35, 35, 37, 38, 39, 39, 39, 41, 42, 42, 42, 42, 44, 46, - 46, 46, 46, 46, 30, 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, - 35, 35, 37, 39, 40, 40, 40, 41, 42, 42, 42, 42, 44, 45, 46, 46, 46, 46, - 30, 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 37, 39, - 40, 40, 40, 41, 42, 42, 42, 42, 44, 45, 46, 46, 46, 46, 30, 30, 31, 31, - 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 37, 39, 40, 40, 40, 41, - 42, 42, 42, 42, 44, 45, 46, 46, 46, 46, 31, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 34, 36, 37, 37, 37, 38, 40, 41, 41, 41, 42, 43, 43, 43, 43, - 44, 46, 46, 46, 46, 46, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 36, - 37, 38, 38, 38, 40, 41, 42, 42, 42, 43, 44, 44, 44, 44, 45, 46, 47, 47, - 47, 46, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 37, 38, 39, 39, 39, - 40, 42, 43, 43, 43, 44, 44, 45, 45, 45, 46, 47, 47, 47, 47, 47, 33, 34, - 34, 34, 34, 34, 35, 35, 35, 35, 35, 37, 38, 39, 39, 39, 40, 42, 43, 43, - 43, 44, 44, 45, 45, 45, 46, 47, 47, 47, 47, 47, 33, 34, 34, 34, 34, 34, - 35, 35, 35, 35, 35, 37, 38, 39, 39, 39, 40, 42, 43, 43, 43, 44, 44, 45, - 45, 45, 46, 47, 47, 47, 47, 47, 34, 35, 36, 36, 36, 36, 36, 37, 37, 37, - 37, 38, 40, 40, 40, 40, 42, 44, 45, 45, 45, 45, 45, 46, 46, 46, 46, 47, - 47, 47, 47, 47, 36, 36, 37, 37, 37, 37, 38, 38, 39, 39, 39, 40, 41, 42, - 42, 42, 44, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 36, 37, 38, 38, 
38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 45, 46, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 36, 37, 38, 38, - 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 45, 46, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 36, 37, 38, 38, 38, 38, 39, 39, - 40, 40, 40, 41, 42, 43, 43, 43, 45, 46, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 48, 48, 48, 47, 38, 39, 39, 40, 40, 40, 40, 41, 41, 41, 41, 42, - 43, 44, 44, 44, 45, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 49, 49, - 49, 48, 40, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 43, 44, 44, 44, 44, - 45, 47, 47, 47, 47, 48, 48, 48, 48, 48, 49, 49, 50, 50, 50, 49, 41, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 43, 44, 45, 45, 45, 46, 47, 47, 47, - 47, 48, 48, 48, 48, 48, 49, 50, 50, 50, 50, 50, 41, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 43, 44, 45, 45, 45, 46, 47, 47, 47, 47, 48, 48, 48, - 48, 48, 49, 50, 50, 50, 50, 50, 41, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 43, 44, 45, 45, 45, 46, 47, 47, 47, 47, 48, 48, 48, 48, 48, 49, 50, - 50, 50, 50, 50, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 46, - 46, 46, 46, 47, 47, 47, 47, 48, 49, 49, 49, 49, 50, 51, 51, 51, 51, 51, - 47, 47, 46, 46, 46, 46, 46, 46, 45, 45, 45, 46, 46, 47, 47, 47, 47, 47, - 47, 47, 47, 48, 49, 50, 50, 50, 51, 52, 52, 52, 52, 52, 49, 48, 48, 47, - 47, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 48, 48, 49, - 50, 50, 50, 50, 51, 52, 53, 53, 53, 53, 49, 48, 48, 47, 47, 47, 47, 46, - 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 48, 48, 49, 50, 50, 50, 50, - 51, 52, 53, 53, 53, 53, 49, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, - 47, 47, 47, 47, 47, 47, 48, 48, 48, 49, 50, 50, 50, 50, 51, 52, 53, 53, - 53, 53, 49, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 47, 47, 47, - 47, 47, 47, 47, 47, 48, 49, 50, 50, 50, 51, 52, 53, 53, 53, 53, - /* Size 4x8 */ - 31, 31, 37, 48, 31, 31, 38, 47, 31, 32, 40, 46, 34, 36, 43, 47, 37, 39, - 46, 47, 39, 41, 47, 48, 42, 43, 47, 50, 48, 46, 48, 53, - /* Size 8x4 */ - 31, 31, 
31, 34, 37, 39, 42, 48, 31, 31, 32, 36, 39, 41, 43, 46, 37, 38, - 40, 43, 46, 47, 47, 48, 48, 47, 46, 47, 47, 48, 50, 53, - /* Size 8x16 */ - 32, 31, 31, 33, 37, 37, 45, 48, 31, 31, 31, 34, 38, 38, 45, 47, 31, 31, - 31, 34, 38, 38, 45, 47, 31, 31, 32, 34, 39, 39, 45, 46, 30, 32, 32, 35, - 40, 40, 44, 46, 30, 32, 32, 35, 40, 40, 44, 46, 33, 34, 35, 37, 42, 42, - 46, 47, 33, 35, 36, 38, 43, 43, 46, 47, 35, 37, 37, 40, 44, 44, 46, 47, - 37, 39, 40, 43, 47, 47, 47, 47, 37, 39, 40, 43, 47, 47, 47, 47, 41, 42, - 42, 44, 47, 47, 49, 49, 42, 42, 43, 44, 47, 47, 49, 50, 44, 44, 44, 45, - 47, 47, 50, 51, 49, 47, 46, 47, 48, 48, 52, 53, 49, 47, 46, 47, 48, 48, - 52, 53, - /* Size 16x8 */ - 32, 31, 31, 31, 30, 30, 33, 33, 35, 37, 37, 41, 42, 44, 49, 49, 31, 31, - 31, 31, 32, 32, 34, 35, 37, 39, 39, 42, 42, 44, 47, 47, 31, 31, 31, 32, - 32, 32, 35, 36, 37, 40, 40, 42, 43, 44, 46, 46, 33, 34, 34, 34, 35, 35, - 37, 38, 40, 43, 43, 44, 44, 45, 47, 47, 37, 38, 38, 39, 40, 40, 42, 43, - 44, 47, 47, 47, 47, 47, 48, 48, 37, 38, 38, 39, 40, 40, 42, 43, 44, 47, - 47, 47, 47, 47, 48, 48, 45, 45, 45, 45, 44, 44, 46, 46, 46, 47, 47, 49, - 49, 50, 52, 52, 48, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 49, 50, 51, - 53, 53, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 31, 33, 35, 37, 37, 37, 40, 45, 48, 48, 48, 31, 31, - 31, 31, 31, 31, 33, 36, 37, 37, 37, 41, 45, 48, 48, 48, 31, 31, 31, 31, - 31, 31, 34, 36, 38, 38, 38, 41, 45, 47, 47, 47, 31, 31, 31, 31, 31, 31, - 34, 37, 38, 38, 38, 41, 45, 47, 47, 47, 31, 31, 31, 31, 31, 31, 34, 37, - 38, 38, 38, 41, 45, 47, 47, 47, 31, 31, 31, 31, 31, 31, 34, 37, 38, 38, - 38, 41, 45, 47, 47, 47, 31, 31, 31, 32, 32, 32, 34, 37, 39, 39, 39, 41, - 45, 46, 46, 46, 30, 31, 31, 32, 32, 32, 34, 38, 39, 39, 39, 42, 44, 46, - 46, 46, 30, 31, 32, 32, 32, 32, 35, 38, 40, 40, 40, 42, 44, 46, 46, 46, - 30, 31, 32, 32, 32, 32, 35, 38, 40, 40, 40, 42, 44, 46, 46, 46, 30, 31, - 32, 32, 32, 32, 35, 38, 40, 40, 40, 42, 44, 46, 46, 46, 31, 32, 33, 33, - 33, 33, 36, 39, 41, 41, 
41, 43, 45, 46, 46, 46, 33, 34, 34, 35, 35, 35, - 37, 40, 42, 42, 42, 44, 46, 47, 47, 47, 33, 34, 35, 36, 36, 36, 38, 41, - 43, 43, 43, 44, 46, 47, 47, 47, 33, 34, 35, 36, 36, 36, 38, 41, 43, 43, - 43, 44, 46, 47, 47, 47, 33, 34, 35, 36, 36, 36, 38, 41, 43, 43, 43, 44, - 46, 47, 47, 47, 35, 36, 37, 37, 37, 37, 40, 43, 44, 44, 44, 45, 46, 47, - 47, 47, 36, 37, 38, 39, 39, 39, 42, 44, 46, 46, 46, 47, 47, 47, 47, 47, - 37, 38, 39, 40, 40, 40, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 37, 38, - 39, 40, 40, 40, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 37, 38, 39, 40, - 40, 40, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 39, 39, 40, 41, 41, 41, - 43, 46, 47, 47, 47, 48, 48, 48, 48, 48, 41, 41, 42, 42, 42, 42, 44, 46, - 47, 47, 47, 48, 49, 49, 49, 49, 42, 42, 42, 43, 43, 43, 44, 46, 47, 47, - 47, 48, 49, 50, 50, 50, 42, 42, 42, 43, 43, 43, 44, 46, 47, 47, 47, 48, - 49, 50, 50, 50, 42, 42, 42, 43, 43, 43, 44, 46, 47, 47, 47, 48, 49, 50, - 50, 50, 44, 44, 44, 44, 44, 44, 45, 47, 47, 47, 47, 49, 50, 51, 51, 51, - 47, 46, 46, 46, 46, 46, 46, 47, 48, 48, 48, 49, 51, 52, 52, 52, 49, 48, - 47, 46, 46, 46, 47, 48, 48, 48, 48, 50, 52, 53, 53, 53, 49, 48, 47, 46, - 46, 46, 47, 48, 48, 48, 48, 50, 52, 53, 53, 53, 49, 48, 47, 46, 46, 46, - 47, 48, 48, 48, 48, 50, 52, 53, 53, 53, 49, 48, 47, 46, 46, 46, 47, 47, - 47, 47, 47, 49, 52, 53, 53, 53, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 35, 36, - 37, 37, 37, 39, 41, 42, 42, 42, 44, 47, 49, 49, 49, 49, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 34, 36, 37, 38, 38, 38, 39, - 41, 42, 42, 42, 44, 46, 48, 48, 48, 48, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 33, 34, 35, 35, 35, 37, 38, 39, 39, 39, 40, 42, 42, 42, 42, - 44, 46, 47, 47, 47, 47, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, - 35, 36, 36, 36, 37, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 46, 46, 46, - 46, 46, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 36, - 37, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 
46, 46, 46, 46, 46, 31, 31, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 36, 37, 39, 40, 40, - 40, 41, 42, 43, 43, 43, 44, 46, 46, 46, 46, 46, 33, 33, 34, 34, 34, 34, - 34, 34, 35, 35, 35, 36, 37, 38, 38, 38, 40, 42, 43, 43, 43, 43, 44, 44, - 44, 44, 45, 46, 47, 47, 47, 47, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, - 38, 39, 40, 41, 41, 41, 43, 44, 45, 45, 45, 46, 46, 46, 46, 46, 47, 47, - 48, 48, 48, 47, 37, 37, 38, 38, 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, - 43, 43, 44, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 47, - 37, 37, 38, 38, 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 46, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 47, 37, 37, 38, 38, - 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 46, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 48, 48, 48, 48, 47, 40, 41, 41, 41, 41, 41, 41, 42, - 42, 42, 42, 43, 44, 44, 44, 44, 45, 47, 47, 47, 47, 48, 48, 48, 48, 48, - 49, 49, 50, 50, 50, 49, 45, 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 45, - 46, 46, 46, 46, 46, 47, 47, 47, 47, 48, 49, 49, 49, 49, 50, 51, 52, 52, - 52, 52, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 48, 49, 50, 50, 50, 51, 52, 53, 53, 53, 53, 48, 48, - 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 48, 49, 50, 50, 50, 51, 52, 53, 53, 53, 53, 48, 48, 47, 47, 47, 47, - 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 49, 50, - 50, 50, 51, 52, 53, 53, 53, 53, - /* Size 4x16 */ - 31, 31, 37, 48, 31, 31, 38, 47, 31, 31, 38, 47, 31, 32, 39, 46, 31, 32, - 40, 46, 31, 32, 40, 46, 34, 35, 42, 47, 34, 36, 43, 47, 36, 37, 44, 47, - 38, 40, 47, 47, 38, 40, 47, 47, 41, 42, 47, 49, 42, 43, 47, 50, 44, 44, - 47, 51, 48, 46, 48, 53, 48, 46, 48, 53, - /* Size 16x4 */ - 31, 31, 31, 31, 31, 31, 34, 34, 36, 38, 38, 41, 42, 44, 48, 48, 31, 31, - 31, 32, 32, 32, 35, 36, 37, 40, 40, 42, 43, 44, 46, 46, 37, 38, 38, 39, - 40, 40, 42, 43, 44, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 46, 46, 46, - 47, 47, 
47, 47, 47, 49, 50, 51, 53, 53, - /* Size 8x32 */ - 32, 31, 31, 33, 37, 37, 45, 48, 31, 31, 31, 33, 37, 37, 45, 48, 31, 31, - 31, 34, 38, 38, 45, 47, 31, 31, 31, 34, 38, 38, 45, 47, 31, 31, 31, 34, - 38, 38, 45, 47, 31, 31, 31, 34, 38, 38, 45, 47, 31, 31, 32, 34, 39, 39, - 45, 46, 30, 31, 32, 34, 39, 39, 44, 46, 30, 32, 32, 35, 40, 40, 44, 46, - 30, 32, 32, 35, 40, 40, 44, 46, 30, 32, 32, 35, 40, 40, 44, 46, 31, 33, - 33, 36, 41, 41, 45, 46, 33, 34, 35, 37, 42, 42, 46, 47, 33, 35, 36, 38, - 43, 43, 46, 47, 33, 35, 36, 38, 43, 43, 46, 47, 33, 35, 36, 38, 43, 43, - 46, 47, 35, 37, 37, 40, 44, 44, 46, 47, 36, 38, 39, 42, 46, 46, 47, 47, - 37, 39, 40, 43, 47, 47, 47, 47, 37, 39, 40, 43, 47, 47, 47, 47, 37, 39, - 40, 43, 47, 47, 47, 47, 39, 40, 41, 43, 47, 47, 48, 48, 41, 42, 42, 44, - 47, 47, 49, 49, 42, 42, 43, 44, 47, 47, 49, 50, 42, 42, 43, 44, 47, 47, - 49, 50, 42, 42, 43, 44, 47, 47, 49, 50, 44, 44, 44, 45, 47, 47, 50, 51, - 47, 46, 46, 46, 48, 48, 51, 52, 49, 47, 46, 47, 48, 48, 52, 53, 49, 47, - 46, 47, 48, 48, 52, 53, 49, 47, 46, 47, 48, 48, 52, 53, 49, 47, 46, 47, - 47, 47, 52, 53, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 35, 36, - 37, 37, 37, 39, 41, 42, 42, 42, 44, 47, 49, 49, 49, 49, 31, 31, 31, 31, - 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 37, 38, 39, 39, 39, 40, - 42, 42, 42, 42, 44, 46, 47, 47, 47, 47, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 33, 35, 36, 36, 36, 37, 39, 40, 40, 40, 41, 42, 43, 43, 43, - 44, 46, 46, 46, 46, 46, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 36, - 37, 38, 38, 38, 40, 42, 43, 43, 43, 43, 44, 44, 44, 44, 45, 46, 47, 47, - 47, 47, 37, 37, 38, 38, 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, - 44, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 47, 37, 37, - 38, 38, 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 46, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 47, 45, 45, 45, 45, 45, 45, - 45, 44, 44, 44, 44, 45, 46, 46, 46, 46, 46, 47, 47, 47, 47, 
48, 49, 49, - 49, 49, 50, 51, 52, 52, 52, 52, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, - 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 49, 50, 50, 50, 51, 52, - 53, 53, 53, 53 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 31, 32, 32, 32, 32, 32, 32, 33, 32, 32, 33, 34, 32, 33, 34, 35, - /* Size 8x8 */ - 31, 31, 31, 31, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, - 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, - 33, 33, 34, 35, 32, 32, 32, 32, 33, 34, 34, 35, 32, 32, 32, 32, 34, 34, - 35, 36, 33, 33, 33, 33, 35, 35, 36, 38, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 31, 31, - 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, - 33, 34, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 35, - 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 31, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 33, 34, 35, 35, 35, 36, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 34, 34, 35, 35, 35, 36, 37, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 34, 34, 35, 35, 35, 36, 37, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, - 34, 35, 36, 36, 36, 38, 34, 34, 34, 34, 34, 33, 33, 34, 35, 35, 35, 36, - 37, 37, 38, 39, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 34, 34, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 34, 34, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 34, 34, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 34, 34, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 33, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, - 33, 33, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 34, 34, 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, - 34, 34, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 
34, 34, 34, 34, 34, 35, 35, 35, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, - 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, - 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, - 34, 34, 34, 35, 35, 35, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, - 35, 35, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, - 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, - 35, 35, 35, 35, 36, 36, 37, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, - 36, 36, 37, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, - 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, - 35, 35, 36, 36, 36, 36, 36, 37, 38, 38, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, - 36, 36, 37, 38, 38, 38, 34, 34, 34, 34, 34, 34, 34, 34, 34, 33, 33, 33, - 33, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, - 39, 39, 34, 34, 34, 34, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 34, 34, - 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 39, 39, - /* Size 4x8 */ - 31, 31, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, - 33, 34, 32, 32, 34, 34, 32, 33, 
34, 35, 33, 33, 35, 36, - /* Size 8x4 */ - 31, 31, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 33, 34, 34, 35, 32, 32, 32, 33, 34, 34, 35, 36, - /* Size 8x16 */ - 32, 31, 31, 31, 31, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 33, 31, 32, - 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, - 32, 32, 32, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 32, 33, - 33, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 33, 34, 34, 34, - 32, 32, 32, 32, 33, 34, 34, 34, 32, 32, 32, 32, 33, 34, 34, 34, 32, 32, - 32, 32, 33, 35, 35, 35, 32, 32, 33, 33, 34, 35, 35, 36, 32, 32, 33, 33, - 34, 35, 35, 36, 32, 33, 33, 33, 34, 36, 36, 36, 34, 34, 34, 34, 35, 37, - 37, 38, - /* Size 16x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 31, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 34, 34, 34, 35, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, - 34, 35, 35, 35, 36, 37, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, - 35, 35, 36, 37, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 35, 36, 36, - 36, 38, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 
33, 33, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 33, 33, 34, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, - 34, 35, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 34, 34, 34, 34, 34, 35, 35, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 34, 35, 35, 35, 35, 35, 36, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, - 35, 35, 35, 35, 36, 36, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 35, 35, - 35, 35, 36, 37, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, - 36, 37, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 36, 37, - 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 36, 37, 32, 33, - 33, 33, 33, 33, 33, 33, 34, 35, 36, 36, 36, 36, 36, 38, 33, 33, 33, 33, - 33, 33, 33, 34, 34, 35, 36, 36, 36, 36, 37, 38, 34, 34, 34, 34, 34, 34, - 34, 34, 35, 36, 37, 37, 37, 37, 38, 39, 34, 34, 34, 34, 34, 34, 34, 34, - 35, 36, 37, 37, 37, 37, 38, 39, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, - 34, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 33, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 34, 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 36, 36, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, - 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, - 35, 35, 36, 36, 37, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, - 37, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 32, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, - 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 38, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, - 37, 37, 37, 37, 38, 38, 39, 39, - /* Size 4x16 */ - 31, 31, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, - 32, 32, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 33, 33, 32, 32, 33, 34, - 32, 32, 33, 34, 32, 32, 33, 34, 32, 32, 34, 35, 32, 33, 34, 35, 32, 33, - 34, 35, 33, 33, 35, 36, 34, 34, 36, 37, - /* Size 16x4 */ - 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 33, 34, 
34, 34, 35, 36, 32, 32, 32, 32, 32, 33, - 33, 33, 34, 34, 34, 35, 35, 35, 36, 37, - /* Size 8x32 */ - 32, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 33, 31, 31, - 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, - 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, - 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, - 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32, - 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, - 32, 33, 33, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 33, 33, - 33, 34, 31, 32, 32, 32, 33, 34, 34, 34, 32, 32, 32, 32, 33, 34, 34, 34, - 32, 32, 32, 32, 33, 34, 34, 34, 32, 32, 32, 32, 33, 34, 34, 34, 32, 32, - 32, 32, 33, 34, 34, 34, 32, 32, 32, 32, 33, 34, 34, 35, 32, 32, 32, 32, - 33, 35, 35, 35, 32, 32, 33, 33, 33, 35, 35, 36, 32, 32, 33, 33, 34, 35, - 35, 36, 32, 32, 33, 33, 34, 35, 35, 36, 32, 32, 33, 33, 34, 35, 35, 36, - 32, 32, 33, 33, 34, 35, 35, 36, 32, 33, 33, 33, 34, 36, 36, 36, 33, 33, - 33, 33, 34, 36, 36, 37, 34, 34, 34, 34, 35, 37, 37, 38, 34, 34, 34, 34, - 35, 37, 37, 38, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, - 33, 33, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, - 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, - 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 
32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, - 35, 35, 35, 35, 36, 36, 37, 37, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 36, 36, 36, 36, 36, - 36, 37, 38, 38 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 31, 34, 38, 31, 32, 35, 40, 34, 35, 39, 43, 38, 40, 43, 47, - /* Size 8x8 */ - 31, 31, 31, 30, 34, 35, 37, 40, 31, 31, 31, 31, 34, 35, 38, 41, 31, 31, - 31, 31, 35, 36, 39, 41, 30, 31, 31, 32, 35, 36, 40, 42, 34, 34, 35, 35, - 39, 40, 43, 44, 35, 35, 36, 36, 40, 41, 44, 45, 37, 38, 39, 40, 43, 44, - 47, 47, 40, 41, 41, 42, 44, 45, 47, 48, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 36, 36, 38, 41, 31, 31, - 31, 31, 31, 31, 31, 31, 33, 34, 34, 36, 37, 37, 39, 42, 31, 31, 31, 31, - 31, 31, 31, 32, 34, 34, 34, 37, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, - 31, 32, 34, 34, 34, 37, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32, - 34, 35, 35, 37, 39, 39, 40, 42, 30, 31, 31, 31, 31, 32, 32, 32, 34, 35, - 35, 38, 40, 40, 41, 42, 30, 31, 31, 31, 31, 32, 32, 32, 34, 35, 35, 38, - 40, 40, 41, 42, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 38, 40, 40, - 41, 43, 33, 33, 34, 34, 34, 34, 34, 35, 37, 38, 38, 41, 42, 42, 43, 44, - 33, 34, 34, 34, 35, 35, 35, 36, 38, 39, 39, 41, 43, 43, 44, 45, 33, 34, - 34, 34, 35, 35, 35, 36, 38, 39, 39, 41, 43, 43, 44, 45, 35, 36, 37, 37, - 37, 38, 38, 38, 41, 41, 41, 44, 46, 46, 46, 46, 36, 37, 38, 38, 39, 40, - 40, 40, 42, 43, 43, 46, 47, 47, 47, 47, 36, 37, 38, 38, 39, 40, 40, 40, - 42, 43, 43, 46, 47, 47, 47, 47, 38, 39, 40, 40, 40, 41, 41, 41, 43, 44, - 44, 46, 47, 47, 47, 48, 41, 42, 42, 42, 42, 42, 42, 43, 44, 45, 45, 46, - 47, 47, 48, 48, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 32, 33, 33, - 33, 33, 33, 34, 35, 36, 36, 36, 36, 37, 38, 40, 41, 41, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 32, 33, 34, 34, 34, 34, 35, - 36, 37, 37, 37, 37, 37, 39, 40, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, - 
31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, 35, 36, 37, 37, 37, - 37, 38, 39, 40, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 32, 32, 34, 34, 34, 34, 34, 35, 36, 38, 38, 38, 38, 38, 40, 41, - 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, - 34, 34, 34, 34, 34, 35, 37, 38, 38, 38, 38, 39, 40, 41, 42, 42, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, - 34, 35, 37, 38, 38, 38, 38, 39, 40, 41, 42, 42, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, 34, 35, 37, 38, - 38, 38, 38, 39, 40, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, 34, 36, 37, 38, 38, 38, 38, 39, - 40, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 33, 34, 35, 35, 35, 35, 36, 37, 38, 39, 39, 39, 39, 40, 41, 42, 42, - 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, - 35, 35, 35, 36, 37, 39, 39, 39, 39, 40, 40, 41, 42, 42, 30, 30, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 34, 35, 35, 35, 35, 36, - 38, 39, 40, 40, 40, 40, 41, 42, 42, 42, 30, 30, 31, 31, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 33, 34, 35, 35, 35, 35, 36, 38, 39, 40, 40, - 40, 40, 41, 42, 42, 42, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 33, 34, 35, 35, 35, 35, 36, 38, 39, 40, 40, 40, 40, 41, 42, - 42, 42, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, - 34, 35, 35, 35, 35, 36, 38, 39, 40, 40, 40, 40, 41, 42, 42, 42, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 36, 36, - 36, 37, 38, 40, 40, 40, 40, 41, 41, 42, 43, 43, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 34, 35, 36, 37, 37, 37, 37, 38, 39, 41, - 41, 41, 41, 42, 42, 43, 43, 43, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 35, 36, 37, 38, 38, 38, 38, 39, 41, 42, 42, 42, 42, 43, - 43, 44, 44, 44, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, - 
36, 37, 38, 39, 39, 39, 39, 40, 41, 43, 43, 43, 43, 43, 44, 44, 45, 45, - 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 37, 38, 39, - 39, 39, 39, 40, 41, 43, 43, 43, 43, 43, 44, 44, 45, 45, 33, 34, 34, 34, - 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 37, 38, 39, 39, 39, 39, 40, - 41, 43, 43, 43, 43, 43, 44, 44, 45, 45, 33, 34, 34, 34, 34, 34, 34, 34, - 35, 35, 35, 35, 35, 35, 36, 37, 38, 39, 39, 39, 39, 40, 41, 43, 43, 43, - 43, 43, 44, 44, 45, 45, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, - 36, 36, 37, 38, 39, 40, 40, 40, 40, 41, 42, 44, 44, 44, 44, 44, 45, 45, - 45, 45, 35, 36, 36, 36, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39, - 41, 41, 41, 41, 41, 42, 44, 45, 46, 46, 46, 46, 46, 46, 46, 46, 36, 37, - 37, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 41, 42, 43, 43, 43, - 43, 44, 45, 46, 47, 47, 47, 47, 47, 47, 47, 47, 36, 37, 37, 38, 38, 38, - 38, 38, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 46, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 36, 37, 37, 38, 38, 38, 38, 38, 39, 39, - 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 46, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 36, 37, 37, 38, 38, 38, 38, 38, 39, 39, 40, 40, 40, 40, - 40, 41, 42, 43, 43, 43, 43, 44, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 37, 37, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, - 43, 43, 43, 44, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 38, 39, 39, 40, - 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 43, 44, 44, 44, 44, 45, - 46, 47, 47, 47, 47, 47, 47, 48, 48, 48, 40, 40, 40, 41, 41, 41, 41, 41, - 41, 41, 42, 42, 42, 42, 42, 43, 44, 44, 44, 44, 44, 45, 46, 47, 47, 47, - 47, 47, 48, 48, 48, 48, 41, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 43, 43, 44, 45, 45, 45, 45, 45, 46, 47, 47, 47, 47, 47, 48, 48, - 48, 48, 41, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 43, 43, - 44, 45, 45, 45, 45, 45, 46, 47, 47, 47, 47, 47, 48, 48, 48, 48, - /* Size 4x8 */ - 31, 31, 35, 37, 31, 31, 36, 38, 31, 32, 37, 39, 31, 32, 37, 40, 
34, 36, - 40, 43, 35, 37, 42, 44, 38, 40, 45, 47, 41, 42, 45, 47, - /* Size 8x4 */ - 31, 31, 31, 31, 34, 35, 38, 41, 31, 31, 32, 32, 36, 37, 40, 42, 35, 36, - 37, 37, 40, 42, 45, 45, 37, 38, 39, 40, 43, 44, 47, 47, - /* Size 8x16 */ - 32, 31, 31, 31, 33, 37, 37, 38, 31, 31, 31, 31, 33, 38, 38, 39, 31, 31, - 31, 31, 34, 38, 38, 40, 31, 31, 31, 31, 34, 38, 38, 40, 31, 31, 32, 32, - 34, 39, 39, 40, 30, 31, 32, 32, 35, 40, 40, 41, 30, 31, 32, 32, 35, 40, - 40, 41, 31, 32, 33, 33, 35, 40, 40, 41, 33, 34, 35, 35, 37, 42, 42, 43, - 33, 35, 36, 36, 38, 43, 43, 44, 33, 35, 36, 36, 38, 43, 43, 44, 35, 37, - 38, 38, 41, 45, 45, 46, 37, 39, 40, 40, 43, 47, 47, 47, 37, 39, 40, 40, - 43, 47, 47, 47, 39, 40, 41, 41, 43, 47, 47, 47, 42, 42, 43, 43, 44, 47, - 47, 48, - /* Size 16x8 */ - 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 37, 37, 39, 42, 31, 31, - 31, 31, 31, 31, 31, 32, 34, 35, 35, 37, 39, 39, 40, 42, 31, 31, 31, 31, - 32, 32, 32, 33, 35, 36, 36, 38, 40, 40, 41, 43, 31, 31, 31, 31, 32, 32, - 32, 33, 35, 36, 36, 38, 40, 40, 41, 43, 33, 33, 34, 34, 34, 35, 35, 35, - 37, 38, 38, 41, 43, 43, 43, 44, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43, - 43, 45, 47, 47, 47, 47, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43, 43, 45, - 47, 47, 47, 47, 38, 39, 40, 40, 40, 41, 41, 41, 43, 44, 44, 46, 47, 47, - 47, 48, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 33, 35, 37, 37, 37, 37, 38, 42, 31, 31, - 31, 31, 31, 31, 31, 31, 33, 35, 37, 37, 37, 37, 39, 42, 31, 31, 31, 31, - 31, 31, 31, 32, 33, 35, 38, 38, 38, 38, 39, 42, 31, 31, 31, 31, 31, 31, - 31, 32, 34, 36, 38, 38, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32, - 34, 36, 38, 38, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32, 34, 36, - 38, 38, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32, 34, 36, 38, 38, - 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32, 34, 36, 38, 38, 38, 38, - 40, 42, 31, 31, 31, 31, 32, 32, 32, 32, 34, 36, 39, 39, 39, 39, 40, 42, - 30, 31, 31, 32, 32, 32, 32, 32, 34, 37, 39, 39, 39, 39, 40, 42, 30, 31, - 31, 
32, 32, 32, 32, 33, 35, 37, 40, 40, 40, 40, 41, 42, 30, 31, 31, 32, - 32, 32, 32, 33, 35, 37, 40, 40, 40, 40, 41, 42, 30, 31, 31, 32, 32, 32, - 32, 33, 35, 37, 40, 40, 40, 40, 41, 42, 30, 31, 31, 32, 32, 32, 32, 33, - 35, 37, 40, 40, 40, 40, 41, 42, 31, 31, 32, 32, 33, 33, 33, 33, 35, 38, - 40, 40, 40, 40, 41, 43, 32, 32, 33, 33, 34, 34, 34, 34, 36, 39, 41, 41, - 41, 41, 42, 44, 33, 33, 34, 35, 35, 35, 35, 35, 37, 40, 42, 42, 42, 42, - 43, 44, 33, 34, 35, 35, 36, 36, 36, 36, 38, 40, 43, 43, 43, 43, 44, 45, - 33, 34, 35, 35, 36, 36, 36, 36, 38, 40, 43, 43, 43, 43, 44, 45, 33, 34, - 35, 35, 36, 36, 36, 36, 38, 40, 43, 43, 43, 43, 44, 45, 33, 34, 35, 35, - 36, 36, 36, 36, 38, 40, 43, 43, 43, 43, 44, 45, 34, 35, 36, 37, 37, 37, - 37, 37, 39, 42, 44, 44, 44, 44, 45, 45, 35, 36, 37, 38, 38, 38, 38, 39, - 41, 43, 45, 45, 45, 45, 46, 46, 36, 37, 38, 39, 39, 39, 39, 40, 42, 44, - 47, 47, 47, 47, 47, 47, 37, 38, 39, 40, 40, 40, 40, 41, 43, 45, 47, 47, - 47, 47, 47, 47, 37, 38, 39, 40, 40, 40, 40, 41, 43, 45, 47, 47, 47, 47, - 47, 47, 37, 38, 39, 40, 40, 40, 40, 41, 43, 45, 47, 47, 47, 47, 47, 47, - 37, 38, 39, 40, 40, 40, 40, 41, 43, 45, 47, 47, 47, 47, 47, 47, 39, 39, - 40, 41, 41, 41, 41, 42, 43, 45, 47, 47, 47, 47, 47, 48, 40, 41, 41, 42, - 42, 42, 42, 42, 44, 45, 47, 47, 47, 47, 47, 48, 42, 42, 42, 43, 43, 43, - 43, 43, 44, 46, 47, 47, 47, 47, 48, 48, 42, 42, 42, 43, 43, 43, 43, 43, - 44, 46, 47, 47, 47, 47, 48, 48, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 32, 33, 33, - 33, 33, 33, 34, 35, 36, 37, 37, 37, 37, 39, 40, 42, 42, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, 35, - 36, 37, 38, 38, 38, 38, 39, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, 35, 35, 35, 36, 37, 38, 39, 39, - 39, 39, 40, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 33, 35, 35, 35, 35, 35, 37, 38, 39, 40, 40, 40, 40, 41, 42, - 43, 43, 31, 31, 31, 31, 
31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 33, 34, - 35, 36, 36, 36, 36, 37, 38, 39, 40, 40, 40, 40, 41, 42, 43, 43, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 36, 36, - 36, 37, 38, 39, 40, 40, 40, 40, 41, 42, 43, 43, 31, 31, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 36, 36, 36, 37, 38, 39, - 40, 40, 40, 40, 41, 42, 43, 43, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 34, 35, 36, 36, 36, 36, 37, 39, 40, 41, 41, 41, 41, - 42, 42, 43, 43, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, - 35, 36, 37, 38, 38, 38, 38, 39, 41, 42, 43, 43, 43, 43, 43, 44, 44, 44, - 35, 35, 35, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 38, 39, 40, 40, - 40, 40, 40, 42, 43, 44, 45, 45, 45, 45, 45, 45, 46, 46, 37, 37, 38, 38, - 38, 38, 38, 38, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, - 45, 47, 47, 47, 47, 47, 47, 47, 47, 47, 37, 37, 38, 38, 38, 38, 38, 38, - 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 45, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 37, 37, 38, 38, 38, 38, 38, 38, 39, 39, 40, 40, - 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 45, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 37, 37, 38, 38, 38, 38, 38, 38, 39, 39, 40, 40, 40, 40, 40, 41, - 42, 43, 43, 43, 43, 44, 45, 47, 47, 47, 47, 47, 47, 47, 47, 47, 38, 39, - 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 43, 44, 44, 44, - 44, 45, 46, 47, 47, 47, 47, 47, 47, 47, 48, 48, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 43, 44, 44, 45, 45, 45, 45, 45, 46, 47, - 47, 47, 47, 47, 48, 48, 48, 48, - /* Size 4x16 */ - 31, 31, 35, 37, 31, 31, 35, 38, 31, 31, 36, 38, 31, 31, 36, 38, 31, 32, - 36, 39, 31, 32, 37, 40, 31, 32, 37, 40, 31, 33, 38, 40, 33, 35, 40, 42, - 34, 36, 40, 43, 34, 36, 40, 43, 36, 38, 43, 45, 38, 40, 45, 47, 38, 40, - 45, 47, 39, 41, 45, 47, 42, 43, 46, 47, - /* Size 16x4 */ - 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 34, 36, 38, 38, 39, 42, 31, 31, - 31, 31, 32, 32, 32, 33, 35, 36, 36, 38, 40, 40, 41, 43, 35, 
35, 36, 36, - 36, 37, 37, 38, 40, 40, 40, 43, 45, 45, 45, 46, 37, 38, 38, 38, 39, 40, - 40, 40, 42, 43, 43, 45, 47, 47, 47, 47, - /* Size 8x32 */ - 32, 31, 31, 31, 33, 37, 37, 38, 31, 31, 31, 31, 33, 37, 37, 39, 31, 31, - 31, 31, 33, 38, 38, 39, 31, 31, 31, 31, 34, 38, 38, 40, 31, 31, 31, 31, - 34, 38, 38, 40, 31, 31, 31, 31, 34, 38, 38, 40, 31, 31, 31, 31, 34, 38, - 38, 40, 31, 31, 31, 31, 34, 38, 38, 40, 31, 31, 32, 32, 34, 39, 39, 40, - 30, 31, 32, 32, 34, 39, 39, 40, 30, 31, 32, 32, 35, 40, 40, 41, 30, 31, - 32, 32, 35, 40, 40, 41, 30, 31, 32, 32, 35, 40, 40, 41, 30, 31, 32, 32, - 35, 40, 40, 41, 31, 32, 33, 33, 35, 40, 40, 41, 32, 33, 34, 34, 36, 41, - 41, 42, 33, 34, 35, 35, 37, 42, 42, 43, 33, 35, 36, 36, 38, 43, 43, 44, - 33, 35, 36, 36, 38, 43, 43, 44, 33, 35, 36, 36, 38, 43, 43, 44, 33, 35, - 36, 36, 38, 43, 43, 44, 34, 36, 37, 37, 39, 44, 44, 45, 35, 37, 38, 38, - 41, 45, 45, 46, 36, 38, 39, 39, 42, 47, 47, 47, 37, 39, 40, 40, 43, 47, - 47, 47, 37, 39, 40, 40, 43, 47, 47, 47, 37, 39, 40, 40, 43, 47, 47, 47, - 37, 39, 40, 40, 43, 47, 47, 47, 39, 40, 41, 41, 43, 47, 47, 47, 40, 41, - 42, 42, 44, 47, 47, 47, 42, 42, 43, 43, 44, 47, 47, 48, 42, 42, 43, 43, - 44, 47, 47, 48, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 32, 33, 33, - 33, 33, 33, 34, 35, 36, 37, 37, 37, 37, 39, 40, 42, 42, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, 35, 35, 35, 36, - 37, 38, 39, 39, 39, 39, 40, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 36, 36, 36, 37, 38, 39, 40, 40, - 40, 40, 41, 42, 43, 43, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 33, 34, 35, 36, 36, 36, 36, 37, 38, 39, 40, 40, 40, 40, 41, 42, - 43, 43, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36, - 37, 38, 38, 38, 38, 39, 41, 42, 43, 43, 43, 43, 43, 44, 44, 44, 37, 37, - 38, 38, 38, 38, 38, 38, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, - 43, 44, 45, 47, 47, 47, 47, 47, 47, 47, 
47, 47, 37, 37, 38, 38, 38, 38, - 38, 38, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 45, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 42, 43, 44, 44, 44, 44, 45, 46, 47, 47, 47, 47, 47, - 47, 47, 48, 48 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 31, 31, 31, 32, 31, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 33, - /* Size 8x8 */ - 31, 31, 31, 31, 31, 31, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, - 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, - 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 33, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 
32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 
31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - /* Size 4x8 */ - 31, 31, 31, 
32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, - 32, 32, 31, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, - /* Size 8x4 */ - 31, 31, 31, 31, 31, 31, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, - /* Size 8x16 */ - 32, 31, 31, 31, 31, 31, 31, 32, 31, 31, 31, 31, 31, 31, 32, 32, 31, 31, - 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, - 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, - 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, - 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, - 32, 32, 32, 32, 33, 33, 31, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, - 32, 32, 33, 34, 32, 32, 32, 32, 32, 32, 33, 34, 32, 32, 32, 32, 32, 32, - 33, 34, - /* Size 16x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, - 34, 34, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 33, 33, 34, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, - 34, 34, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 33, 34, 34, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 34, 34, 34, 34, 34, 34, 34, 34, - /* Size 4x16 */ - 31, 31, 31, 32, 31, 31, 31, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, - 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, - 31, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, - 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, - /* Size 16x4 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 31, 31, - 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, - /* Size 8x32 */ - 32, 31, 31, 31, 31, 31, 31, 32, 31, 31, 31, 31, 31, 31, 32, 32, 31, 31, - 31, 31, 31, 31, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, - 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, - 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, - 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, - 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, - 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, - 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, - 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, - 32, 32, 33, 33, 31, 32, 32, 32, 32, 32, 33, 33, 31, 32, 32, 32, 32, 32, - 33, 33, 32, 32, 32, 32, 32, 32, 33, 34, 32, 32, 32, 32, 32, 32, 33, 34, - 32, 32, 32, 32, 32, 32, 33, 34, 32, 32, 32, 32, 32, 32, 33, 34, 32, 32, - 32, 32, 32, 32, 33, 34, 32, 32, 32, 32, 32, 32, 33, 34, 32, 32, 32, 32, - 32, 32, 33, 34, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, - 34, 34, 34, 34 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 31, 31, 34, 31, 31, 31, 35, 31, 31, 32, 35, 34, 35, 35, 39, - /* Size 8x8 */ - 31, 31, 31, 31, 30, 31, 33, 33, 31, 31, 31, 31, 31, 32, 34, 34, 31, 31, - 31, 31, 31, 32, 34, 34, 31, 31, 31, 31, 31, 32, 35, 35, 30, 31, 31, 31, - 32, 32, 35, 35, 31, 32, 32, 32, 32, 33, 36, 36, 33, 34, 34, 35, 35, 36, - 39, 39, 33, 34, 34, 35, 35, 36, 39, 39, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 31, 31, - 31, 31, 31, 31, 31, 31, 30, 30, 30, 32, 33, 34, 34, 34, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 32, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, - 34, 35, 35, 35, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, - 35, 35, 30, 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, - 30, 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 30, 30, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 31, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 34, 36, 37, 37, 37, 33, 33, 33, 34, 34, 34, - 34, 34, 34, 34, 34, 36, 37, 38, 38, 38, 33, 34, 34, 34, 34, 34, 35, 35, - 35, 35, 35, 37, 38, 39, 39, 39, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, - 35, 37, 38, 39, 39, 39, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 37, - 38, 39, 39, 39, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, - 30, 30, 30, 31, 31, 32, 33, 33, 33, 33, 33, 33, 33, 34, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 
30, 30, 30, 30, 31, - 31, 32, 33, 33, 33, 33, 33, 33, 33, 34, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 31, 32, 32, 33, 34, - 34, 34, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33, 34, 34, 34, 34, 34, - 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 33, 33, 34, 34, 34, 34, 34, 34, 35, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 33, - 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 33, 34, 34, 34, 34, - 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, - 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 33, 34, 35, - 35, 35, 35, 35, 35, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33, 34, 35, 35, 35, 35, 35, - 35, 35, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 32, 33, 33, 34, 35, 35, 35, 35, 35, 35, 36, 30, 30, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 30, 30, 30, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, - 34, 35, 35, 35, 35, 35, 35, 36, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 
34, 34, 35, 35, 35, - 35, 35, 35, 36, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, - 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 30, 30, 30, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 30, 30, 30, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, - 35, 35, 35, 35, 35, 36, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 36, 36, 36, 36, 36, - 36, 37, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 34, 34, 35, 36, 37, 37, 37, 37, 37, 37, 37, 32, 32, - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, - 34, 34, 35, 36, 37, 37, 37, 37, 37, 37, 37, 38, 33, 33, 33, 33, 33, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 36, 37, - 37, 38, 38, 38, 38, 38, 38, 39, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 39, 39, 39, - 39, 39, 39, 40, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 39, 39, 39, 39, 39, 39, 40, - 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 36, 37, 37, 38, 39, 39, 39, 39, 39, 39, 40, 33, 33, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, - 37, 37, 38, 39, 39, 39, 39, 39, 39, 40, 33, 33, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 39, - 39, 39, 39, 39, 39, 40, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 39, 39, 39, 39, 39, - 39, 40, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, - 36, 36, 36, 36, 36, 37, 37, 38, 39, 40, 40, 40, 40, 
40, 40, 40, - /* Size 4x8 */ - 31, 31, 31, 34, 31, 31, 31, 35, 31, 31, 31, 35, 31, 32, 32, 36, 31, 32, - 32, 36, 31, 33, 33, 37, 34, 36, 36, 40, 34, 36, 36, 40, - /* Size 8x4 */ - 31, 31, 31, 31, 31, 31, 34, 34, 31, 31, 31, 32, 32, 33, 36, 36, 31, 31, - 31, 32, 32, 33, 36, 36, 34, 35, 35, 36, 36, 37, 40, 40, - /* Size 8x16 */ - 32, 31, 31, 31, 31, 31, 33, 35, 31, 31, 31, 31, 31, 31, 33, 36, 31, 31, - 31, 31, 31, 31, 34, 36, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 31, - 31, 31, 34, 37, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 32, 32, 32, - 34, 37, 30, 31, 31, 32, 32, 32, 34, 38, 30, 31, 32, 32, 32, 32, 35, 38, - 30, 31, 32, 32, 32, 32, 35, 38, 30, 31, 32, 32, 32, 32, 35, 38, 31, 32, - 33, 33, 33, 33, 36, 39, 33, 34, 34, 35, 35, 35, 37, 40, 33, 34, 35, 36, - 36, 36, 38, 41, 33, 34, 35, 36, 36, 36, 38, 41, 33, 34, 35, 36, 36, 36, - 38, 41, - /* Size 16x8 */ - 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 34, 31, 31, 31, 31, - 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 33, 35, 36, 36, 36, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 33, 35, 36, 36, 36, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 33, 35, 36, 36, 36, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 36, - 37, 38, 38, 38, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 40, 41, - 41, 41, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, 37, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, 37, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 36, 37, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 32, 33, 35, 36, 38, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, 34, 35, 36, 38, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 33, 34, 35, 37, 38, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, - 34, 35, 37, 38, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, - 37, 38, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 
34, 35, 37, 38, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 37, 38, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 37, 38, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 37, 38, 31, 31, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 33, 34, 36, 37, 39, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 33, 34, 36, 37, 39, 30, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 33, 34, 36, 38, 39, 30, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, - 35, 36, 38, 40, 30, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 35, 36, - 38, 40, 30, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 35, 36, 38, 40, - 30, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 35, 36, 38, 40, 30, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 35, 36, 38, 40, 30, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 33, 35, 36, 38, 40, 31, 31, 31, 32, 32, 33, - 33, 33, 33, 33, 33, 34, 35, 37, 38, 40, 31, 32, 32, 33, 33, 33, 33, 33, - 33, 33, 33, 35, 36, 37, 39, 41, 32, 32, 33, 33, 34, 34, 34, 34, 34, 34, - 34, 35, 37, 38, 40, 41, 33, 33, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, - 37, 39, 40, 42, 33, 34, 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 40, - 41, 43, 33, 34, 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 40, 41, 43, - 33, 34, 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 40, 41, 43, 33, 34, - 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 40, 41, 43, 33, 34, 34, 35, - 35, 36, 36, 36, 36, 36, 36, 37, 38, 40, 41, 43, 33, 34, 34, 35, 35, 36, - 36, 36, 36, 36, 36, 37, 38, 40, 41, 43, 34, 34, 35, 35, 36, 36, 36, 36, - 36, 36, 36, 38, 39, 40, 42, 44, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, - 30, 30, 30, 31, 31, 32, 33, 33, 33, 33, 33, 33, 33, 34, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 32, 33, 34, 34, 34, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, - 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 
31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33, 34, 35, 35, 35, 35, 35, - 35, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, - 35, 36, 36, 36, 36, 36, 36, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 35, 36, 36, 36, - 36, 36, 36, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, - 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 32, 32, 32, 32, 32, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 35, 35, 36, 37, - 37, 37, 37, 37, 37, 38, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 38, 38, 38, 38, - 38, 39, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 37, 37, 38, 39, 40, 40, 40, 40, 40, 40, 40, 35, 35, - 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, - 38, 38, 39, 40, 40, 41, 41, 41, 41, 41, 41, 42, 37, 37, 37, 38, 38, 38, - 38, 38, 38, 38, 38, 38, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, - 42, 43, 43, 43, 43, 43, 43, 44, - /* Size 4x16 */ - 31, 31, 31, 34, 31, 31, 31, 34, 31, 31, 31, 35, 31, 31, 31, 35, 31, 31, - 31, 35, 31, 31, 31, 35, 31, 32, 32, 36, 31, 32, 32, 36, 31, 32, 32, 36, - 31, 32, 32, 36, 31, 32, 32, 36, 32, 33, 33, 37, 33, 35, 35, 39, 34, 36, - 36, 40, 34, 36, 36, 40, 34, 36, 36, 40, - /* Size 16x4 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 
31, 32, 33, 34, 34, 34, 31, 31, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 36, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 36, 34, 34, 35, 35, 35, 35, - 36, 36, 36, 36, 36, 37, 39, 40, 40, 40, - /* Size 8x32 */ - 32, 31, 31, 31, 31, 31, 33, 35, 31, 31, 31, 31, 31, 31, 33, 35, 31, 31, - 31, 31, 31, 31, 33, 36, 31, 31, 31, 31, 31, 31, 33, 36, 31, 31, 31, 31, - 31, 31, 34, 36, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 31, 31, 31, - 34, 37, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 31, 31, 31, 34, 37, - 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, - 31, 31, 31, 31, 34, 37, 31, 31, 31, 32, 32, 32, 34, 37, 31, 31, 31, 32, - 32, 32, 34, 37, 30, 31, 31, 32, 32, 32, 34, 38, 30, 31, 32, 32, 32, 32, - 35, 38, 30, 31, 32, 32, 32, 32, 35, 38, 30, 31, 32, 32, 32, 32, 35, 38, - 30, 31, 32, 32, 32, 32, 35, 38, 30, 31, 32, 32, 32, 32, 35, 38, 30, 31, - 32, 32, 32, 32, 35, 38, 31, 31, 32, 33, 33, 33, 35, 38, 31, 32, 33, 33, - 33, 33, 36, 39, 32, 33, 34, 34, 34, 34, 37, 40, 33, 34, 34, 35, 35, 35, - 37, 40, 33, 34, 35, 36, 36, 36, 38, 41, 33, 34, 35, 36, 36, 36, 38, 41, - 33, 34, 35, 36, 36, 36, 38, 41, 33, 34, 35, 36, 36, 36, 38, 41, 33, 34, - 35, 36, 36, 36, 38, 41, 33, 34, 35, 36, 36, 36, 38, 41, 34, 35, 36, 36, - 36, 36, 39, 42, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, - 30, 30, 30, 31, 31, 32, 33, 33, 33, 33, 33, 33, 33, 34, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, - 35, 35, 35, 35, 35, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, - 36, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 31, 31, - 31, 31, 31, 31, 31, 
31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 33, 33, 33, 33, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 36, 37, - 37, 38, 38, 38, 38, 38, 38, 39, 35, 35, 36, 36, 36, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 39, 40, 40, 41, 41, 41, - 41, 41, 41, 42 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 31, 31, 31, 31, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, - /* Size 8x8 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, - 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, - 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x8 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, - 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, - /* Size 8x4 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, - 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, - /* Size 8x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 31, 31, 31, 32, - 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, - 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, - 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, - 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, - 32, 32, - /* Size 16x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 
32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x16 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 31, 32, - 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, - 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, - 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, - /* 
Size 16x4 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, - 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, - 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, - 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, - 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, - 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, - 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, - 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, - 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 32, 32, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - /* Size 8x8 */ - 31, 31, 31, 31, 31, 31, 31, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 30, 31, 31, 31, 31, 31, 31, 31, - /* Size 16x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, - /* Size 32x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 
31, 31, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, - 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 32, 30, 30, 30, 30, 
30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, - /* Size 4x8 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 30, 31, 32, 32, - /* Size 8x4 */ - 31, 31, 31, 31, 31, 31, 31, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, - /* Size 8x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31, - 31, 32, 32, 32, 30, 31, 31, 31, 31, 32, 32, 32, 30, 31, 31, 31, 32, 32, - 32, 32, - /* Size 16x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, - /* Size 16x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 30, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 30, 30, 31, 31, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 30, 30, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 30, 30, 31, 31, 31, 31, 31, 31, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 32x16 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 
31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x16 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 31, 31, 
- 32, 32, 31, 31, 32, 32, 30, 31, 32, 32, - /* Size 16x4 */ - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, - /* Size 8x32 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31, 31, 32, - 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, - 30, 31, 31, 31, 31, 32, 32, 32, 30, 31, 31, 31, 31, 32, 32, 32, 30, 31, - 31, 31, 32, 32, 32, 32, 30, 31, 31, 31, 32, 32, 32, 32, 30, 31, 31, 31, - 32, 32, 32, 32, - /* Size 32x8 */ - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 
31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, - 32, 32, 32, 32 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 16x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 32x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 16x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32 }, - { /* Chroma */ - /* Size 4x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 16x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x32 
*/ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 32x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 16x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32 }, - }, -}; - -static const qm_val_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = { - { - { /* Luma */ - /* Size 4x4 */ - 32, 24, 14, 11, 24, 15, 11, 9, 14, 11, 7, 7, 11, 9, 7, 5, - /* Size 8x8 */ - 32, 32, 27, 20, 15, 12, 11, 9, 32, 29, 26, 21, 16, 13, 12, 10, 27, 26, - 19, 16, 13, 11, 10, 10, 20, 21, 16, 12, 11, 9, 9, 8, 15, 16, 13, 11, 9, - 8, 7, 7, 12, 13, 11, 9, 8, 7, 6, 6, 11, 12, 10, 9, 7, 6, 6, 5, 9, 10, - 10, 8, 7, 6, 5, 5, - /* Size 16x16 */ - 32, 33, 33, 30, 28, 23, 21, 17, 16, 13, 12, 11, 11, 10, 9, 9, 33, 32, - 32, 31, 30, 25, 23, 19, 17, 14, 14, 12, 11, 11, 10, 9, 33, 32, 31, 29, - 28, 24, 23, 19, 17, 14, 14, 13, 12, 11, 10, 10, 30, 31, 29, 26, 24, 22, - 20, 18, 16, 14, 13, 13, 12, 11, 11, 10, 28, 30, 28, 24, 21, 19, 18, 16, - 15, 13, 13, 12, 11, 11, 10, 10, 23, 25, 24, 22, 19, 16, 15, 14, 13, 11, - 11, 11, 10, 10, 9, 9, 21, 23, 23, 20, 18, 15, 14, 13, 12, 11, 10, 10, 9, - 9, 9, 9, 17, 19, 19, 18, 16, 14, 13, 11, 10, 9, 9, 9, 9, 8, 8, 8, 16, - 17, 17, 16, 15, 13, 12, 10, 10, 9, 8, 8, 8, 8, 8, 7, 13, 14, 14, 14, 13, - 11, 11, 9, 9, 8, 7, 7, 7, 7, 7, 7, 12, 14, 14, 13, 13, 11, 10, 9, 8, 7, - 7, 7, 7, 7, 6, 6, 11, 12, 13, 13, 12, 11, 10, 9, 8, 7, 7, 6, 6, 6, 6, 6, - 11, 11, 12, 12, 11, 10, 9, 9, 8, 7, 7, 6, 6, 6, 5, 5, 10, 11, 11, 11, - 11, 10, 9, 8, 8, 7, 7, 6, 6, 5, 5, 5, 9, 10, 10, 11, 10, 9, 9, 8, 8, 7, - 6, 6, 5, 5, 5, 5, 9, 9, 10, 10, 10, 9, 9, 8, 7, 7, 6, 6, 5, 5, 5, 4, - /* Size 
32x32 */ - 32, 33, 33, 33, 33, 32, 30, 29, 28, 26, 23, 22, 21, 19, 17, 17, 16, 14, - 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 33, 32, 32, 32, 32, - 32, 30, 30, 29, 27, 24, 23, 22, 20, 18, 17, 17, 15, 13, 13, 13, 12, 12, - 12, 11, 11, 10, 10, 10, 9, 9, 9, 33, 32, 32, 32, 32, 32, 31, 30, 30, 28, - 25, 24, 23, 21, 19, 18, 17, 16, 14, 14, 14, 13, 12, 12, 11, 11, 11, 10, - 10, 9, 9, 9, 33, 32, 32, 32, 31, 31, 30, 29, 29, 27, 25, 24, 23, 21, 19, - 18, 17, 16, 14, 14, 14, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 33, - 32, 32, 31, 31, 30, 29, 28, 28, 26, 24, 23, 23, 20, 19, 18, 17, 16, 14, - 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 32, 32, 32, 31, 30, - 29, 28, 28, 27, 26, 24, 23, 22, 21, 19, 19, 18, 16, 15, 15, 14, 13, 13, - 12, 12, 12, 11, 11, 10, 10, 10, 9, 30, 30, 31, 30, 29, 28, 26, 25, 24, - 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, - 11, 11, 10, 10, 9, 29, 30, 30, 29, 28, 28, 25, 24, 23, 22, 20, 20, 19, - 18, 17, 16, 16, 15, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, - 10, 28, 29, 30, 29, 28, 27, 24, 23, 21, 20, 19, 19, 18, 17, 16, 16, 15, - 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 26, 27, 28, - 27, 26, 26, 23, 22, 20, 19, 18, 17, 17, 16, 15, 14, 14, 13, 12, 12, 12, - 11, 11, 11, 11, 10, 10, 10, 10, 10, 9, 9, 23, 24, 25, 25, 24, 24, 22, - 20, 19, 18, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 11, 11, 11, 11, 10, - 10, 10, 10, 9, 9, 9, 9, 22, 23, 24, 24, 23, 23, 21, 20, 19, 17, 16, 15, - 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, - 8, 21, 22, 23, 23, 23, 22, 20, 19, 18, 17, 15, 15, 14, 13, 13, 12, 12, - 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 8, 19, 20, 21, 21, 20, - 21, 19, 18, 17, 16, 14, 14, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, - 9, 9, 9, 9, 9, 8, 8, 8, 17, 18, 19, 19, 19, 19, 18, 17, 16, 15, 14, 13, - 13, 12, 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 17, - 17, 18, 18, 18, 19, 17, 16, 16, 14, 13, 13, 12, 12, 11, 10, 10, 10, 9, 
- 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 16, 17, 17, 17, 17, 18, 16, 16, - 15, 14, 13, 12, 12, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 7, 7, 7, 14, 15, 16, 16, 16, 16, 15, 15, 14, 13, 12, 12, 11, 11, 10, 10, - 9, 9, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 13, 13, 14, 14, 14, 15, - 14, 13, 13, 12, 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 13, 13, 14, 14, 14, 15, 14, 13, 13, 12, 11, 11, 11, 10, - 9, 9, 9, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 12, 13, 14, 14, - 14, 14, 13, 13, 13, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 6, 6, 6, 6, 6, 12, 12, 13, 13, 13, 13, 13, 12, 12, 11, 11, 10, 10, - 9, 9, 9, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 11, 12, 12, 12, - 13, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 11, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 10, 10, - 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 11, 11, 11, 12, - 12, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, - 6, 6, 5, 5, 5, 5, 5, 10, 11, 11, 11, 12, 12, 12, 11, 11, 10, 10, 10, 9, - 9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 10, 10, 11, 11, - 11, 11, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, - 5, 5, 5, 5, 5, 5, 10, 10, 10, 11, 11, 11, 11, 11, 10, 10, 10, 9, 9, 9, - 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 9, 10, 10, 10, 10, - 10, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, - 5, 5, 5, 5, 5, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7, - 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 4, 9, 9, 9, 10, 10, 10, 10, - 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, - 4, 4, 8, 9, 9, 9, 9, 9, 9, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, - 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, - /* Size 4x8 */ - 32, 24, 14, 11, 31, 24, 15, 12, 28, 18, 12, 11, 21, 14, 10, 9, 16, 12, - 8, 8, 13, 11, 7, 7, 11, 10, 7, 6, 10, 9, 7, 5, - /* Size 8x4 */ - 32, 31, 28, 21, 16, 13, 11, 10, 
24, 24, 18, 14, 12, 11, 10, 9, 14, 15, - 12, 10, 8, 7, 7, 7, 11, 12, 11, 9, 8, 7, 6, 5, - /* Size 8x16 */ - 32, 32, 28, 19, 16, 12, 11, 10, 33, 31, 30, 21, 17, 13, 12, 11, 32, 30, - 28, 20, 17, 13, 12, 12, 30, 28, 24, 19, 16, 13, 13, 12, 28, 27, 21, 17, - 15, 12, 12, 11, 23, 24, 19, 14, 13, 11, 11, 11, 21, 22, 18, 13, 12, 10, - 10, 10, 18, 19, 16, 12, 10, 9, 9, 9, 16, 18, 15, 11, 10, 8, 8, 8, 13, - 15, 13, 10, 9, 7, 8, 8, 12, 14, 13, 10, 8, 7, 7, 7, 11, 13, 12, 10, 8, - 7, 6, 6, 11, 12, 11, 10, 8, 7, 6, 6, 10, 11, 10, 9, 8, 7, 6, 6, 9, 10, - 10, 9, 7, 6, 6, 5, 9, 10, 10, 9, 8, 7, 6, 5, - /* Size 16x8 */ - 32, 33, 32, 30, 28, 23, 21, 18, 16, 13, 12, 11, 11, 10, 9, 9, 32, 31, - 30, 28, 27, 24, 22, 19, 18, 15, 14, 13, 12, 11, 10, 10, 28, 30, 28, 24, - 21, 19, 18, 16, 15, 13, 13, 12, 11, 10, 10, 10, 19, 21, 20, 19, 17, 14, - 13, 12, 11, 10, 10, 10, 10, 9, 9, 9, 16, 17, 17, 16, 15, 13, 12, 10, 10, - 9, 8, 8, 8, 8, 7, 8, 12, 13, 13, 13, 12, 11, 10, 9, 8, 7, 7, 7, 7, 7, 6, - 7, 11, 12, 12, 13, 12, 11, 10, 9, 8, 8, 7, 6, 6, 6, 6, 6, 10, 11, 12, - 12, 11, 11, 10, 9, 8, 8, 7, 6, 6, 6, 5, 5, - /* Size 16x32 */ - 32, 33, 32, 30, 28, 23, 19, 17, 16, 13, 12, 11, 11, 11, 10, 10, 33, 32, - 32, 30, 29, 24, 20, 18, 17, 14, 12, 12, 12, 11, 11, 11, 33, 32, 31, 31, - 30, 25, 21, 19, 17, 14, 13, 12, 12, 11, 11, 11, 33, 32, 31, 30, 29, 25, - 21, 19, 17, 14, 13, 13, 12, 12, 11, 11, 32, 32, 30, 29, 28, 24, 20, 19, - 17, 14, 13, 13, 12, 12, 12, 11, 32, 31, 29, 28, 27, 24, 21, 19, 18, 15, - 14, 13, 12, 12, 12, 11, 30, 30, 28, 26, 24, 21, 19, 18, 16, 14, 13, 13, - 13, 12, 12, 11, 29, 30, 28, 25, 23, 20, 18, 17, 16, 13, 12, 12, 12, 12, - 12, 11, 28, 30, 27, 24, 21, 19, 17, 16, 15, 13, 12, 12, 12, 12, 11, 11, - 26, 28, 26, 23, 20, 18, 16, 15, 14, 12, 12, 12, 11, 11, 11, 11, 23, 25, - 24, 21, 19, 16, 14, 14, 13, 11, 11, 11, 11, 11, 11, 11, 22, 24, 23, 21, - 19, 16, 14, 13, 12, 11, 10, 10, 10, 10, 10, 10, 21, 23, 22, 20, 18, 15, - 13, 13, 12, 11, 10, 10, 10, 10, 10, 10, 19, 21, 20, 19, 17, 14, 
12, 12, - 11, 10, 9, 10, 10, 9, 10, 9, 18, 19, 19, 18, 16, 14, 12, 11, 10, 9, 9, - 9, 9, 9, 9, 9, 17, 18, 18, 17, 16, 13, 12, 11, 10, 9, 9, 9, 9, 9, 9, 9, - 16, 17, 18, 16, 15, 13, 11, 10, 10, 9, 8, 8, 8, 8, 8, 8, 14, 16, 16, 15, - 14, 12, 11, 10, 9, 8, 8, 8, 8, 8, 8, 8, 13, 14, 15, 14, 13, 11, 10, 9, - 9, 8, 7, 8, 8, 8, 8, 8, 13, 14, 14, 14, 13, 11, 10, 9, 9, 8, 7, 7, 7, 7, - 7, 7, 12, 14, 14, 13, 13, 11, 10, 9, 8, 8, 7, 7, 7, 7, 7, 7, 12, 13, 13, - 13, 12, 11, 9, 9, 8, 7, 7, 7, 7, 7, 7, 7, 11, 12, 13, 13, 12, 10, 10, 9, - 8, 7, 7, 7, 6, 6, 6, 7, 11, 12, 12, 12, 11, 10, 10, 9, 8, 7, 7, 6, 6, 6, - 6, 6, 11, 12, 12, 12, 11, 10, 10, 8, 8, 7, 7, 6, 6, 6, 6, 6, 10, 11, 12, - 12, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6, 6, 6, 10, 11, 11, 11, 10, 10, 9, 9, - 8, 7, 7, 6, 6, 6, 6, 6, 10, 11, 11, 11, 10, 10, 9, 9, 8, 7, 7, 6, 6, 5, - 5, 5, 9, 10, 10, 11, 10, 9, 9, 8, 7, 7, 6, 6, 6, 5, 5, 5, 9, 10, 10, 10, - 10, 9, 9, 8, 7, 7, 6, 6, 6, 5, 5, 5, 9, 9, 10, 10, 10, 9, 9, 8, 8, 7, 7, - 6, 6, 5, 5, 5, 8, 9, 9, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, 5, 5, 5, - /* Size 32x16 */ - 32, 33, 33, 33, 32, 32, 30, 29, 28, 26, 23, 22, 21, 19, 18, 17, 16, 14, - 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 33, 32, 32, 32, 32, - 31, 30, 30, 30, 28, 25, 24, 23, 21, 19, 18, 17, 16, 14, 14, 14, 13, 12, - 12, 12, 11, 11, 11, 10, 10, 9, 9, 32, 32, 31, 31, 30, 29, 28, 28, 27, - 26, 24, 23, 22, 20, 19, 18, 18, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, - 11, 10, 10, 10, 9, 30, 30, 31, 30, 29, 28, 26, 25, 24, 23, 21, 21, 20, - 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, - 10, 28, 29, 30, 29, 28, 27, 24, 23, 21, 20, 19, 19, 18, 17, 16, 16, 15, - 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 23, 24, 25, - 25, 24, 24, 21, 20, 19, 18, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 11, - 11, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 19, 20, 21, 21, 20, 21, 19, 18, - 17, 16, 14, 14, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 10, 10, 10, 9, 9, - 9, 9, 9, 9, 9, 17, 18, 19, 19, 19, 19, 18, 17, 
16, 15, 14, 13, 13, 12, - 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8, 9, 9, 8, 8, 8, 8, 16, 17, 17, - 17, 17, 18, 16, 16, 15, 14, 13, 12, 12, 11, 10, 10, 10, 9, 9, 9, 8, 8, - 8, 8, 8, 8, 8, 8, 7, 7, 8, 8, 13, 14, 14, 14, 14, 15, 14, 13, 13, 12, - 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 12, 12, 13, 13, 13, 14, 13, 12, 12, 12, 11, 10, 10, 9, 9, 9, 8, 8, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 7, 7, 11, 12, 12, 13, 13, 13, 13, 12, 12, - 12, 11, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 11, 12, 12, 12, 12, 12, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, - 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 11, 11, 11, 12, 12, 12, 12, - 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, - 5, 5, 5, 10, 11, 11, 11, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, - 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 10, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, - 5, 5, 5, 5, - /* Size 4x16 */ - 33, 23, 13, 11, 32, 25, 14, 11, 32, 24, 14, 12, 30, 21, 14, 12, 30, 19, - 13, 12, 25, 16, 11, 11, 23, 15, 11, 10, 19, 14, 9, 9, 17, 13, 9, 8, 14, - 11, 8, 8, 14, 11, 8, 7, 12, 10, 7, 6, 12, 10, 7, 6, 11, 10, 7, 6, 10, 9, - 7, 5, 9, 9, 7, 5, - /* Size 16x4 */ - 33, 32, 32, 30, 30, 25, 23, 19, 17, 14, 14, 12, 12, 11, 10, 9, 23, 25, - 24, 21, 19, 16, 15, 14, 13, 11, 11, 10, 10, 10, 9, 9, 13, 14, 14, 14, - 13, 11, 11, 9, 9, 8, 8, 7, 7, 7, 7, 7, 11, 11, 12, 12, 12, 11, 10, 9, 8, - 8, 7, 6, 6, 6, 5, 5, - /* Size 8x32 */ - 32, 32, 28, 19, 16, 12, 11, 10, 33, 32, 29, 20, 17, 12, 12, 11, 33, 31, - 30, 21, 17, 13, 12, 11, 33, 31, 29, 21, 17, 13, 12, 11, 32, 30, 28, 20, - 17, 13, 12, 12, 32, 29, 27, 21, 18, 14, 12, 12, 30, 28, 24, 19, 16, 13, - 13, 12, 29, 28, 23, 18, 16, 12, 12, 12, 28, 27, 21, 17, 15, 12, 12, 11, - 26, 26, 20, 16, 14, 12, 11, 11, 23, 24, 19, 14, 13, 11, 11, 11, 22, 23, - 19, 14, 12, 10, 10, 10, 21, 22, 18, 13, 12, 10, 10, 10, 19, 20, 17, 12, - 11, 9, 10, 10, 18, 19, 
16, 12, 10, 9, 9, 9, 17, 18, 16, 12, 10, 9, 9, 9, - 16, 18, 15, 11, 10, 8, 8, 8, 14, 16, 14, 11, 9, 8, 8, 8, 13, 15, 13, 10, - 9, 7, 8, 8, 13, 14, 13, 10, 9, 7, 7, 7, 12, 14, 13, 10, 8, 7, 7, 7, 12, - 13, 12, 9, 8, 7, 7, 7, 11, 13, 12, 10, 8, 7, 6, 6, 11, 12, 11, 10, 8, 7, - 6, 6, 11, 12, 11, 10, 8, 7, 6, 6, 10, 12, 11, 9, 8, 7, 6, 6, 10, 11, 10, - 9, 8, 7, 6, 6, 10, 11, 10, 9, 8, 7, 6, 5, 9, 10, 10, 9, 7, 6, 6, 5, 9, - 10, 10, 9, 7, 6, 6, 5, 9, 10, 10, 9, 8, 7, 6, 5, 8, 9, 10, 9, 8, 7, 6, - 5, - /* Size 32x8 */ - 32, 33, 33, 33, 32, 32, 30, 29, 28, 26, 23, 22, 21, 19, 18, 17, 16, 14, - 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 32, 32, 31, 31, 30, - 29, 28, 28, 27, 26, 24, 23, 22, 20, 19, 18, 18, 16, 15, 14, 14, 13, 13, - 12, 12, 12, 11, 11, 10, 10, 10, 9, 28, 29, 30, 29, 28, 27, 24, 23, 21, - 20, 19, 19, 18, 17, 16, 16, 15, 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, - 10, 10, 10, 10, 10, 19, 20, 21, 21, 20, 21, 19, 18, 17, 16, 14, 14, 13, - 12, 12, 12, 11, 11, 10, 10, 10, 9, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 16, - 17, 17, 17, 17, 18, 16, 16, 15, 14, 13, 12, 12, 11, 10, 10, 10, 9, 9, 9, - 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 8, 8, 12, 12, 13, 13, 13, 14, 13, 12, 12, - 12, 11, 10, 10, 9, 9, 9, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 7, 7, - 11, 12, 12, 12, 12, 12, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, - 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 11, 11, 11, 12, 12, 12, 12, - 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, - 5, 5 }, - { /* Chroma */ - /* Size 4x4 */ - 29, 22, 18, 16, 22, 17, 15, 14, 18, 15, 11, 11, 16, 14, 11, 9, - /* Size 8x8 */ - 33, 27, 22, 20, 18, 16, 15, 14, 27, 22, 22, 22, 20, 18, 17, 15, 22, 22, - 19, 18, 17, 16, 15, 15, 20, 22, 18, 16, 14, 13, 14, 14, 18, 20, 17, 14, - 12, 12, 12, 12, 16, 18, 16, 13, 12, 11, 11, 11, 15, 17, 15, 14, 12, 11, - 10, 10, 14, 15, 15, 14, 12, 11, 10, 9, - /* Size 16x16 */ - 32, 34, 31, 25, 21, 21, 20, 19, 18, 16, 16, 15, 15, 14, 14, 13, 34, 32, - 29, 24, 22, 23, 22, 21, 20, 18, 18, 
17, 16, 15, 15, 14, 31, 29, 26, 23, - 22, 23, 22, 21, 20, 18, 18, 17, 17, 16, 16, 15, 25, 24, 23, 21, 20, 21, - 20, 20, 19, 18, 18, 17, 17, 17, 16, 15, 21, 22, 22, 20, 19, 19, 19, 19, - 18, 17, 17, 16, 16, 16, 16, 16, 21, 23, 23, 21, 19, 18, 17, 17, 16, 15, - 15, 15, 15, 15, 15, 15, 20, 22, 22, 20, 19, 17, 17, 16, 15, 14, 14, 14, - 14, 14, 14, 14, 19, 21, 21, 20, 19, 17, 16, 14, 14, 13, 13, 13, 13, 13, - 13, 13, 18, 20, 20, 19, 18, 16, 15, 14, 13, 12, 12, 12, 12, 12, 12, 12, - 16, 18, 18, 18, 17, 15, 14, 13, 12, 12, 11, 11, 12, 12, 12, 12, 16, 18, - 18, 18, 17, 15, 14, 13, 12, 11, 11, 11, 11, 11, 11, 11, 15, 17, 17, 17, - 16, 15, 14, 13, 12, 11, 11, 10, 10, 10, 10, 10, 15, 16, 17, 17, 16, 15, - 14, 13, 12, 12, 11, 10, 10, 10, 10, 10, 14, 15, 16, 17, 16, 15, 14, 13, - 12, 12, 11, 10, 10, 10, 9, 9, 14, 15, 16, 16, 16, 15, 14, 13, 12, 12, - 11, 10, 10, 9, 9, 9, 13, 14, 15, 15, 16, 15, 14, 13, 12, 12, 11, 10, 10, - 9, 9, 9, - /* Size 32x32 */ - 32, 33, 34, 32, 31, 28, 25, 23, 21, 21, 21, 20, 20, 20, 19, 18, 18, 17, - 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 33, 33, 33, 31, - 30, 27, 24, 23, 22, 22, 22, 22, 21, 20, 20, 19, 19, 18, 17, 17, 17, 16, - 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 34, 33, 32, 31, 29, 26, 24, 23, - 22, 23, 23, 23, 22, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, - 15, 15, 15, 14, 14, 14, 32, 31, 31, 29, 28, 25, 24, 23, 22, 22, 23, 22, - 22, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, - 15, 15, 31, 30, 29, 28, 26, 24, 23, 22, 22, 22, 23, 22, 22, 22, 21, 20, - 20, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 28, 27, - 26, 25, 24, 22, 22, 22, 21, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, 19, - 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 25, 24, 24, 24, 23, 22, - 21, 21, 20, 21, 21, 21, 20, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, - 17, 17, 17, 16, 16, 16, 15, 15, 23, 23, 23, 23, 22, 22, 21, 20, 20, 20, - 20, 20, 20, 20, 19, 19, 19, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, - 16, 16, 
16, 16, 21, 22, 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, - 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 21, 22, 23, 22, 22, 22, 21, 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, - 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 21, 22, 23, 23, - 23, 23, 21, 20, 19, 19, 18, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 20, 22, 23, 22, 22, 22, 21, 20, - 19, 18, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 15, 15, - 14, 14, 14, 14, 14, 14, 20, 21, 22, 22, 22, 22, 20, 20, 19, 18, 17, 17, - 17, 16, 16, 16, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 20, 20, 22, 22, 22, 22, 20, 20, 19, 18, 17, 17, 16, 16, 15, 15, - 15, 14, 14, 14, 14, 13, 14, 14, 13, 14, 14, 13, 14, 14, 13, 13, 19, 20, - 21, 21, 21, 21, 20, 19, 19, 18, 17, 16, 16, 15, 14, 14, 14, 14, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 18, 19, 20, 20, 20, 21, - 20, 19, 18, 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, 13, 13, 13, 12, 13, - 13, 13, 13, 13, 13, 13, 13, 12, 18, 19, 20, 20, 20, 20, 19, 19, 18, 17, - 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 17, 18, 19, 19, 19, 20, 19, 18, 18, 17, 16, 15, 15, 14, - 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 16, 17, 18, 18, 18, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, - 12, 12, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 18, 18, - 18, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 17, 18, 18, 18, 19, 18, 17, - 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 18, 17, 17, 16, 16, 15, 15, - 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 11, 11, 10, 11, 11, 11, 11, 11, - 11, 11, 15, 16, 17, 17, 17, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 12, - 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 15, 16, - 16, 17, 
17, 17, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, - 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15, 16, 16, 16, 17, 17, - 17, 16, 16, 16, 15, 15, 14, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 14, 15, 16, 16, 16, 17, 17, 16, 16, 15, - 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 14, 15, 15, 16, 16, 16, 17, 16, 16, 15, 15, 14, 14, 14, - 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 9, 9, 9, 10, 14, - 15, 15, 16, 16, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, - 11, 11, 11, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 14, 15, 15, 15, 16, 16, - 16, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, - 10, 10, 9, 9, 9, 9, 9, 9, 14, 14, 14, 15, 15, 15, 16, 16, 16, 15, 15, - 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, - 9, 9, 13, 14, 14, 15, 15, 15, 15, 16, 16, 15, 15, 14, 14, 13, 13, 13, - 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 13, 14, 14, - 15, 15, 15, 15, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, - 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, - /* Size 4x8 */ - 33, 22, 17, 16, 26, 23, 19, 17, 22, 18, 16, 16, 21, 17, 14, 14, 19, 16, - 12, 12, 17, 15, 11, 11, 16, 15, 11, 10, 15, 14, 12, 10, - /* Size 8x4 */ - 33, 26, 22, 21, 19, 17, 16, 15, 22, 23, 18, 17, 16, 15, 15, 14, 17, 19, - 16, 14, 12, 11, 11, 12, 16, 17, 16, 14, 12, 11, 10, 10, - /* Size 8x16 */ - 32, 28, 21, 20, 18, 16, 15, 14, 34, 26, 22, 21, 20, 17, 16, 16, 31, 24, - 22, 22, 20, 17, 17, 16, 24, 22, 20, 20, 19, 17, 17, 17, 21, 21, 19, 19, - 18, 17, 17, 17, 21, 22, 19, 17, 16, 15, 16, 16, 20, 22, 19, 16, 15, 14, - 14, 15, 19, 21, 19, 15, 14, 13, 13, 14, 18, 20, 18, 15, 13, 12, 13, 13, - 16, 19, 17, 14, 12, 11, 12, 12, 16, 18, 17, 14, 12, 11, 11, 12, 15, 17, - 16, 14, 12, 11, 10, 11, 15, 17, 16, 14, 12, 11, 10, 10, 14, 16, 16, 14, - 12, 11, 10, 10, 14, 15, 16, 14, 12, 11, 10, 10, 13, 15, 15, 14, 12, 11, - 10, 9, - /* Size 16x8 */ - 
32, 34, 31, 24, 21, 21, 20, 19, 18, 16, 16, 15, 15, 14, 14, 13, 28, 26, - 24, 22, 21, 22, 22, 21, 20, 19, 18, 17, 17, 16, 15, 15, 21, 22, 22, 20, - 19, 19, 19, 19, 18, 17, 17, 16, 16, 16, 16, 15, 20, 21, 22, 20, 19, 17, - 16, 15, 15, 14, 14, 14, 14, 14, 14, 14, 18, 20, 20, 19, 18, 16, 15, 14, - 13, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 17, 17, 15, 14, 13, 12, 11, - 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 16, 14, 13, 13, 12, 11, 10, - 10, 10, 10, 10, 14, 16, 16, 17, 17, 16, 15, 14, 13, 12, 12, 11, 10, 10, - 10, 9, - /* Size 16x32 */ - 32, 33, 28, 24, 21, 21, 20, 19, 18, 16, 16, 15, 15, 15, 14, 14, 33, 33, - 27, 24, 22, 22, 20, 20, 19, 17, 16, 16, 16, 16, 15, 15, 34, 32, 26, 24, - 22, 23, 21, 20, 20, 18, 17, 17, 16, 16, 16, 15, 32, 30, 25, 23, 22, 23, - 21, 21, 20, 18, 17, 17, 17, 16, 16, 16, 31, 28, 24, 23, 22, 22, 22, 21, - 20, 18, 17, 17, 17, 17, 16, 16, 28, 26, 22, 22, 22, 23, 22, 21, 20, 19, - 18, 18, 17, 17, 17, 16, 24, 24, 22, 21, 20, 21, 20, 20, 19, 18, 17, 18, - 17, 17, 17, 16, 23, 23, 22, 21, 20, 20, 20, 19, 19, 17, 17, 17, 17, 17, - 17, 17, 21, 22, 21, 20, 19, 19, 19, 19, 18, 17, 17, 16, 17, 16, 17, 17, - 21, 22, 22, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 16, 16, 21, 23, - 22, 21, 19, 18, 17, 17, 16, 15, 15, 15, 16, 16, 16, 16, 21, 22, 22, 21, - 19, 17, 17, 16, 16, 15, 14, 15, 15, 15, 15, 15, 20, 22, 22, 20, 19, 17, - 16, 16, 15, 14, 14, 14, 14, 15, 15, 15, 20, 21, 22, 20, 19, 17, 16, 15, - 14, 14, 13, 14, 14, 14, 14, 14, 19, 20, 21, 20, 19, 17, 15, 14, 14, 13, - 13, 13, 13, 14, 14, 14, 19, 20, 21, 20, 18, 16, 15, 14, 14, 13, 12, 13, - 13, 13, 13, 13, 18, 20, 20, 19, 18, 16, 15, 14, 13, 12, 12, 12, 13, 13, - 13, 13, 17, 19, 20, 19, 18, 16, 14, 14, 13, 12, 12, 12, 12, 12, 13, 13, - 16, 18, 19, 18, 17, 15, 14, 13, 12, 12, 11, 12, 12, 12, 12, 13, 16, 18, - 19, 18, 17, 15, 14, 13, 12, 12, 11, 11, 12, 12, 12, 12, 16, 17, 18, 18, - 17, 15, 14, 13, 12, 11, 11, 11, 11, 11, 12, 12, 15, 17, 18, 17, 16, 15, - 13, 13, 12, 11, 11, 11, 11, 11, 11, 11, 15, 17, 
17, 17, 16, 14, 14, 13, - 12, 11, 11, 11, 10, 11, 11, 11, 15, 17, 17, 17, 16, 15, 14, 13, 12, 12, - 11, 10, 10, 10, 11, 11, 15, 16, 17, 17, 16, 15, 14, 13, 12, 12, 11, 11, - 10, 10, 10, 11, 14, 16, 16, 17, 15, 15, 14, 13, 12, 11, 11, 10, 10, 10, - 10, 10, 14, 16, 16, 17, 16, 15, 14, 13, 12, 12, 11, 10, 10, 10, 10, 10, - 14, 16, 16, 16, 16, 15, 14, 13, 12, 12, 11, 10, 10, 10, 10, 10, 14, 15, - 15, 16, 16, 15, 14, 13, 12, 12, 11, 11, 10, 10, 10, 10, 14, 15, 15, 16, - 16, 14, 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 13, 15, 15, 16, 15, 14, - 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 13, 15, 15, 15, 15, 14, 14, 13, - 13, 11, 11, 10, 10, 9, 9, 9, - /* Size 32x16 */ - 32, 33, 34, 32, 31, 28, 24, 23, 21, 21, 21, 21, 20, 20, 19, 19, 18, 17, - 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 33, 33, 32, 30, - 28, 26, 24, 23, 22, 22, 23, 22, 22, 21, 20, 20, 20, 19, 18, 18, 17, 17, - 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 28, 27, 26, 25, 24, 22, 22, 22, - 21, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, - 16, 16, 15, 15, 15, 15, 24, 24, 24, 23, 23, 22, 21, 21, 20, 20, 21, 21, - 20, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, - 16, 15, 21, 22, 22, 22, 22, 22, 20, 20, 19, 19, 19, 19, 19, 19, 19, 18, - 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 16, 16, 16, 16, 15, 15, 21, 22, - 23, 23, 22, 23, 21, 20, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 15, 15, - 15, 15, 14, 15, 15, 15, 15, 15, 15, 14, 14, 14, 20, 20, 21, 21, 22, 22, - 20, 20, 19, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 19, 20, 20, 21, 21, 21, 20, 19, 19, 17, - 17, 16, 16, 15, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 18, 19, 20, 20, 20, 20, 19, 19, 18, 17, 16, 16, 15, 14, - 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, - 16, 17, 18, 18, 18, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, - 12, 12, 11, 11, 11, 12, 12, 11, 12, 12, 12, 12, 12, 11, 16, 16, 17, 17, - 17, 
18, 17, 17, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 18, 18, 17, - 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 11, 10, - 10, 10, 11, 11, 11, 10, 15, 16, 16, 17, 17, 17, 17, 17, 17, 16, 16, 15, - 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 15, 16, 16, 16, 17, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, - 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 9, 14, 15, - 16, 16, 16, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, - 12, 11, 11, 11, 10, 10, 10, 10, 10, 9, 9, 9, 14, 15, 15, 16, 16, 16, 16, - 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 11, 11, 11, - 10, 10, 10, 10, 9, 9, 9, - /* Size 4x16 */ - 33, 21, 16, 15, 32, 23, 18, 16, 28, 22, 18, 17, 24, 21, 18, 17, 22, 19, - 17, 16, 23, 18, 15, 16, 22, 17, 14, 15, 20, 17, 13, 14, 20, 16, 12, 13, - 18, 15, 12, 12, 17, 15, 11, 11, 17, 14, 11, 11, 16, 15, 12, 10, 16, 15, - 12, 10, 15, 15, 12, 10, 15, 14, 12, 10, - /* Size 16x4 */ - 33, 32, 28, 24, 22, 23, 22, 20, 20, 18, 17, 17, 16, 16, 15, 15, 21, 23, - 22, 21, 19, 18, 17, 17, 16, 15, 15, 14, 15, 15, 15, 14, 16, 18, 18, 18, - 17, 15, 14, 13, 12, 12, 11, 11, 12, 12, 12, 12, 15, 16, 17, 17, 16, 16, - 15, 14, 13, 12, 11, 11, 10, 10, 10, 10, - /* Size 8x32 */ - 32, 28, 21, 20, 18, 16, 15, 14, 33, 27, 22, 20, 19, 16, 16, 15, 34, 26, - 22, 21, 20, 17, 16, 16, 32, 25, 22, 21, 20, 17, 17, 16, 31, 24, 22, 22, - 20, 17, 17, 16, 28, 22, 22, 22, 20, 18, 17, 17, 24, 22, 20, 20, 19, 17, - 17, 17, 23, 22, 20, 20, 19, 17, 17, 17, 21, 21, 19, 19, 18, 17, 17, 17, - 21, 22, 19, 18, 17, 16, 16, 16, 21, 22, 19, 17, 16, 15, 16, 16, 21, 22, - 19, 17, 16, 14, 15, 15, 20, 22, 19, 16, 15, 14, 14, 15, 20, 22, 19, 16, - 14, 13, 14, 14, 19, 21, 19, 15, 14, 13, 13, 14, 19, 21, 18, 15, 14, 12, - 13, 13, 18, 20, 18, 15, 13, 12, 13, 13, 17, 20, 18, 14, 13, 12, 12, 13, - 16, 19, 17, 14, 12, 11, 12, 12, 16, 19, 17, 14, 12, 11, 12, 
12, 16, 18, - 17, 14, 12, 11, 11, 12, 15, 18, 16, 13, 12, 11, 11, 11, 15, 17, 16, 14, - 12, 11, 10, 11, 15, 17, 16, 14, 12, 11, 10, 11, 15, 17, 16, 14, 12, 11, - 10, 10, 14, 16, 15, 14, 12, 11, 10, 10, 14, 16, 16, 14, 12, 11, 10, 10, - 14, 16, 16, 14, 12, 11, 10, 10, 14, 15, 16, 14, 12, 11, 10, 10, 14, 15, - 16, 14, 12, 11, 10, 9, 13, 15, 15, 14, 12, 11, 10, 9, 13, 15, 15, 14, - 13, 11, 10, 9, - /* Size 32x8 */ - 32, 33, 34, 32, 31, 28, 24, 23, 21, 21, 21, 21, 20, 20, 19, 19, 18, 17, - 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 28, 27, 26, 25, - 24, 22, 22, 22, 21, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, - 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 21, 22, 22, 22, 22, 22, 20, 20, - 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, - 16, 16, 16, 16, 15, 15, 20, 20, 21, 21, 22, 22, 20, 20, 19, 18, 17, 17, - 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 18, 19, 20, 20, 20, 20, 19, 19, 18, 17, 16, 16, 15, 14, 14, 14, - 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 16, 16, - 17, 17, 17, 18, 17, 17, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, - 17, 17, 17, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 14, 15, 16, 16, 16, 17, 17, 17, 17, 16, - 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, - 10, 9, 9, 9 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 25, 15, 11, 25, 16, 12, 10, 15, 12, 8, 7, 11, 10, 7, 6, - /* Size 8x8 */ - 32, 32, 28, 22, 17, 13, 11, 10, 32, 29, 26, 22, 18, 14, 12, 11, 28, 26, - 20, 17, 14, 12, 11, 10, 22, 22, 17, 14, 12, 10, 10, 9, 17, 18, 14, 12, - 10, 8, 8, 8, 13, 14, 12, 10, 8, 7, 7, 7, 11, 12, 11, 10, 8, 7, 6, 6, 10, - 11, 10, 9, 8, 7, 6, 5, - /* Size 16x16 */ - 32, 33, 33, 32, 28, 26, 22, 19, 17, 14, 13, 12, 11, 10, 10, 9, 33, 32, - 32, 31, 30, 28, 23, 20, 18, 16, 14, 13, 12, 11, 10, 10, 33, 32, 31, 
30, - 28, 26, 23, 20, 18, 16, 14, 13, 12, 12, 11, 10, 32, 31, 30, 28, 26, 24, - 22, 20, 18, 16, 14, 13, 13, 12, 11, 10, 28, 30, 28, 26, 21, 20, 18, 17, - 16, 14, 13, 12, 12, 11, 11, 10, 26, 28, 26, 24, 20, 19, 17, 16, 15, 13, - 12, 12, 11, 11, 10, 10, 22, 23, 23, 22, 18, 17, 15, 14, 13, 12, 11, 10, - 10, 10, 9, 9, 19, 20, 20, 20, 17, 16, 14, 12, 12, 11, 10, 9, 9, 9, 9, 8, - 17, 18, 18, 18, 16, 15, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 14, 16, 16, - 16, 14, 13, 12, 11, 10, 9, 8, 8, 8, 8, 8, 7, 13, 14, 14, 14, 13, 12, 11, - 10, 9, 8, 8, 7, 7, 7, 7, 7, 12, 13, 13, 13, 12, 12, 10, 9, 9, 8, 7, 7, - 7, 7, 6, 6, 11, 12, 12, 13, 12, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6, 6, 10, - 11, 12, 12, 11, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6, 5, 10, 10, 11, 11, 11, - 10, 9, 9, 8, 8, 7, 6, 6, 6, 5, 5, 9, 10, 10, 10, 10, 10, 9, 8, 8, 7, 7, - 6, 6, 5, 5, 5, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 17, 17, 16, - 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 33, 32, 32, 32, 32, - 32, 31, 30, 29, 28, 27, 24, 23, 22, 20, 18, 18, 17, 15, 14, 13, 13, 12, - 12, 12, 11, 11, 11, 10, 10, 10, 9, 33, 32, 32, 32, 32, 32, 31, 31, 30, - 28, 28, 25, 23, 22, 20, 19, 18, 17, 16, 15, 14, 13, 13, 12, 12, 12, 11, - 11, 10, 10, 10, 9, 33, 32, 32, 32, 32, 31, 31, 30, 29, 28, 27, 25, 23, - 23, 21, 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, - 10, 33, 32, 32, 32, 31, 30, 30, 29, 28, 27, 26, 24, 23, 22, 20, 19, 18, - 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 32, 32, 32, - 31, 30, 29, 28, 28, 27, 26, 26, 24, 23, 22, 21, 19, 19, 18, 16, 16, 15, - 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 32, 31, 31, 31, 30, 28, 28, - 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, - 12, 12, 12, 11, 11, 10, 10, 30, 30, 31, 30, 29, 28, 27, 26, 24, 23, 23, - 22, 20, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11, - 11, 11, 10, 28, 29, 30, 29, 28, 27, 26, 24, 21, 20, 20, 19, 18, 18, 17, - 16, 16, 15, 14, 13, 13, 13, 12, 12, 
12, 12, 11, 11, 11, 11, 10, 10, 27, - 28, 28, 28, 27, 26, 25, 23, 20, 20, 20, 18, 18, 17, 16, 15, 15, 14, 13, - 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 26, 27, 28, 27, 26, - 26, 24, 23, 20, 20, 19, 18, 17, 17, 16, 15, 15, 14, 13, 13, 12, 12, 12, - 11, 11, 11, 11, 10, 10, 10, 10, 10, 23, 24, 25, 25, 24, 24, 23, 22, 19, - 18, 18, 16, 16, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 10, 10, - 10, 10, 10, 9, 9, 22, 23, 23, 23, 23, 23, 22, 20, 18, 18, 17, 16, 15, - 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, - 21, 22, 22, 23, 22, 22, 21, 20, 18, 17, 17, 15, 14, 14, 13, 13, 12, 12, - 11, 11, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 19, 20, 20, 21, 20, - 21, 20, 19, 17, 16, 16, 14, 14, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, - 9, 9, 9, 9, 9, 9, 8, 8, 8, 17, 18, 19, 19, 19, 19, 19, 18, 16, 15, 15, - 14, 13, 13, 12, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, - 17, 18, 18, 18, 18, 19, 18, 17, 16, 15, 15, 13, 13, 12, 12, 11, 11, 10, - 10, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 16, 17, 17, 17, 17, 18, 17, - 16, 15, 14, 14, 13, 12, 12, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 7, 14, 15, 16, 16, 16, 16, 16, 15, 14, 13, 13, 12, 12, 11, - 11, 10, 10, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 13, 14, 15, 15, - 15, 16, 15, 14, 13, 13, 13, 12, 11, 11, 10, 10, 9, 9, 8, 8, 8, 8, 8, 8, - 7, 7, 7, 7, 7, 7, 7, 7, 13, 13, 14, 14, 14, 15, 14, 14, 13, 12, 12, 11, - 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 13, - 13, 14, 14, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 8, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 12, 12, 13, 13, 13, 14, 13, 13, 12, 12, - 12, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 6, 7, 6, 6, 6, 6, 6, - 12, 12, 12, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, - 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 11, 12, 12, 12, 12, 13, 13, 12, - 12, 11, 11, 11, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, - 6, 6, 11, 11, 12, 12, 12, 12, 12, 12, 12, 11, 11, 
10, 10, 10, 9, 9, 8, - 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 10, 11, 11, 12, 12, 12, 12, - 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, - 5, 5, 5, 10, 11, 11, 11, 11, 11, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, - 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 10, 10, 10, 11, 11, 11, 11, - 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, - 5, 5, 5, 9, 10, 10, 10, 11, 11, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, - 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 9, 10, 10, 10, 10, 10, 10, - 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, - 5, 5, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, - 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, - /* Size 4x8 */ - 32, 24, 15, 12, 31, 24, 16, 12, 28, 18, 13, 12, 22, 15, 11, 10, 17, 13, - 9, 8, 14, 11, 8, 7, 12, 11, 8, 6, 10, 10, 8, 6, - /* Size 8x4 */ - 32, 31, 28, 22, 17, 14, 12, 10, 24, 24, 18, 15, 13, 11, 11, 10, 15, 16, - 13, 11, 9, 8, 8, 8, 12, 12, 12, 10, 8, 7, 6, 6, - /* Size 8x16 */ - 32, 32, 28, 22, 16, 13, 11, 11, 33, 32, 29, 23, 17, 14, 12, 11, 32, 30, - 28, 23, 17, 14, 13, 12, 32, 29, 26, 22, 17, 14, 13, 12, 28, 28, 21, 18, - 15, 13, 12, 12, 26, 26, 20, 17, 14, 12, 11, 11, 22, 23, 18, 15, 12, 11, - 10, 10, 19, 20, 17, 14, 11, 10, 9, 9, 17, 18, 16, 13, 10, 9, 9, 9, 14, - 16, 14, 12, 9, 8, 8, 8, 13, 15, 13, 11, 9, 8, 7, 7, 12, 13, 12, 10, 8, - 7, 7, 7, 11, 12, 12, 10, 8, 7, 7, 6, 10, 12, 11, 9, 8, 7, 6, 6, 10, 11, - 11, 9, 8, 7, 6, 6, 9, 10, 10, 9, 8, 7, 6, 5, - /* Size 16x8 */ - 32, 33, 32, 32, 28, 26, 22, 19, 17, 14, 13, 12, 11, 10, 10, 9, 32, 32, - 30, 29, 28, 26, 23, 20, 18, 16, 15, 13, 12, 12, 11, 10, 28, 29, 28, 26, - 21, 20, 18, 17, 16, 14, 13, 12, 12, 11, 11, 10, 22, 23, 23, 22, 18, 17, - 15, 14, 13, 12, 11, 10, 10, 9, 9, 9, 16, 17, 17, 17, 15, 14, 12, 11, 10, - 9, 9, 8, 8, 8, 8, 8, 13, 14, 14, 14, 13, 12, 11, 10, 9, 8, 8, 7, 7, 7, - 7, 7, 11, 12, 13, 13, 12, 11, 10, 9, 9, 8, 7, 7, 7, 6, 6, 6, 11, 11, 12, - 12, 12, 11, 10, 
9, 9, 8, 7, 7, 6, 6, 6, 5, - /* Size 16x32 */ - 32, 33, 32, 32, 28, 23, 22, 19, 16, 14, 13, 12, 11, 11, 11, 10, 33, 32, - 32, 31, 29, 24, 23, 20, 17, 15, 14, 12, 12, 12, 11, 11, 33, 32, 32, 31, - 29, 25, 23, 21, 17, 15, 14, 13, 12, 12, 11, 11, 33, 32, 31, 31, 29, 25, - 23, 21, 17, 16, 14, 13, 12, 12, 12, 11, 32, 32, 30, 30, 28, 24, 23, 20, - 17, 16, 14, 13, 13, 12, 12, 11, 32, 31, 29, 28, 27, 24, 23, 21, 18, 16, - 15, 13, 13, 12, 12, 12, 32, 31, 29, 28, 26, 23, 22, 20, 17, 16, 14, 13, - 13, 13, 12, 12, 30, 30, 28, 27, 24, 21, 20, 19, 16, 15, 14, 13, 12, 13, - 12, 12, 28, 30, 28, 26, 21, 19, 18, 17, 15, 14, 13, 12, 12, 12, 12, 12, - 27, 28, 26, 25, 21, 18, 18, 16, 14, 13, 13, 12, 12, 12, 11, 11, 26, 28, - 26, 24, 20, 18, 17, 16, 14, 13, 12, 11, 11, 11, 11, 11, 23, 25, 24, 23, - 19, 16, 16, 14, 13, 12, 11, 11, 11, 11, 11, 10, 22, 23, 23, 22, 18, 16, - 15, 14, 12, 11, 11, 10, 10, 10, 10, 10, 21, 22, 22, 21, 18, 15, 14, 13, - 12, 11, 11, 10, 10, 10, 10, 10, 19, 21, 20, 20, 17, 14, 14, 12, 11, 10, - 10, 9, 9, 10, 9, 10, 18, 19, 19, 19, 16, 14, 13, 12, 10, 10, 9, 9, 9, 9, - 9, 9, 17, 18, 18, 18, 16, 13, 13, 12, 10, 10, 9, 9, 9, 9, 9, 9, 16, 17, - 17, 17, 15, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 8, 14, 16, 16, 16, 14, 12, - 12, 11, 9, 9, 8, 8, 8, 8, 8, 8, 13, 15, 15, 15, 13, 12, 11, 10, 9, 8, 8, - 8, 8, 8, 8, 8, 13, 14, 15, 14, 13, 11, 11, 10, 9, 8, 8, 7, 7, 7, 7, 8, - 12, 14, 14, 14, 13, 11, 11, 10, 8, 8, 8, 7, 7, 7, 7, 7, 12, 13, 13, 13, - 12, 11, 10, 9, 8, 8, 7, 7, 7, 7, 7, 7, 12, 13, 13, 13, 12, 11, 10, 9, 8, - 8, 7, 7, 7, 7, 7, 6, 11, 12, 12, 13, 12, 11, 10, 9, 8, 8, 7, 7, 7, 6, 6, - 6, 11, 12, 12, 12, 11, 11, 10, 9, 9, 8, 7, 7, 6, 6, 6, 6, 10, 12, 12, - 12, 11, 11, 9, 9, 8, 8, 7, 6, 6, 6, 6, 6, 10, 11, 11, 12, 11, 10, 9, 9, - 8, 8, 7, 6, 6, 6, 6, 6, 10, 11, 11, 11, 11, 10, 9, 9, 8, 8, 7, 7, 6, 6, - 6, 6, 10, 10, 11, 11, 11, 10, 9, 9, 8, 8, 7, 7, 6, 6, 5, 5, 9, 10, 10, - 11, 10, 9, 9, 8, 8, 7, 7, 6, 6, 6, 5, 5, 9, 10, 10, 10, 10, 9, 9, 8, 8, - 7, 7, 6, 6, 5, 5, 5, - /* 
Size 32x16 */ - 32, 33, 33, 33, 32, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 18, 17, 16, - 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 33, 32, 32, 32, - 32, 31, 31, 30, 30, 28, 28, 25, 23, 22, 21, 19, 18, 17, 16, 15, 14, 14, - 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 32, 32, 32, 31, 30, 29, 29, 28, - 28, 26, 26, 24, 23, 22, 20, 19, 18, 17, 16, 15, 15, 14, 13, 13, 12, 12, - 12, 11, 11, 11, 10, 10, 32, 31, 31, 31, 30, 28, 28, 27, 26, 25, 24, 23, - 22, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, - 11, 10, 28, 29, 29, 29, 28, 27, 26, 24, 21, 21, 20, 19, 18, 18, 17, 16, - 16, 15, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 23, 24, - 25, 25, 24, 24, 23, 21, 19, 18, 18, 16, 16, 15, 14, 14, 13, 13, 12, 12, - 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 9, 9, 22, 23, 23, 23, 23, 23, - 22, 20, 18, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, - 10, 10, 9, 9, 9, 9, 9, 9, 19, 20, 21, 21, 20, 21, 20, 19, 17, 16, 16, - 14, 14, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 8, - 8, 16, 17, 17, 17, 17, 18, 17, 16, 15, 14, 14, 13, 12, 12, 11, 10, 10, - 10, 9, 9, 9, 8, 8, 8, 8, 9, 8, 8, 8, 8, 8, 8, 14, 15, 15, 16, 16, 16, - 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 7, 7, 13, 14, 14, 14, 14, 15, 14, 14, 13, 13, 12, 11, 11, - 11, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 13, - 13, 13, 13, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, - 7, 7, 7, 6, 6, 7, 7, 6, 6, 11, 12, 12, 12, 13, 13, 13, 12, 12, 12, 11, - 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 11, - 12, 12, 12, 12, 12, 13, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, - 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 5, 11, 11, 11, 12, 12, 12, 12, 12, 12, - 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, - 5, 10, 11, 11, 11, 11, 12, 12, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 8, - 8, 8, 8, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5, - /* Size 4x16 */ - 33, 23, 14, 11, 
32, 25, 15, 12, 32, 24, 16, 12, 31, 23, 16, 13, 30, 19, - 14, 12, 28, 18, 13, 11, 23, 16, 11, 10, 21, 14, 10, 10, 18, 13, 10, 9, - 16, 12, 9, 8, 14, 11, 8, 7, 13, 11, 8, 7, 12, 11, 8, 6, 12, 11, 8, 6, - 11, 10, 8, 6, 10, 9, 7, 6, - /* Size 16x4 */ - 33, 32, 32, 31, 30, 28, 23, 21, 18, 16, 14, 13, 12, 12, 11, 10, 23, 25, - 24, 23, 19, 18, 16, 14, 13, 12, 11, 11, 11, 11, 10, 9, 14, 15, 16, 16, - 14, 13, 11, 10, 10, 9, 8, 8, 8, 8, 8, 7, 11, 12, 12, 13, 12, 11, 10, 10, - 9, 8, 7, 7, 6, 6, 6, 6, - /* Size 8x32 */ - 32, 32, 28, 22, 16, 13, 11, 11, 33, 32, 29, 23, 17, 14, 12, 11, 33, 32, - 29, 23, 17, 14, 12, 11, 33, 31, 29, 23, 17, 14, 12, 12, 32, 30, 28, 23, - 17, 14, 13, 12, 32, 29, 27, 23, 18, 15, 13, 12, 32, 29, 26, 22, 17, 14, - 13, 12, 30, 28, 24, 20, 16, 14, 12, 12, 28, 28, 21, 18, 15, 13, 12, 12, - 27, 26, 21, 18, 14, 13, 12, 11, 26, 26, 20, 17, 14, 12, 11, 11, 23, 24, - 19, 16, 13, 11, 11, 11, 22, 23, 18, 15, 12, 11, 10, 10, 21, 22, 18, 14, - 12, 11, 10, 10, 19, 20, 17, 14, 11, 10, 9, 9, 18, 19, 16, 13, 10, 9, 9, - 9, 17, 18, 16, 13, 10, 9, 9, 9, 16, 17, 15, 12, 10, 9, 8, 8, 14, 16, 14, - 12, 9, 8, 8, 8, 13, 15, 13, 11, 9, 8, 8, 8, 13, 15, 13, 11, 9, 8, 7, 7, - 12, 14, 13, 11, 8, 8, 7, 7, 12, 13, 12, 10, 8, 7, 7, 7, 12, 13, 12, 10, - 8, 7, 7, 7, 11, 12, 12, 10, 8, 7, 7, 6, 11, 12, 11, 10, 9, 7, 6, 6, 10, - 12, 11, 9, 8, 7, 6, 6, 10, 11, 11, 9, 8, 7, 6, 6, 10, 11, 11, 9, 8, 7, - 6, 6, 10, 11, 11, 9, 8, 7, 6, 5, 9, 10, 10, 9, 8, 7, 6, 5, 9, 10, 10, 9, - 8, 7, 6, 5, - /* Size 32x8 */ - 32, 33, 33, 33, 32, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 18, 17, 16, - 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 32, 32, 32, 31, - 30, 29, 29, 28, 28, 26, 26, 24, 23, 22, 20, 19, 18, 17, 16, 15, 15, 14, - 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 28, 29, 29, 29, 28, 27, 26, 24, - 21, 21, 20, 19, 18, 18, 17, 16, 16, 15, 14, 13, 13, 13, 12, 12, 12, 11, - 11, 11, 11, 11, 10, 10, 22, 23, 23, 23, 23, 23, 22, 20, 18, 18, 17, 16, - 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 
10, 10, 10, 9, 9, 9, 9, 9, - 9, 16, 17, 17, 17, 17, 18, 17, 16, 15, 14, 14, 13, 12, 12, 11, 10, 10, - 10, 9, 9, 9, 8, 8, 8, 8, 9, 8, 8, 8, 8, 8, 8, 13, 14, 14, 14, 14, 15, - 14, 14, 13, 13, 12, 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 11, 12, 12, 12, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, - 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 11, 11, 11, 12, - 12, 12, 12, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, - 6, 6, 6, 6, 5, 5, 5 }, - { /* Chroma */ - /* Size 4x4 */ - 31, 23, 18, 16, 23, 18, 16, 15, 18, 16, 12, 12, 16, 15, 12, 10, - /* Size 8x8 */ - 33, 27, 22, 21, 19, 17, 16, 15, 27, 22, 22, 22, 20, 19, 17, 16, 22, 22, - 19, 19, 18, 16, 16, 16, 21, 22, 19, 17, 15, 14, 14, 14, 19, 20, 18, 15, - 13, 12, 12, 12, 17, 19, 16, 14, 12, 11, 11, 11, 16, 17, 16, 14, 12, 11, - 10, 10, 15, 16, 16, 14, 12, 11, 10, 9, - /* Size 16x16 */ - 32, 34, 31, 27, 21, 21, 20, 20, 19, 17, 16, 16, 15, 15, 14, 14, 34, 33, - 29, 25, 22, 22, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 31, 29, 26, 23, - 22, 22, 22, 22, 20, 19, 18, 18, 17, 17, 16, 15, 27, 25, 23, 22, 21, 21, - 22, 21, 20, 19, 19, 18, 18, 17, 17, 16, 21, 22, 22, 21, 19, 19, 19, 19, - 18, 18, 17, 17, 17, 16, 16, 16, 21, 22, 22, 21, 19, 19, 18, 18, 17, 17, - 16, 16, 15, 16, 15, 15, 20, 22, 22, 22, 19, 18, 17, 16, 16, 15, 15, 14, - 14, 14, 14, 14, 20, 21, 22, 21, 19, 18, 16, 16, 15, 14, 14, 13, 14, 13, - 13, 13, 19, 20, 20, 20, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, - 17, 19, 19, 19, 18, 17, 15, 14, 13, 12, 12, 12, 12, 12, 12, 12, 16, 18, - 18, 19, 17, 16, 15, 14, 13, 12, 12, 11, 11, 11, 12, 11, 16, 17, 18, 18, - 17, 16, 14, 13, 13, 12, 11, 11, 11, 11, 11, 11, 15, 16, 17, 18, 17, 15, - 14, 14, 13, 12, 11, 11, 10, 10, 10, 10, 15, 16, 17, 17, 16, 16, 14, 13, - 13, 12, 11, 11, 10, 10, 10, 10, 14, 15, 16, 17, 16, 15, 14, 13, 13, 12, - 12, 11, 10, 10, 10, 9, 14, 15, 15, 16, 16, 15, 14, 13, 13, 12, 11, 11, - 10, 10, 9, 9, - /* Size 32x32 */ - 32, 33, 34, 33, 31, 28, 27, 25, 21, 
21, 21, 21, 20, 20, 20, 19, 19, 18, - 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 33, 33, 33, 32, - 30, 27, 26, 24, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 17, 17, 17, - 16, 16, 16, 16, 15, 15, 15, 15, 15, 14, 34, 33, 33, 32, 29, 26, 25, 24, - 22, 22, 22, 23, 22, 22, 21, 20, 20, 20, 19, 18, 18, 17, 17, 17, 16, 16, - 16, 15, 15, 15, 15, 14, 33, 32, 32, 31, 28, 26, 25, 24, 22, 22, 23, 23, - 22, 22, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, - 15, 15, 31, 30, 29, 28, 26, 24, 23, 23, 22, 22, 22, 23, 22, 22, 22, 21, - 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 15, 15, 28, 27, - 26, 26, 24, 22, 22, 22, 21, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, - 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 27, 26, 25, 25, 23, 22, - 22, 21, 21, 21, 21, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 18, - 18, 17, 17, 17, 17, 16, 16, 16, 25, 24, 24, 24, 23, 22, 21, 21, 20, 20, - 21, 21, 20, 20, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, - 17, 16, 16, 16, 21, 22, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 19, 19, - 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 17, 17, 16, 16, 16, 16, 16, 16, - 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, - 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 21, 22, 22, 23, - 22, 22, 21, 21, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 16, 16, 16, - 16, 16, 15, 16, 16, 15, 15, 15, 15, 15, 21, 22, 23, 23, 23, 23, 22, 21, - 19, 19, 19, 18, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 14, 20, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 17, - 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 20, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 17, 17, 17, 16, 16, - 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 20, 20, - 21, 22, 22, 22, 21, 20, 19, 18, 18, 17, 16, 16, 16, 15, 15, 15, 14, 14, - 14, 14, 13, 13, 14, 13, 13, 14, 13, 13, 13, 14, 19, 20, 20, 21, 21, 21, - 21, 20, 19, 18, 18, 17, 16, 16, 15, 
14, 14, 14, 14, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 19, 19, 20, 20, 20, 21, 20, 20, 18, 18, - 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 18, 19, 20, 20, 20, 20, 20, 19, 18, 18, 17, 16, 16, 15, - 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 17, 18, 19, 19, 19, 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 17, 17, 18, 18, - 19, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 13, 13, 13, 12, 12, 12, 12, - 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 18, 18, 18, 19, 19, 18, - 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, - 11, 11, 12, 11, 11, 12, 16, 17, 17, 18, 18, 19, 18, 18, 17, 16, 16, 15, - 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 16, 16, 17, 17, 18, 18, 18, 17, 17, 16, 16, 15, 14, 14, 13, 13, - 13, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, - 17, 17, 17, 18, 18, 17, 16, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, - 11, 11, 11, 11, 11, 10, 10, 11, 11, 11, 11, 10, 15, 16, 16, 17, 17, 17, - 18, 17, 17, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, - 10, 10, 10, 10, 10, 10, 10, 10, 15, 16, 16, 16, 17, 17, 17, 17, 17, 16, - 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 15, 15, 16, 16, 17, 17, 17, 17, 16, 16, 16, 15, 14, 14, - 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 14, 15, 15, 16, 16, 16, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, - 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 9, 14, 15, 15, 16, - 16, 16, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, - 11, 11, 10, 10, 10, 10, 10, 10, 9, 9, 14, 15, 15, 16, 16, 16, 16, 16, - 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 10, - 10, 10, 10, 9, 9, 9, 14, 15, 15, 15, 15, 16, 16, 16, 16, 15, 15, 15, 14, - 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 
11, 10, 10, 10, 10, 9, 9, 9, 9, - 14, 14, 14, 15, 15, 16, 16, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, - 12, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, - /* Size 4x8 */ - 33, 22, 18, 16, 26, 23, 20, 17, 22, 19, 17, 16, 22, 17, 15, 14, 20, 16, - 13, 13, 17, 15, 12, 11, 16, 16, 12, 10, 16, 15, 12, 10, - /* Size 8x4 */ - 33, 26, 22, 22, 20, 17, 16, 16, 22, 23, 19, 17, 16, 15, 16, 15, 18, 20, - 17, 15, 13, 12, 12, 12, 16, 17, 16, 14, 13, 11, 10, 10, - /* Size 8x16 */ - 32, 29, 21, 20, 18, 16, 15, 15, 34, 27, 22, 22, 20, 18, 16, 16, 31, 25, - 22, 22, 20, 18, 17, 16, 26, 22, 21, 22, 20, 19, 18, 17, 21, 21, 19, 19, - 18, 17, 17, 17, 21, 22, 19, 18, 17, 16, 16, 16, 20, 22, 19, 17, 16, 15, - 14, 15, 20, 22, 19, 16, 14, 14, 14, 14, 19, 21, 18, 16, 14, 13, 13, 13, - 17, 19, 18, 15, 13, 12, 12, 12, 16, 19, 17, 15, 12, 12, 11, 12, 16, 18, - 17, 14, 12, 11, 11, 11, 15, 17, 16, 14, 13, 11, 11, 11, 15, 17, 16, 14, - 13, 12, 10, 10, 14, 16, 16, 14, 12, 11, 10, 10, 14, 15, 16, 14, 13, 12, - 10, 10, - /* Size 16x8 */ - 32, 34, 31, 26, 21, 21, 20, 20, 19, 17, 16, 16, 15, 15, 14, 14, 29, 27, - 25, 22, 21, 22, 22, 22, 21, 19, 19, 18, 17, 17, 16, 15, 21, 22, 22, 21, - 19, 19, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 20, 22, 22, 22, 19, 18, - 17, 16, 16, 15, 15, 14, 14, 14, 14, 14, 18, 20, 20, 20, 18, 17, 16, 14, - 14, 13, 12, 12, 13, 13, 12, 13, 16, 18, 18, 19, 17, 16, 15, 14, 13, 12, - 12, 11, 11, 12, 11, 12, 15, 16, 17, 18, 17, 16, 14, 14, 13, 12, 11, 11, - 11, 10, 10, 10, 15, 16, 16, 17, 17, 16, 15, 14, 13, 12, 12, 11, 11, 10, - 10, 10, - /* Size 16x32 */ - 32, 33, 29, 27, 21, 21, 20, 20, 18, 17, 16, 15, 15, 15, 15, 14, 33, 33, - 28, 26, 22, 22, 21, 20, 19, 18, 17, 16, 16, 16, 16, 15, 34, 32, 27, 26, - 22, 23, 22, 21, 20, 19, 18, 17, 16, 16, 16, 15, 33, 31, 27, 25, 22, 23, - 22, 21, 20, 19, 18, 17, 17, 17, 16, 16, 31, 28, 25, 23, 22, 22, 22, 22, - 20, 19, 18, 17, 17, 17, 16, 16, 28, 26, 23, 22, 22, 23, 22, 22, 20, 20, - 19, 18, 17, 17, 17, 17, 26, 25, 22, 22, 21, 22, 22, 21, 20, 
19, 19, 18, - 18, 17, 17, 17, 24, 24, 22, 21, 20, 21, 20, 20, 19, 18, 18, 17, 17, 17, - 17, 17, 21, 22, 21, 21, 19, 19, 19, 19, 18, 17, 17, 16, 17, 17, 17, 17, - 21, 22, 22, 21, 19, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 21, 22, - 22, 21, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 16, 16, 21, 23, 23, 22, - 19, 18, 17, 17, 16, 16, 15, 15, 15, 15, 16, 15, 20, 22, 22, 21, 19, 17, - 17, 16, 16, 15, 15, 14, 14, 15, 15, 15, 20, 22, 22, 21, 19, 17, 17, 16, - 15, 15, 14, 14, 14, 14, 15, 14, 20, 21, 22, 21, 19, 17, 16, 16, 14, 14, - 14, 13, 14, 14, 14, 14, 19, 20, 21, 20, 19, 17, 16, 15, 14, 13, 13, 13, - 13, 13, 14, 14, 19, 20, 21, 20, 18, 16, 16, 15, 14, 13, 13, 13, 13, 13, - 13, 14, 18, 20, 20, 20, 18, 16, 16, 15, 13, 13, 12, 12, 12, 13, 13, 13, - 17, 19, 19, 19, 18, 16, 15, 14, 13, 12, 12, 12, 12, 12, 12, 13, 17, 18, - 19, 19, 17, 16, 15, 14, 13, 12, 12, 12, 12, 12, 12, 12, 16, 18, 19, 18, - 17, 15, 15, 14, 12, 12, 12, 11, 11, 12, 12, 12, 16, 17, 18, 18, 17, 15, - 14, 14, 12, 12, 11, 11, 11, 11, 12, 12, 16, 17, 18, 18, 17, 15, 14, 13, - 12, 12, 11, 11, 11, 11, 11, 12, 15, 17, 17, 18, 16, 15, 14, 13, 12, 12, - 11, 11, 11, 11, 11, 11, 15, 17, 17, 17, 16, 15, 14, 13, 13, 12, 11, 11, - 11, 10, 11, 11, 15, 16, 17, 17, 16, 16, 14, 13, 13, 12, 11, 11, 10, 10, - 10, 10, 15, 16, 17, 17, 16, 16, 14, 13, 13, 12, 12, 11, 10, 10, 10, 10, - 14, 16, 16, 17, 16, 15, 14, 14, 12, 12, 11, 11, 10, 10, 10, 10, 14, 16, - 16, 17, 16, 15, 14, 14, 12, 12, 11, 11, 10, 10, 10, 10, 14, 16, 16, 16, - 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 10, 10, 14, 15, 15, 16, 16, 15, - 14, 13, 13, 12, 12, 11, 10, 10, 10, 10, 14, 15, 15, 16, 16, 14, 14, 13, - 13, 12, 12, 11, 11, 10, 10, 9, - /* Size 32x16 */ - 32, 33, 34, 33, 31, 28, 26, 24, 21, 21, 21, 21, 20, 20, 20, 19, 19, 18, - 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 33, 33, 32, 31, - 28, 26, 25, 24, 22, 22, 22, 23, 22, 22, 21, 20, 20, 20, 19, 18, 18, 17, - 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 29, 28, 27, 27, 25, 23, 22, 22, - 21, 22, 
22, 23, 22, 22, 22, 21, 21, 20, 19, 19, 19, 18, 18, 17, 17, 17, - 17, 16, 16, 16, 15, 15, 27, 26, 26, 25, 23, 22, 22, 21, 21, 21, 21, 22, - 21, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 16, - 16, 16, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, - 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 21, 22, - 23, 23, 22, 23, 22, 21, 19, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, - 15, 15, 15, 15, 15, 16, 16, 15, 15, 15, 15, 14, 20, 21, 22, 22, 22, 22, - 22, 20, 19, 19, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 20, 20, 21, 21, 22, 22, 21, 20, 19, 18, - 18, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 13, 14, - 14, 13, 13, 13, 18, 19, 20, 20, 20, 20, 20, 19, 18, 18, 17, 16, 16, 15, - 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 13, 13, 13, 12, 12, 13, 13, 13, - 17, 18, 19, 19, 19, 20, 19, 18, 17, 17, 17, 16, 15, 15, 14, 13, 13, 13, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 18, 18, - 18, 19, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, - 11, 11, 11, 11, 12, 11, 11, 11, 12, 12, 15, 16, 17, 17, 17, 18, 18, 17, - 16, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 18, 17, 17, 16, 16, 15, - 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, - 10, 11, 15, 16, 16, 17, 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, - 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 15, 16, - 16, 16, 16, 17, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, - 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 14, 15, 15, 16, 16, 17, - 17, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, - 11, 10, 10, 10, 10, 10, 10, 9, - /* Size 4x16 */ - 33, 21, 17, 15, 32, 23, 19, 16, 28, 22, 19, 17, 25, 22, 19, 17, 22, 19, - 17, 17, 22, 18, 17, 16, 22, 17, 15, 15, 21, 17, 14, 14, 20, 16, 13, 13, - 19, 16, 12, 12, 18, 15, 12, 12, 
17, 15, 12, 11, 17, 15, 12, 10, 16, 16, - 12, 10, 16, 15, 12, 10, 15, 15, 12, 10, - /* Size 16x4 */ - 33, 32, 28, 25, 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 16, 15, 21, 23, - 22, 22, 19, 18, 17, 17, 16, 16, 15, 15, 15, 16, 15, 15, 17, 19, 19, 19, - 17, 17, 15, 14, 13, 12, 12, 12, 12, 12, 12, 12, 15, 16, 17, 17, 17, 16, - 15, 14, 13, 12, 12, 11, 10, 10, 10, 10, - /* Size 8x32 */ - 32, 29, 21, 20, 18, 16, 15, 15, 33, 28, 22, 21, 19, 17, 16, 16, 34, 27, - 22, 22, 20, 18, 16, 16, 33, 27, 22, 22, 20, 18, 17, 16, 31, 25, 22, 22, - 20, 18, 17, 16, 28, 23, 22, 22, 20, 19, 17, 17, 26, 22, 21, 22, 20, 19, - 18, 17, 24, 22, 20, 20, 19, 18, 17, 17, 21, 21, 19, 19, 18, 17, 17, 17, - 21, 22, 19, 19, 18, 17, 16, 16, 21, 22, 19, 18, 17, 16, 16, 16, 21, 23, - 19, 17, 16, 15, 15, 16, 20, 22, 19, 17, 16, 15, 14, 15, 20, 22, 19, 17, - 15, 14, 14, 15, 20, 22, 19, 16, 14, 14, 14, 14, 19, 21, 19, 16, 14, 13, - 13, 14, 19, 21, 18, 16, 14, 13, 13, 13, 18, 20, 18, 16, 13, 12, 12, 13, - 17, 19, 18, 15, 13, 12, 12, 12, 17, 19, 17, 15, 13, 12, 12, 12, 16, 19, - 17, 15, 12, 12, 11, 12, 16, 18, 17, 14, 12, 11, 11, 12, 16, 18, 17, 14, - 12, 11, 11, 11, 15, 17, 16, 14, 12, 11, 11, 11, 15, 17, 16, 14, 13, 11, - 11, 11, 15, 17, 16, 14, 13, 11, 10, 10, 15, 17, 16, 14, 13, 12, 10, 10, - 14, 16, 16, 14, 12, 11, 10, 10, 14, 16, 16, 14, 12, 11, 10, 10, 14, 16, - 16, 14, 13, 11, 10, 10, 14, 15, 16, 14, 13, 12, 10, 10, 14, 15, 16, 14, - 13, 12, 11, 10, - /* Size 32x8 */ - 32, 33, 34, 33, 31, 28, 26, 24, 21, 21, 21, 21, 20, 20, 20, 19, 19, 18, - 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 29, 28, 27, 27, - 25, 23, 22, 22, 21, 22, 22, 23, 22, 22, 22, 21, 21, 20, 19, 19, 19, 18, - 18, 17, 17, 17, 17, 16, 16, 16, 15, 15, 21, 22, 22, 22, 22, 22, 21, 20, - 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 20, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 17, - 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 18, 19, 20, 20, 
20, 20, 20, 19, 18, 18, 17, 16, 16, 15, 14, 14, - 14, 13, 13, 13, 12, 12, 12, 12, 13, 13, 13, 12, 12, 13, 13, 13, 16, 17, - 18, 18, 18, 19, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, - 12, 11, 11, 11, 11, 11, 12, 11, 11, 11, 12, 12, 15, 16, 16, 17, 17, 17, - 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, - 11, 10, 10, 10, 10, 10, 10, 11, 15, 16, 16, 16, 16, 17, 17, 17, 17, 16, - 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, - 10, 10, 10, 10 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 27, 16, 12, 27, 18, 13, 11, 16, 13, 9, 8, 12, 11, 8, 6, - /* Size 8x8 */ - 32, 32, 29, 22, 18, 13, 12, 11, 32, 30, 28, 23, 19, 15, 13, 11, 29, 28, - 21, 18, 16, 13, 12, 11, 22, 23, 18, 15, 13, 11, 10, 10, 18, 19, 16, 13, - 11, 9, 8, 8, 13, 15, 13, 11, 9, 8, 7, 7, 12, 13, 12, 10, 8, 7, 7, 6, 11, - 11, 11, 10, 8, 7, 6, 6, - /* Size 16x16 */ - 32, 33, 33, 32, 30, 26, 23, 21, 18, 16, 14, 13, 12, 11, 10, 10, 33, 32, - 32, 32, 30, 27, 25, 22, 19, 17, 16, 14, 13, 12, 11, 10, 33, 32, 31, 30, - 28, 26, 24, 22, 19, 17, 16, 14, 13, 12, 12, 11, 32, 32, 30, 29, 28, 26, - 24, 22, 20, 18, 16, 14, 14, 13, 12, 11, 30, 30, 28, 28, 24, 22, 20, 19, - 17, 16, 15, 13, 12, 12, 12, 11, 26, 27, 26, 26, 22, 19, 18, 17, 15, 14, - 13, 12, 11, 11, 11, 10, 23, 25, 24, 24, 20, 18, 16, 15, 14, 13, 12, 11, - 11, 10, 10, 10, 21, 22, 22, 22, 19, 17, 15, 14, 13, 12, 11, 10, 10, 10, - 9, 9, 18, 19, 19, 20, 17, 15, 14, 13, 11, 11, 10, 9, 9, 9, 9, 8, 16, 17, - 17, 18, 16, 14, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 14, 16, 16, 16, 15, - 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 7, 13, 14, 14, 14, 13, 12, 11, 10, 9, - 9, 8, 7, 7, 7, 7, 7, 12, 13, 13, 14, 12, 11, 11, 10, 9, 8, 8, 7, 7, 7, - 6, 6, 11, 12, 12, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7, 6, 6, 6, 10, 11, - 12, 12, 12, 11, 10, 9, 9, 8, 8, 7, 6, 6, 6, 6, 10, 10, 11, 11, 11, 10, - 10, 9, 8, 8, 7, 7, 6, 6, 6, 5, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 32, 32, 30, 30, 28, 26, 25, 23, 21, 21, 19, 18, 17, - 16, 15, 14, 
13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 33, 32, 32, 32, - 32, 32, 32, 30, 30, 29, 27, 26, 24, 22, 22, 20, 19, 18, 17, 16, 15, 13, - 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 33, 32, 32, 32, 32, 32, 32, 31, - 30, 30, 27, 26, 25, 23, 22, 20, 19, 19, 17, 16, 16, 14, 14, 13, 13, 12, - 12, 12, 11, 11, 10, 10, 33, 32, 32, 32, 32, 32, 32, 31, 30, 30, 28, 27, - 25, 23, 23, 21, 19, 19, 17, 16, 16, 14, 14, 14, 13, 13, 12, 12, 12, 11, - 11, 11, 33, 32, 32, 32, 31, 31, 30, 29, 28, 28, 26, 26, 24, 23, 22, 20, - 19, 19, 17, 16, 16, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 32, 32, - 32, 32, 31, 30, 30, 28, 28, 28, 26, 26, 24, 23, 22, 21, 19, 19, 18, 17, - 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 32, 32, 32, 32, 30, 30, - 29, 28, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 14, - 14, 13, 13, 12, 12, 12, 11, 11, 30, 30, 31, 31, 29, 28, 28, 26, 25, 24, - 23, 22, 22, 20, 20, 19, 18, 17, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, - 12, 12, 11, 11, 30, 30, 30, 30, 28, 28, 28, 25, 24, 23, 22, 21, 20, 19, - 19, 18, 17, 17, 16, 15, 15, 13, 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, - 28, 29, 30, 30, 28, 28, 27, 24, 23, 21, 20, 20, 19, 18, 18, 17, 16, 16, - 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 26, 27, 27, 28, - 26, 26, 26, 23, 22, 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 13, 13, 12, - 12, 12, 11, 12, 11, 11, 11, 11, 10, 10, 25, 26, 26, 27, 26, 26, 25, 22, - 21, 20, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, - 11, 11, 11, 10, 10, 10, 23, 24, 25, 25, 24, 24, 24, 22, 20, 19, 18, 17, - 16, 16, 15, 14, 14, 14, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, - 10, 10, 21, 22, 23, 23, 23, 23, 23, 20, 19, 18, 17, 17, 16, 15, 14, 13, - 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 9, 9, 21, 22, - 22, 23, 22, 22, 22, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 11, - 11, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 19, 20, 20, 21, 20, 21, 21, - 19, 18, 17, 16, 15, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, - 9, 9, 9, 9, 9, 
9, 9, 18, 19, 19, 19, 19, 19, 20, 18, 17, 16, 15, 15, 14, - 13, 13, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 9, 17, 18, - 19, 19, 19, 19, 19, 17, 17, 16, 15, 14, 14, 13, 12, 12, 11, 11, 10, 10, - 10, 9, 9, 9, 9, 8, 9, 8, 8, 8, 8, 8, 16, 17, 17, 17, 17, 18, 18, 16, 16, - 15, 14, 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 15, 16, 16, 16, 16, 17, 17, 16, 15, 14, 13, 13, 12, 12, 11, 11, - 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 14, 15, 16, 16, 16, - 16, 16, 15, 15, 14, 13, 13, 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, - 8, 8, 7, 8, 7, 7, 7, 13, 13, 14, 14, 14, 15, 15, 14, 13, 13, 12, 12, 11, - 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 13, 13, 14, - 14, 14, 14, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 13, 13, 14, 14, 14, 14, 13, 13, 13, 12, - 12, 11, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 12, - 12, 13, 13, 13, 13, 14, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, - 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 11, 12, 12, 13, 13, 13, 13, 13, 12, - 12, 12, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, - 6, 11, 12, 12, 12, 12, 12, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, - 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 11, 11, 12, 12, 12, 12, 12, - 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, - 6, 6, 6, 6, 10, 11, 11, 12, 12, 12, 12, 12, 12, 11, 11, 11, 10, 10, 9, - 9, 9, 8, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 10, 11, 11, 11, 11, - 11, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, - 6, 6, 6, 6, 6, 5, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 9, - 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 10, 10, 10, 11, - 11, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 8, 8, 7, 7, 7, 7, 6, 6, - 6, 6, 6, 6, 5, 5, 5, - /* Size 4x8 */ - 32, 27, 17, 12, 32, 26, 18, 13, 30, 20, 15, 12, 23, 17, 12, 10, 19, 15, - 10, 9, 14, 12, 9, 8, 12, 12, 8, 7, 11, 10, 8, 6, - /* 
Size 8x4 */ - 32, 32, 30, 23, 19, 14, 12, 11, 27, 26, 20, 17, 15, 12, 12, 10, 17, 18, - 15, 12, 10, 9, 8, 8, 12, 13, 12, 10, 9, 8, 7, 6, - /* Size 8x16 */ - 32, 32, 28, 23, 18, 13, 12, 11, 33, 32, 29, 25, 19, 14, 13, 12, 32, 31, - 28, 24, 19, 14, 13, 12, 32, 30, 27, 24, 20, 15, 13, 12, 30, 28, 23, 20, - 17, 14, 13, 12, 26, 26, 20, 18, 15, 12, 12, 11, 23, 24, 19, 16, 14, 11, - 11, 11, 21, 22, 18, 15, 13, 11, 10, 10, 18, 19, 16, 14, 11, 9, 9, 9, 16, - 17, 15, 13, 11, 9, 8, 8, 14, 16, 14, 12, 10, 8, 8, 8, 13, 14, 13, 11, 9, - 8, 7, 7, 12, 13, 12, 11, 9, 7, 7, 7, 11, 12, 12, 10, 9, 8, 7, 6, 10, 12, - 12, 10, 8, 7, 6, 6, 10, 11, 11, 10, 9, 7, 6, 6, - /* Size 16x8 */ - 32, 33, 32, 32, 30, 26, 23, 21, 18, 16, 14, 13, 12, 11, 10, 10, 32, 32, - 31, 30, 28, 26, 24, 22, 19, 17, 16, 14, 13, 12, 12, 11, 28, 29, 28, 27, - 23, 20, 19, 18, 16, 15, 14, 13, 12, 12, 12, 11, 23, 25, 24, 24, 20, 18, - 16, 15, 14, 13, 12, 11, 11, 10, 10, 10, 18, 19, 19, 20, 17, 15, 14, 13, - 11, 11, 10, 9, 9, 9, 8, 9, 13, 14, 14, 15, 14, 12, 11, 11, 9, 9, 8, 8, - 7, 8, 7, 7, 12, 13, 13, 13, 13, 12, 11, 10, 9, 8, 8, 7, 7, 7, 6, 6, 11, - 12, 12, 12, 12, 11, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6, - /* Size 16x32 */ - 32, 33, 32, 32, 28, 26, 23, 19, 18, 16, 13, 13, 12, 11, 11, 11, 33, 32, - 32, 32, 29, 27, 24, 20, 19, 17, 14, 13, 12, 12, 12, 11, 33, 32, 32, 32, - 29, 27, 25, 20, 19, 17, 14, 14, 13, 12, 12, 11, 33, 32, 32, 31, 30, 28, - 25, 21, 19, 17, 14, 14, 13, 12, 12, 12, 32, 32, 31, 30, 28, 26, 24, 20, - 19, 17, 14, 14, 13, 13, 12, 12, 32, 32, 30, 30, 28, 26, 24, 21, 19, 18, - 15, 14, 13, 13, 12, 12, 32, 31, 30, 29, 27, 26, 24, 21, 20, 18, 15, 15, - 13, 13, 12, 12, 30, 30, 29, 28, 24, 23, 21, 19, 18, 16, 14, 14, 13, 13, - 13, 12, 30, 30, 28, 28, 23, 22, 20, 18, 17, 16, 14, 13, 13, 12, 12, 12, - 28, 30, 28, 27, 21, 20, 19, 17, 16, 15, 13, 13, 12, 12, 12, 12, 26, 28, - 26, 26, 20, 19, 18, 16, 15, 14, 12, 12, 12, 12, 11, 12, 26, 27, 26, 25, - 20, 19, 17, 15, 15, 14, 12, 12, 11, 11, 11, 11, 23, 25, 24, 24, 19, 18, 
- 16, 14, 14, 13, 11, 11, 11, 11, 11, 11, 22, 23, 23, 22, 18, 17, 16, 14, - 13, 12, 11, 11, 10, 10, 10, 10, 21, 22, 22, 22, 18, 17, 15, 13, 13, 12, - 11, 10, 10, 10, 10, 10, 19, 21, 20, 20, 17, 16, 14, 12, 12, 11, 10, 10, - 9, 9, 10, 9, 18, 19, 19, 19, 16, 15, 14, 12, 11, 11, 9, 9, 9, 9, 9, 9, - 17, 19, 19, 19, 16, 15, 14, 12, 11, 10, 9, 9, 9, 9, 9, 9, 16, 17, 17, - 18, 15, 14, 13, 11, 11, 10, 9, 9, 8, 8, 8, 9, 15, 16, 17, 17, 14, 13, - 12, 11, 10, 9, 8, 8, 8, 8, 8, 8, 14, 16, 16, 16, 14, 13, 12, 11, 10, 9, - 8, 8, 8, 8, 8, 8, 13, 14, 14, 15, 13, 12, 11, 10, 9, 9, 8, 8, 7, 8, 8, - 7, 13, 14, 14, 14, 13, 12, 11, 10, 9, 9, 8, 7, 7, 7, 7, 7, 12, 14, 14, - 14, 13, 12, 11, 10, 9, 8, 8, 7, 7, 7, 7, 7, 12, 13, 13, 13, 12, 11, 11, - 9, 9, 8, 7, 7, 7, 7, 7, 7, 11, 12, 13, 13, 12, 12, 10, 9, 9, 8, 8, 7, 7, - 7, 6, 6, 11, 12, 12, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7, 6, 6, 6, 11, 12, - 12, 12, 12, 11, 10, 10, 9, 8, 7, 7, 7, 6, 6, 6, 10, 12, 12, 12, 12, 11, - 10, 9, 8, 8, 7, 7, 6, 6, 6, 6, 10, 11, 11, 12, 11, 10, 10, 9, 9, 8, 7, - 7, 6, 6, 6, 6, 10, 11, 11, 11, 11, 10, 10, 9, 9, 8, 7, 7, 6, 6, 6, 6, - 10, 11, 11, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, 5, - /* Size 32x16 */ - 32, 33, 33, 33, 32, 32, 32, 30, 30, 28, 26, 26, 23, 22, 21, 19, 18, 17, - 16, 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 33, 32, 32, 32, - 32, 32, 31, 30, 30, 30, 28, 27, 25, 23, 22, 21, 19, 19, 17, 16, 16, 14, - 14, 14, 13, 12, 12, 12, 12, 11, 11, 11, 32, 32, 32, 32, 31, 30, 30, 29, - 28, 28, 26, 26, 24, 23, 22, 20, 19, 19, 17, 17, 16, 14, 14, 14, 13, 13, - 12, 12, 12, 11, 11, 11, 32, 32, 32, 31, 30, 30, 29, 28, 28, 27, 26, 25, - 24, 22, 22, 20, 19, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, - 11, 11, 28, 29, 29, 30, 28, 28, 27, 24, 23, 21, 20, 20, 19, 18, 18, 17, - 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 26, 27, - 27, 28, 26, 26, 26, 23, 22, 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 13, - 13, 12, 12, 12, 11, 12, 11, 11, 11, 10, 10, 10, 23, 24, 25, 25, 24, 24, - 24, 
21, 20, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 12, 12, 11, 11, 11, - 11, 10, 10, 10, 10, 10, 10, 10, 19, 20, 20, 21, 20, 21, 21, 19, 18, 17, - 16, 15, 14, 14, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 10, 10, 9, - 9, 9, 9, 18, 19, 19, 19, 19, 19, 20, 18, 17, 16, 15, 15, 14, 13, 13, 12, - 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 9, 8, 9, 9, 9, 16, 17, 17, 17, 17, - 18, 18, 16, 16, 15, 14, 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 13, 14, 14, 14, 14, 15, 15, 14, 14, 13, 12, 12, - 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 8, 8, 7, 8, 8, 7, 7, 7, 7, 8, 13, 13, - 14, 14, 14, 14, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, - 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 13, 13, 13, 13, 13, 13, 13, 12, - 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7, - 11, 12, 12, 12, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, - 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 11, 12, 12, 12, 12, 12, 12, 13, - 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, - 6, 6, 6, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 10, 10, 9, - 9, 9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, - /* Size 4x16 */ - 33, 26, 16, 11, 32, 27, 17, 12, 32, 26, 17, 13, 31, 26, 18, 13, 30, 22, - 16, 12, 28, 19, 14, 12, 25, 18, 13, 11, 22, 17, 12, 10, 19, 15, 11, 9, - 17, 14, 10, 8, 16, 13, 9, 8, 14, 12, 9, 7, 13, 11, 8, 7, 12, 11, 8, 6, - 12, 11, 8, 6, 11, 10, 8, 6, - /* Size 16x4 */ - 33, 32, 32, 31, 30, 28, 25, 22, 19, 17, 16, 14, 13, 12, 12, 11, 26, 27, - 26, 26, 22, 19, 18, 17, 15, 14, 13, 12, 11, 11, 11, 10, 16, 17, 17, 18, - 16, 14, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 11, 12, 13, 13, 12, 12, 11, - 10, 9, 8, 8, 7, 7, 6, 6, 6, - /* Size 8x32 */ - 32, 32, 28, 23, 18, 13, 12, 11, 33, 32, 29, 24, 19, 14, 12, 12, 33, 32, - 29, 25, 19, 14, 13, 12, 33, 32, 30, 25, 19, 14, 13, 12, 32, 31, 28, 24, - 19, 14, 13, 12, 32, 30, 28, 24, 19, 15, 13, 12, 32, 30, 27, 24, 20, 15, - 13, 12, 30, 29, 24, 21, 18, 14, 13, 13, 30, 28, 23, 20, 17, 14, 13, 
12, - 28, 28, 21, 19, 16, 13, 12, 12, 26, 26, 20, 18, 15, 12, 12, 11, 26, 26, - 20, 17, 15, 12, 11, 11, 23, 24, 19, 16, 14, 11, 11, 11, 22, 23, 18, 16, - 13, 11, 10, 10, 21, 22, 18, 15, 13, 11, 10, 10, 19, 20, 17, 14, 12, 10, - 9, 10, 18, 19, 16, 14, 11, 9, 9, 9, 17, 19, 16, 14, 11, 9, 9, 9, 16, 17, - 15, 13, 11, 9, 8, 8, 15, 17, 14, 12, 10, 8, 8, 8, 14, 16, 14, 12, 10, 8, - 8, 8, 13, 14, 13, 11, 9, 8, 7, 8, 13, 14, 13, 11, 9, 8, 7, 7, 12, 14, - 13, 11, 9, 8, 7, 7, 12, 13, 12, 11, 9, 7, 7, 7, 11, 13, 12, 10, 9, 8, 7, - 6, 11, 12, 12, 10, 9, 8, 7, 6, 11, 12, 12, 10, 9, 7, 7, 6, 10, 12, 12, - 10, 8, 7, 6, 6, 10, 11, 11, 10, 9, 7, 6, 6, 10, 11, 11, 10, 9, 7, 6, 6, - 10, 11, 11, 10, 9, 8, 7, 6, - /* Size 32x8 */ - 32, 33, 33, 33, 32, 32, 32, 30, 30, 28, 26, 26, 23, 22, 21, 19, 18, 17, - 16, 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 32, 32, 32, 32, - 31, 30, 30, 29, 28, 28, 26, 26, 24, 23, 22, 20, 19, 19, 17, 17, 16, 14, - 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 28, 29, 29, 30, 28, 28, 27, 24, - 23, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, - 12, 12, 12, 11, 11, 11, 23, 24, 25, 25, 24, 24, 24, 21, 20, 19, 18, 17, - 16, 16, 15, 14, 14, 14, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, - 10, 10, 18, 19, 19, 19, 19, 19, 20, 18, 17, 16, 15, 15, 14, 13, 13, 12, - 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 9, 8, 9, 9, 9, 13, 14, 14, 14, 14, - 15, 15, 14, 14, 13, 12, 12, 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 8, 8, 7, - 8, 8, 7, 7, 7, 7, 8, 12, 12, 13, 13, 13, 13, 13, 13, 13, 12, 12, 11, 11, - 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7, 11, 12, 12, - 12, 12, 12, 12, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, - 7, 7, 6, 6, 6, 6, 6, 6, 6 }, - { /* Chroma */ - /* Size 4x4 */ - 32, 23, 19, 16, 23, 19, 17, 15, 19, 17, 13, 12, 16, 15, 12, 10, - /* Size 8x8 */ - 33, 28, 22, 21, 20, 17, 16, 15, 28, 24, 22, 22, 21, 19, 17, 16, 22, 22, - 19, 19, 19, 17, 16, 16, 21, 22, 19, 17, 16, 15, 14, 14, 20, 21, 19, 16, - 14, 13, 13, 13, 17, 
19, 17, 15, 13, 12, 12, 12, 16, 17, 16, 14, 13, 12, - 11, 10, 15, 16, 16, 14, 13, 12, 10, 10, - /* Size 16x16 */ - 32, 34, 31, 28, 23, 21, 21, 20, 19, 18, 17, 16, 15, 15, 15, 14, 34, 33, - 29, 26, 23, 22, 22, 22, 20, 19, 19, 17, 17, 16, 16, 15, 31, 29, 26, 24, - 22, 22, 23, 22, 21, 20, 19, 18, 17, 17, 16, 16, 28, 26, 24, 22, 22, 22, - 23, 22, 21, 20, 20, 19, 18, 18, 17, 16, 23, 23, 22, 22, 20, 20, 20, 20, - 19, 19, 18, 17, 17, 17, 16, 17, 21, 22, 22, 22, 20, 19, 19, 18, 18, 17, - 17, 16, 16, 16, 16, 16, 21, 22, 23, 23, 20, 19, 18, 17, 17, 16, 16, 15, - 15, 15, 15, 15, 20, 22, 22, 22, 20, 18, 17, 17, 16, 15, 15, 14, 14, 14, - 14, 14, 19, 20, 21, 21, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 13, 13, - 18, 19, 20, 20, 19, 17, 16, 15, 14, 13, 13, 12, 12, 12, 12, 12, 17, 19, - 19, 20, 18, 17, 16, 15, 14, 13, 12, 12, 12, 12, 12, 12, 16, 17, 18, 19, - 17, 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 11, 15, 17, 17, 18, 17, 16, - 15, 14, 13, 12, 12, 11, 11, 11, 11, 11, 15, 16, 17, 18, 17, 16, 15, 14, - 13, 12, 12, 11, 11, 10, 10, 10, 15, 16, 16, 17, 16, 16, 15, 14, 13, 12, - 12, 11, 11, 10, 10, 10, 14, 15, 16, 16, 17, 16, 15, 14, 13, 12, 12, 11, - 11, 10, 10, 10, - /* Size 32x32 */ - 32, 33, 34, 34, 31, 29, 28, 25, 23, 21, 21, 21, 21, 20, 20, 20, 19, 19, - 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 33, 33, 33, 33, - 30, 28, 27, 24, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 18, 18, 17, - 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 34, 33, 33, 33, 29, 28, 26, 24, - 23, 22, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 19, 18, 17, 17, 17, 16, - 16, 16, 16, 15, 15, 15, 34, 33, 33, 32, 29, 28, 26, 24, 23, 22, 23, 23, - 23, 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, - 16, 16, 31, 30, 29, 29, 26, 25, 24, 23, 22, 22, 22, 22, 23, 22, 22, 22, - 21, 21, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 29, 28, - 28, 28, 25, 24, 23, 22, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, - 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 28, 27, 26, 26, 24, 23, 
- 22, 22, 22, 21, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, - 18, 18, 18, 17, 17, 17, 16, 16, 25, 24, 24, 24, 23, 22, 22, 21, 21, 20, - 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, - 17, 17, 17, 17, 23, 23, 23, 23, 22, 22, 22, 21, 20, 20, 20, 20, 20, 20, - 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 17, 17, 17, - 21, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 21, 22, 22, 23, - 22, 22, 22, 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 21, 22, 22, 23, 22, 22, 22, 21, - 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, - 15, 15, 15, 15, 15, 15, 21, 22, 22, 23, 23, 23, 23, 21, 20, 19, 19, 18, - 18, 17, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 20, 21, 22, 22, 22, 22, 22, 20, 20, 19, 18, 18, 17, 17, 17, 16, - 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 20, 21, - 22, 22, 22, 22, 22, 20, 20, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, - 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 20, 20, 21, 22, 22, 22, - 22, 20, 20, 19, 18, 18, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, - 13, 13, 14, 13, 13, 14, 14, 13, 19, 20, 20, 21, 21, 21, 21, 20, 19, 19, - 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 19, 20, 20, 20, 21, 21, 21, 20, 19, 19, 17, 17, 17, 16, - 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 18, 19, 19, 20, 20, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, - 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 17, 18, 19, 19, - 19, 20, 20, 19, 18, 18, 17, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 17, 18, 19, 19, 19, 19, 20, 19, - 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 16, 17, 18, 18, 18, 19, 19, 18, 17, 17, 16, 16, - 
15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11, 11, 12, 11, 12, 11, 12, - 12, 12, 16, 17, 17, 18, 18, 18, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, - 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 17, - 17, 18, 18, 18, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, - 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 18, - 18, 17, 17, 16, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 18, 17, 17, 16, - 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 10, - 10, 10, 11, 10, 15, 16, 16, 17, 17, 17, 18, 17, 17, 16, 16, 15, 15, 14, - 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, - 15, 16, 16, 16, 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, - 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 15, 15, 16, 16, - 16, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, - 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 14, 15, 15, 16, 16, 16, 17, 17, - 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, - 10, 10, 10, 10, 10, 10, 14, 15, 15, 16, 16, 16, 16, 17, 17, 16, 16, 15, - 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, - 10, 10, 14, 15, 15, 16, 16, 16, 16, 17, 17, 16, 16, 15, 15, 14, 14, 13, - 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 9, - /* Size 4x8 */ - 33, 22, 19, 16, 27, 22, 20, 17, 22, 19, 18, 17, 22, 18, 16, 14, 20, 17, - 14, 13, 18, 16, 12, 12, 17, 16, 12, 11, 16, 15, 12, 10, - /* Size 8x4 */ - 33, 27, 22, 22, 20, 18, 17, 16, 22, 22, 19, 18, 17, 16, 16, 15, 19, 20, - 18, 16, 14, 12, 12, 12, 16, 17, 17, 14, 13, 12, 11, 10, - /* Size 8x16 */ - 32, 30, 21, 21, 19, 16, 15, 15, 33, 28, 22, 22, 20, 18, 17, 16, 31, 26, - 22, 22, 21, 18, 17, 17, 28, 23, 22, 23, 21, 19, 18, 17, 23, 22, 20, 20, - 19, 17, 17, 17, 21, 22, 19, 18, 18, 16, 16, 16, 21, 23, 19, 18, 17, 15, - 15, 15, 20, 22, 19, 17, 16, 14, 14, 14, 19, 21, 19, 17, 15, 13, 
13, 13, - 18, 20, 18, 16, 14, 12, 12, 13, 17, 19, 18, 16, 14, 12, 12, 12, 16, 18, - 17, 15, 13, 12, 11, 12, 16, 17, 16, 15, 13, 11, 11, 11, 15, 17, 16, 14, - 13, 12, 11, 10, 15, 16, 16, 15, 13, 12, 11, 10, 14, 16, 16, 15, 13, 12, - 11, 10, - /* Size 16x8 */ - 32, 33, 31, 28, 23, 21, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 30, 28, - 26, 23, 22, 22, 23, 22, 21, 20, 19, 18, 17, 17, 16, 16, 21, 22, 22, 22, - 20, 19, 19, 19, 19, 18, 18, 17, 16, 16, 16, 16, 21, 22, 22, 23, 20, 18, - 18, 17, 17, 16, 16, 15, 15, 14, 15, 15, 19, 20, 21, 21, 19, 18, 17, 16, - 15, 14, 14, 13, 13, 13, 13, 13, 16, 18, 18, 19, 17, 16, 15, 14, 13, 12, - 12, 12, 11, 12, 12, 12, 15, 17, 17, 18, 17, 16, 15, 14, 13, 12, 12, 11, - 11, 11, 11, 11, 15, 16, 17, 17, 17, 16, 15, 14, 13, 13, 12, 12, 11, 10, - 10, 10, - /* Size 16x32 */ - 32, 33, 30, 28, 21, 21, 21, 20, 19, 18, 16, 16, 15, 15, 15, 15, 33, 33, - 29, 27, 22, 22, 22, 20, 20, 19, 17, 17, 16, 16, 16, 16, 33, 32, 28, 26, - 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 16, 16, 34, 32, 28, 26, 22, 23, - 23, 21, 21, 20, 18, 18, 17, 17, 17, 16, 31, 28, 26, 24, 22, 22, 22, 22, - 21, 20, 18, 18, 17, 17, 17, 16, 29, 27, 24, 23, 22, 22, 23, 22, 21, 20, - 19, 18, 18, 17, 17, 17, 28, 26, 23, 22, 22, 22, 23, 22, 21, 20, 19, 19, - 18, 18, 17, 17, 24, 24, 23, 22, 20, 20, 21, 20, 20, 19, 18, 18, 17, 18, - 17, 17, 23, 23, 22, 22, 20, 20, 20, 20, 19, 19, 17, 17, 17, 17, 17, 17, - 21, 22, 22, 21, 19, 19, 19, 19, 19, 18, 17, 17, 16, 17, 17, 16, 21, 22, - 22, 22, 19, 19, 18, 18, 18, 17, 16, 16, 16, 16, 16, 16, 21, 23, 22, 22, - 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 16, 21, 23, 23, 22, 19, 18, - 18, 17, 17, 16, 15, 15, 15, 15, 15, 16, 20, 22, 22, 22, 19, 18, 17, 16, - 16, 16, 15, 14, 15, 14, 15, 15, 20, 22, 22, 22, 19, 18, 17, 16, 16, 15, - 14, 14, 14, 14, 14, 15, 20, 21, 22, 22, 19, 18, 17, 16, 15, 14, 14, 14, - 13, 14, 14, 14, 19, 21, 21, 21, 19, 18, 17, 15, 15, 14, 13, 13, 13, 13, - 13, 14, 19, 20, 21, 21, 19, 17, 17, 15, 15, 14, 13, 13, 13, 13, 13, 13, - 18, 20, 
20, 20, 18, 17, 16, 15, 14, 13, 12, 12, 12, 12, 13, 13, 17, 19, - 20, 20, 18, 17, 16, 14, 14, 13, 12, 12, 12, 12, 12, 12, 17, 19, 19, 20, - 18, 17, 16, 14, 14, 13, 12, 12, 12, 12, 12, 12, 16, 18, 18, 19, 17, 16, - 15, 14, 13, 12, 12, 11, 11, 12, 12, 12, 16, 18, 18, 19, 17, 16, 15, 14, - 13, 12, 12, 11, 11, 11, 12, 12, 16, 17, 18, 18, 17, 16, 15, 14, 13, 12, - 11, 11, 11, 11, 11, 11, 16, 17, 17, 18, 16, 16, 15, 13, 13, 12, 11, 11, - 11, 11, 11, 11, 15, 17, 17, 18, 16, 16, 15, 14, 13, 12, 12, 11, 11, 11, - 11, 11, 15, 17, 17, 17, 16, 16, 14, 14, 13, 12, 12, 11, 11, 11, 10, 11, - 15, 16, 17, 17, 16, 16, 14, 14, 13, 12, 12, 11, 11, 10, 10, 10, 15, 16, - 16, 17, 16, 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 10, 14, 16, 16, 17, - 16, 15, 15, 14, 13, 12, 12, 11, 11, 10, 10, 10, 14, 16, 16, 17, 16, 15, - 15, 14, 13, 12, 12, 11, 11, 10, 10, 10, 14, 16, 16, 16, 16, 15, 15, 13, - 13, 12, 12, 11, 11, 10, 10, 10, - /* Size 32x16 */ - 32, 33, 33, 34, 31, 29, 28, 24, 23, 21, 21, 21, 21, 20, 20, 20, 19, 19, - 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 33, 33, 32, 32, - 28, 27, 26, 24, 23, 22, 22, 23, 23, 22, 22, 21, 21, 20, 20, 19, 19, 18, - 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 30, 29, 28, 28, 26, 24, 23, 23, - 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, 18, 18, 18, 17, 17, - 17, 17, 16, 16, 16, 16, 28, 27, 26, 26, 24, 23, 22, 22, 22, 21, 22, 22, - 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, - 17, 16, 21, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 21, 22, - 22, 23, 22, 22, 22, 20, 20, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, - 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 21, 22, 22, 23, 22, 23, - 23, 21, 20, 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, - 15, 15, 14, 14, 15, 15, 15, 15, 20, 20, 21, 21, 22, 22, 22, 20, 20, 19, - 18, 18, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 13, 14, 14, 14, - 14, 14, 14, 13, 19, 20, 20, 
21, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16, - 16, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 18, 19, 19, 20, 20, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, - 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 13, 12, 12, 12, 16, 17, 18, 18, - 18, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, - 12, 11, 11, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 19, 18, - 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 18, 18, 17, 17, 16, 16, 16, - 15, 15, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 15, 16, 16, 17, 17, 17, 18, 18, 17, 17, 16, 16, 15, 14, 14, 14, - 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 15, 16, - 16, 17, 17, 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, - 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 15, 16, 16, 16, 16, 17, - 17, 17, 17, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, - 11, 11, 11, 10, 10, 10, 10, 10, - /* Size 4x16 */ - 33, 21, 18, 15, 32, 22, 19, 16, 28, 22, 20, 17, 26, 22, 20, 18, 23, 20, - 19, 17, 22, 19, 17, 16, 23, 18, 16, 15, 22, 18, 15, 14, 21, 18, 14, 13, - 20, 17, 13, 12, 19, 17, 13, 12, 18, 16, 12, 11, 17, 16, 12, 11, 17, 16, - 12, 11, 16, 16, 13, 10, 16, 15, 12, 10, - /* Size 16x4 */ - 33, 32, 28, 26, 23, 22, 23, 22, 21, 20, 19, 18, 17, 17, 16, 16, 21, 22, - 22, 22, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 15, 18, 19, 20, 20, - 19, 17, 16, 15, 14, 13, 13, 12, 12, 12, 13, 12, 15, 16, 17, 18, 17, 16, - 15, 14, 13, 12, 12, 11, 11, 11, 10, 10, - /* Size 8x32 */ - 32, 30, 21, 21, 19, 16, 15, 15, 33, 29, 22, 22, 20, 17, 16, 16, 33, 28, - 22, 22, 20, 18, 17, 16, 34, 28, 22, 23, 21, 18, 17, 17, 31, 26, 22, 22, - 21, 18, 17, 17, 29, 24, 22, 23, 21, 19, 18, 17, 28, 23, 22, 23, 21, 19, - 18, 17, 24, 23, 20, 21, 20, 18, 17, 17, 23, 22, 20, 20, 19, 17, 17, 17, - 21, 22, 19, 19, 19, 17, 16, 17, 21, 22, 19, 18, 18, 16, 16, 16, 21, 22, - 19, 
18, 17, 16, 16, 16, 21, 23, 19, 18, 17, 15, 15, 15, 20, 22, 19, 17, - 16, 15, 15, 15, 20, 22, 19, 17, 16, 14, 14, 14, 20, 22, 19, 17, 15, 14, - 13, 14, 19, 21, 19, 17, 15, 13, 13, 13, 19, 21, 19, 17, 15, 13, 13, 13, - 18, 20, 18, 16, 14, 12, 12, 13, 17, 20, 18, 16, 14, 12, 12, 12, 17, 19, - 18, 16, 14, 12, 12, 12, 16, 18, 17, 15, 13, 12, 11, 12, 16, 18, 17, 15, - 13, 12, 11, 12, 16, 18, 17, 15, 13, 11, 11, 11, 16, 17, 16, 15, 13, 11, - 11, 11, 15, 17, 16, 15, 13, 12, 11, 11, 15, 17, 16, 14, 13, 12, 11, 10, - 15, 17, 16, 14, 13, 12, 11, 10, 15, 16, 16, 15, 13, 12, 11, 10, 14, 16, - 16, 15, 13, 12, 11, 10, 14, 16, 16, 15, 13, 12, 11, 10, 14, 16, 16, 15, - 13, 12, 11, 10, - /* Size 32x8 */ - 32, 33, 33, 34, 31, 29, 28, 24, 23, 21, 21, 21, 21, 20, 20, 20, 19, 19, - 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 30, 29, 28, 28, - 26, 24, 23, 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, 18, - 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 21, 22, 22, 22, 22, 22, 22, 20, - 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, - 16, 16, 16, 16, 16, 16, 21, 22, 22, 23, 22, 23, 23, 21, 20, 19, 18, 18, - 18, 17, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 15, 15, - 15, 15, 19, 20, 20, 21, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 15, - 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 16, 17, - 18, 18, 18, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, - 12, 12, 12, 11, 11, 12, 12, 12, 12, 12, 12, 12, 15, 16, 17, 17, 17, 18, - 18, 17, 17, 16, 16, 16, 15, 15, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, - 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, - 10, 10, 10, 10 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 28, 18, 13, 28, 19, 14, 11, 18, 14, 10, 8, 13, 11, 8, 7, - /* Size 8x8 */ - 32, 32, 29, 24, 19, 15, 13, 11, 32, 31, 28, 24, 20, 16, 14, 12, 29, 28, - 22, 20, 17, 14, 13, 12, 24, 24, 20, 16, 14, 12, 
11, 10, 19, 20, 17, 14, - 12, 10, 9, 9, 15, 16, 14, 12, 10, 9, 8, 8, 13, 14, 13, 11, 9, 8, 7, 7, - 11, 12, 12, 10, 9, 8, 7, 6, - /* Size 16x16 */ - 32, 33, 33, 32, 30, 28, 25, 22, 19, 17, 16, 14, 12, 12, 11, 11, 33, 32, - 32, 32, 30, 29, 26, 23, 20, 19, 17, 15, 13, 13, 12, 11, 33, 32, 31, 31, - 29, 28, 26, 23, 21, 19, 17, 15, 14, 13, 12, 12, 32, 32, 31, 29, 28, 27, - 25, 23, 21, 19, 18, 16, 14, 14, 13, 12, 30, 30, 29, 28, 26, 24, 22, 20, - 19, 18, 16, 15, 13, 13, 12, 12, 28, 29, 28, 27, 24, 21, 20, 18, 17, 16, - 15, 14, 13, 12, 11, 11, 25, 26, 26, 25, 22, 20, 18, 17, 15, 14, 14, 12, - 12, 11, 11, 11, 22, 23, 23, 23, 20, 18, 17, 15, 14, 13, 12, 11, 11, 10, - 10, 10, 19, 20, 21, 21, 19, 17, 15, 14, 12, 12, 11, 10, 10, 9, 9, 9, 17, - 19, 19, 19, 18, 16, 14, 13, 12, 11, 10, 10, 9, 9, 9, 8, 16, 17, 17, 18, - 16, 15, 14, 12, 11, 10, 10, 9, 9, 8, 8, 8, 14, 15, 15, 16, 15, 14, 12, - 11, 10, 10, 9, 8, 8, 8, 7, 7, 12, 13, 14, 14, 13, 13, 12, 11, 10, 9, 9, - 8, 7, 7, 7, 7, 12, 13, 13, 14, 13, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, 6, - 11, 12, 12, 13, 12, 11, 11, 10, 9, 9, 8, 7, 7, 7, 6, 6, 11, 11, 12, 12, - 12, 11, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 19, 18, - 17, 16, 16, 14, 14, 13, 12, 12, 12, 11, 11, 11, 11, 10, 33, 32, 32, 32, - 32, 32, 32, 31, 30, 29, 29, 27, 26, 24, 23, 22, 20, 19, 18, 17, 17, 15, - 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 33, 32, 32, 32, 32, 32, 32, 31, - 30, 30, 29, 27, 26, 24, 23, 23, 20, 20, 19, 17, 17, 15, 15, 14, 13, 13, - 13, 12, 12, 12, 11, 11, 33, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 28, - 27, 25, 23, 23, 21, 20, 19, 18, 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, - 12, 11, 33, 32, 32, 32, 31, 31, 31, 30, 29, 28, 28, 26, 26, 24, 23, 23, - 21, 20, 19, 18, 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 33, 32, - 32, 32, 31, 31, 30, 30, 29, 28, 28, 26, 26, 24, 23, 23, 20, 20, 19, 18, - 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 32, 32, 32, 32, 31, 30, - 29, 28, 28, 27, 
27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 18, 16, 16, 15, - 14, 14, 14, 13, 13, 12, 12, 12, 32, 31, 31, 31, 30, 30, 28, 28, 27, 26, - 26, 24, 24, 23, 22, 22, 20, 19, 19, 17, 17, 16, 15, 14, 14, 14, 13, 13, - 13, 12, 12, 12, 30, 30, 30, 31, 29, 29, 28, 27, 26, 24, 24, 23, 22, 22, - 20, 20, 19, 18, 18, 17, 16, 15, 15, 14, 13, 13, 13, 12, 12, 12, 12, 12, - 29, 29, 30, 30, 28, 28, 27, 26, 24, 22, 22, 21, 20, 20, 19, 19, 17, 17, - 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 28, 29, 29, 30, - 28, 28, 27, 26, 24, 22, 21, 20, 20, 19, 18, 18, 17, 17, 16, 15, 15, 14, - 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 26, 27, 27, 28, 26, 26, 26, 24, - 23, 21, 20, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 12, 12, - 11, 11, 11, 11, 11, 11, 25, 26, 26, 27, 26, 26, 25, 24, 22, 20, 20, 19, - 18, 17, 17, 16, 15, 15, 14, 14, 14, 13, 12, 12, 12, 12, 11, 11, 11, 11, - 11, 10, 23, 24, 24, 25, 24, 24, 24, 23, 22, 20, 19, 18, 17, 16, 16, 15, - 14, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 22, 23, - 23, 23, 23, 23, 23, 22, 20, 19, 18, 17, 17, 16, 15, 15, 14, 13, 13, 12, - 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 21, 22, 23, 23, 23, 23, - 22, 22, 20, 19, 18, 17, 16, 15, 15, 14, 13, 13, 13, 12, 12, 11, 11, 11, - 10, 10, 10, 10, 10, 10, 9, 9, 19, 20, 20, 21, 21, 20, 21, 20, 19, 17, - 17, 16, 15, 14, 14, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, - 9, 9, 9, 18, 19, 20, 20, 20, 20, 20, 19, 18, 17, 17, 16, 15, 14, 13, 13, - 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 17, 18, 19, 19, - 19, 19, 19, 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, - 10, 9, 9, 9, 9, 9, 9, 8, 8, 9, 16, 17, 17, 18, 18, 18, 18, 17, 17, 16, - 15, 14, 14, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 8, 8, 8, 8, - 8, 8, 16, 17, 17, 17, 17, 17, 18, 17, 16, 15, 15, 14, 14, 13, 12, 12, - 11, 11, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 14, 15, 15, 16, 16, - 16, 16, 16, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, - 8, 8, 8, 8, 8, 8, 8, 
7, 14, 14, 15, 15, 15, 15, 16, 15, 15, 14, 14, 13, - 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 13, - 13, 14, 14, 14, 14, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9, - 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 12, 13, 13, 14, 14, 14, 14, 14, - 13, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, - 7, 7, 7, 12, 13, 13, 14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, - 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 13, 13, 13, 13, - 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, - 7, 7, 6, 6, 6, 11, 12, 12, 13, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, - 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 11, 12, 12, 12, - 12, 12, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, - 7, 7, 7, 7, 6, 6, 6, 6, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, - 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 11, 11, - 11, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, - 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 10, 11, 11, 11, 11, 12, 12, 12, 12, 11, - 11, 11, 10, 10, 10, 9, 9, 9, 9, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, - /* Size 4x8 */ - 32, 29, 17, 12, 32, 28, 18, 13, 30, 22, 16, 12, 25, 19, 13, 11, 20, 17, - 11, 9, 16, 14, 9, 8, 14, 13, 9, 7, 12, 11, 9, 7, - /* Size 8x4 */ - 32, 32, 30, 25, 20, 16, 14, 12, 29, 28, 22, 19, 17, 14, 13, 11, 17, 18, - 16, 13, 11, 9, 9, 9, 12, 13, 12, 11, 9, 8, 7, 7, - /* Size 8x16 */ - 32, 33, 29, 23, 19, 16, 12, 11, 33, 32, 30, 25, 20, 17, 13, 12, 33, 31, - 29, 24, 21, 17, 14, 13, 32, 30, 28, 24, 21, 18, 14, 13, 30, 29, 25, 21, - 19, 16, 13, 13, 28, 28, 22, 19, 17, 15, 13, 12, 25, 26, 21, 17, 15, 13, - 12, 11, 22, 23, 19, 16, 14, 12, 11, 10, 19, 20, 18, 14, 12, 11, 10, 9, - 18, 19, 17, 14, 12, 10, 9, 9, 16, 17, 16, 13, 11, 10, 9, 8, 14, 15, 14, - 12, 10, 9, 8, 8, 12, 14, 13, 11, 10, 9, 7, 7, 12, 13, 12, 11, 9, 8, 7, - 7, 11, 12, 12, 11, 9, 8, 7, 7, 11, 12, 12, 11, 9, 8, 7, 6, - /* Size 16x8 */ - 32, 33, 33, 32, 
30, 28, 25, 22, 19, 18, 16, 14, 12, 12, 11, 11, 33, 32, - 31, 30, 29, 28, 26, 23, 20, 19, 17, 15, 14, 13, 12, 12, 29, 30, 29, 28, - 25, 22, 21, 19, 18, 17, 16, 14, 13, 12, 12, 12, 23, 25, 24, 24, 21, 19, - 17, 16, 14, 14, 13, 12, 11, 11, 11, 11, 19, 20, 21, 21, 19, 17, 15, 14, - 12, 12, 11, 10, 10, 9, 9, 9, 16, 17, 17, 18, 16, 15, 13, 12, 11, 10, 10, - 9, 9, 8, 8, 8, 12, 13, 14, 14, 13, 13, 12, 11, 10, 9, 9, 8, 7, 7, 7, 7, - 11, 12, 13, 13, 13, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, 6, - /* Size 16x32 */ - 32, 33, 33, 32, 29, 28, 23, 22, 19, 17, 16, 13, 12, 12, 11, 11, 33, 32, - 32, 32, 29, 29, 24, 23, 20, 17, 17, 14, 13, 12, 12, 12, 33, 32, 32, 32, - 30, 29, 25, 23, 20, 18, 17, 14, 13, 12, 12, 12, 33, 32, 32, 31, 30, 30, - 25, 23, 21, 18, 17, 14, 14, 13, 12, 12, 33, 32, 31, 30, 29, 28, 24, 23, - 21, 18, 17, 14, 14, 13, 13, 12, 32, 32, 31, 30, 28, 28, 24, 23, 20, 18, - 17, 14, 14, 13, 13, 12, 32, 31, 30, 29, 28, 27, 24, 23, 21, 18, 18, 15, - 14, 13, 13, 12, 32, 31, 30, 28, 26, 26, 23, 22, 20, 18, 17, 14, 14, 13, - 13, 13, 30, 30, 29, 28, 25, 24, 21, 20, 19, 17, 16, 14, 13, 13, 13, 13, - 29, 30, 28, 27, 23, 22, 20, 19, 17, 16, 15, 13, 13, 12, 12, 12, 28, 30, - 28, 27, 22, 21, 19, 18, 17, 16, 15, 13, 13, 12, 12, 12, 26, 28, 26, 26, - 21, 20, 18, 17, 16, 14, 14, 12, 12, 12, 12, 11, 25, 26, 26, 25, 21, 20, - 17, 17, 15, 14, 13, 12, 12, 11, 11, 11, 23, 25, 24, 24, 20, 19, 16, 16, - 14, 13, 13, 11, 11, 11, 11, 11, 22, 23, 23, 23, 19, 18, 16, 15, 14, 12, - 12, 11, 11, 10, 10, 10, 21, 23, 23, 22, 19, 18, 15, 15, 13, 12, 12, 11, - 10, 10, 10, 10, 19, 21, 20, 20, 18, 17, 14, 14, 12, 11, 11, 10, 10, 10, - 9, 10, 19, 20, 20, 20, 17, 17, 14, 13, 12, 11, 11, 10, 9, 9, 9, 9, 18, - 19, 19, 19, 17, 16, 14, 13, 12, 11, 10, 9, 9, 9, 9, 9, 16, 18, 18, 18, - 16, 15, 13, 12, 11, 10, 10, 9, 9, 9, 9, 8, 16, 17, 17, 18, 16, 15, 13, - 12, 11, 10, 10, 9, 9, 8, 8, 8, 14, 16, 16, 16, 14, 14, 12, 12, 11, 9, 9, - 8, 8, 8, 8, 8, 14, 15, 15, 16, 14, 14, 12, 11, 10, 9, 9, 8, 8, 8, 8, 8, - 13, 14, 14, 15, 
13, 13, 11, 11, 10, 9, 9, 8, 8, 7, 7, 7, 12, 14, 14, 14, - 13, 13, 11, 11, 10, 9, 9, 8, 7, 7, 7, 7, 12, 14, 14, 14, 13, 13, 11, 11, - 10, 9, 8, 8, 7, 7, 7, 7, 12, 13, 13, 13, 12, 12, 11, 10, 9, 9, 8, 7, 7, - 7, 7, 7, 12, 12, 13, 13, 12, 12, 11, 10, 9, 9, 8, 7, 7, 7, 7, 6, 11, 12, - 12, 13, 12, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, 6, 11, 12, 12, 12, 12, 11, - 11, 10, 9, 9, 8, 8, 7, 7, 6, 6, 11, 12, 12, 12, 12, 11, 11, 10, 9, 8, 8, - 7, 7, 6, 6, 6, 10, 11, 11, 12, 12, 11, 11, 9, 9, 8, 8, 7, 7, 6, 6, 6, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 32, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 19, 19, - 18, 16, 16, 14, 14, 13, 12, 12, 12, 12, 11, 11, 11, 10, 33, 32, 32, 32, - 32, 32, 31, 31, 30, 30, 30, 28, 26, 25, 23, 23, 21, 20, 19, 18, 17, 16, - 15, 14, 14, 14, 13, 12, 12, 12, 12, 11, 33, 32, 32, 32, 31, 31, 30, 30, - 29, 28, 28, 26, 26, 24, 23, 23, 20, 20, 19, 18, 17, 16, 15, 14, 14, 14, - 13, 13, 12, 12, 12, 11, 32, 32, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, - 25, 24, 23, 22, 20, 20, 19, 18, 18, 16, 16, 15, 14, 14, 13, 13, 13, 12, - 12, 12, 29, 29, 30, 30, 29, 28, 28, 26, 25, 23, 22, 21, 21, 20, 19, 19, - 18, 17, 17, 16, 16, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 28, 29, - 29, 30, 28, 28, 27, 26, 24, 22, 21, 20, 20, 19, 18, 18, 17, 17, 16, 15, - 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 23, 24, 25, 25, 24, 24, - 24, 23, 21, 20, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 22, 23, 23, 23, 23, 23, 23, 22, 20, 19, - 18, 17, 17, 16, 15, 15, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 10, - 10, 10, 10, 9, 19, 20, 20, 21, 21, 20, 21, 20, 19, 17, 17, 16, 15, 14, - 14, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 17, - 17, 18, 18, 18, 18, 18, 18, 17, 16, 16, 14, 14, 13, 12, 12, 11, 11, 11, - 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 16, 17, 17, 17, 17, 17, 18, 17, - 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, - 8, 8, 8, 8, 13, 14, 14, 14, 14, 14, 15, 14, 14, 13, 13, 12, 12, 11, 11, 
- 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 8, 8, 7, 7, 12, 13, 13, 14, - 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, - 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 13, 13, 13, 13, 13, 13, 12, 12, 12, - 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 11, 12, - 12, 12, 13, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, - 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 11, 12, 12, 12, 12, 12, 12, 13, 13, 12, - 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, - /* Size 4x16 */ - 33, 28, 17, 12, 32, 29, 18, 12, 32, 28, 18, 13, 31, 27, 18, 13, 30, 24, - 17, 13, 30, 21, 16, 12, 26, 20, 14, 11, 23, 18, 12, 10, 21, 17, 11, 10, - 19, 16, 11, 9, 17, 15, 10, 8, 15, 14, 9, 8, 14, 13, 9, 7, 13, 12, 9, 7, - 12, 12, 9, 7, 12, 11, 8, 6, - /* Size 16x4 */ - 33, 32, 32, 31, 30, 30, 26, 23, 21, 19, 17, 15, 14, 13, 12, 12, 28, 29, - 28, 27, 24, 21, 20, 18, 17, 16, 15, 14, 13, 12, 12, 11, 17, 18, 18, 18, - 17, 16, 14, 12, 11, 11, 10, 9, 9, 9, 9, 8, 12, 12, 13, 13, 13, 12, 11, - 10, 10, 9, 8, 8, 7, 7, 7, 6, - /* Size 8x32 */ - 32, 33, 29, 23, 19, 16, 12, 11, 33, 32, 29, 24, 20, 17, 13, 12, 33, 32, - 30, 25, 20, 17, 13, 12, 33, 32, 30, 25, 21, 17, 14, 12, 33, 31, 29, 24, - 21, 17, 14, 13, 32, 31, 28, 24, 20, 17, 14, 13, 32, 30, 28, 24, 21, 18, - 14, 13, 32, 30, 26, 23, 20, 17, 14, 13, 30, 29, 25, 21, 19, 16, 13, 13, - 29, 28, 23, 20, 17, 15, 13, 12, 28, 28, 22, 19, 17, 15, 13, 12, 26, 26, - 21, 18, 16, 14, 12, 12, 25, 26, 21, 17, 15, 13, 12, 11, 23, 24, 20, 16, - 14, 13, 11, 11, 22, 23, 19, 16, 14, 12, 11, 10, 21, 23, 19, 15, 13, 12, - 10, 10, 19, 20, 18, 14, 12, 11, 10, 9, 19, 20, 17, 14, 12, 11, 9, 9, 18, - 19, 17, 14, 12, 10, 9, 9, 16, 18, 16, 13, 11, 10, 9, 9, 16, 17, 16, 13, - 11, 10, 9, 8, 14, 16, 14, 12, 11, 9, 8, 8, 14, 15, 14, 12, 10, 9, 8, 8, - 13, 14, 13, 11, 10, 9, 8, 7, 12, 14, 13, 11, 10, 9, 7, 7, 12, 14, 13, - 11, 10, 8, 7, 7, 12, 13, 12, 11, 9, 8, 7, 7, 12, 13, 12, 11, 9, 8, 7, 7, - 11, 12, 12, 11, 9, 8, 7, 7, 11, 
12, 12, 11, 9, 8, 7, 6, 11, 12, 12, 11, - 9, 8, 7, 6, 10, 11, 12, 11, 9, 8, 7, 6, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 32, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 19, 19, - 18, 16, 16, 14, 14, 13, 12, 12, 12, 12, 11, 11, 11, 10, 33, 32, 32, 32, - 31, 31, 30, 30, 29, 28, 28, 26, 26, 24, 23, 23, 20, 20, 19, 18, 17, 16, - 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 29, 29, 30, 30, 29, 28, 28, 26, - 25, 23, 22, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 14, 14, 13, 13, 13, - 12, 12, 12, 12, 12, 12, 23, 24, 25, 25, 24, 24, 24, 23, 21, 20, 19, 18, - 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 19, 20, 20, 21, 21, 20, 21, 20, 19, 17, 17, 16, 15, 14, 14, 13, - 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 16, 17, 17, - 17, 17, 17, 18, 17, 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, - 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 12, 13, 13, 14, 14, 14, 14, 14, 13, 13, - 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, - 11, 12, 12, 12, 13, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9, - 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6 }, - { /* Chroma */ - /* Size 4x4 */ - 32, 23, 20, 17, 23, 19, 17, 16, 20, 17, 14, 13, 17, 16, 13, 11, - /* Size 8x8 */ - 33, 30, 22, 22, 20, 18, 17, 16, 30, 26, 22, 23, 21, 19, 18, 17, 22, 22, - 20, 20, 19, 18, 17, 17, 22, 23, 20, 18, 17, 16, 15, 15, 20, 21, 19, 17, - 15, 14, 13, 13, 18, 19, 18, 16, 14, 12, 12, 12, 17, 18, 17, 15, 13, 12, - 11, 11, 16, 17, 17, 15, 13, 12, 11, 10, - /* Size 16x16 */ - 32, 33, 31, 28, 25, 21, 21, 20, 20, 19, 18, 17, 16, 15, 15, 15, 33, 33, - 30, 26, 24, 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 16, 31, 30, 28, 24, - 23, 22, 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 28, 26, 24, 22, 22, 21, - 22, 22, 22, 21, 20, 19, 19, 18, 17, 17, 25, 24, 23, 22, 21, 20, 21, 20, - 20, 20, 19, 18, 18, 17, 17, 17, 21, 22, 22, 21, 20, 19, 19, 19, 19, 19, - 18, 17, 17, 16, 16, 16, 21, 22, 22, 22, 21, 19, 19, 18, 17, 17, 17, 16, - 16, 15, 15, 15, 20, 22, 22, 22, 20, 19, 18, 17, 
16, 16, 16, 15, 15, 14, - 14, 14, 20, 21, 22, 22, 20, 19, 17, 16, 16, 15, 15, 14, 14, 13, 14, 14, - 19, 20, 21, 21, 20, 19, 17, 16, 15, 14, 14, 13, 13, 13, 13, 13, 18, 19, - 20, 20, 19, 18, 17, 16, 15, 14, 13, 13, 12, 12, 12, 12, 17, 18, 19, 19, - 18, 17, 16, 15, 14, 13, 13, 12, 12, 12, 12, 12, 16, 17, 18, 19, 18, 17, - 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 15, 17, 17, 18, 17, 16, 15, 14, - 13, 13, 12, 12, 11, 11, 11, 11, 15, 16, 17, 17, 17, 16, 15, 14, 14, 13, - 12, 12, 11, 11, 10, 10, 15, 16, 16, 17, 17, 16, 15, 14, 14, 13, 12, 12, - 11, 11, 10, 10, - /* Size 32x32 */ - 32, 33, 33, 34, 31, 31, 28, 27, 25, 22, 21, 21, 21, 21, 20, 20, 20, 19, - 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 14, 33, 33, 33, 33, - 30, 30, 27, 26, 24, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, - 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 33, 33, 33, 33, 30, 29, 26, 26, - 24, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, - 17, 16, 16, 16, 16, 15, 34, 33, 33, 32, 30, 29, 26, 25, 24, 23, 22, 23, - 23, 23, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, - 16, 16, 31, 30, 30, 30, 28, 27, 24, 24, 23, 22, 22, 22, 22, 23, 22, 22, - 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 31, 30, - 29, 29, 27, 26, 24, 23, 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, - 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 28, 27, 26, 26, 24, 24, - 22, 22, 22, 22, 21, 22, 22, 23, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, - 19, 19, 18, 18, 17, 17, 17, 17, 27, 26, 26, 25, 24, 23, 22, 22, 21, 21, - 21, 21, 22, 22, 22, 22, 21, 21, 21, 20, 20, 19, 19, 19, 18, 18, 18, 18, - 18, 17, 17, 17, 25, 24, 24, 24, 23, 23, 22, 21, 21, 20, 20, 21, 21, 21, - 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, - 22, 22, 22, 23, 22, 22, 22, 21, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, - 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 21, 22, 22, 22, - 22, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, - 17, 17, 17, 
17, 16, 16, 16, 16, 16, 16, 21, 22, 22, 23, 22, 22, 22, 21, - 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, - 16, 16, 16, 16, 16, 15, 21, 22, 22, 23, 22, 22, 22, 22, 21, 20, 19, 19, - 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 15, - 15, 15, 21, 22, 22, 23, 23, 23, 23, 22, 21, 20, 19, 19, 18, 18, 17, 17, - 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 20, 21, - 22, 22, 22, 22, 22, 22, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, - 16, 15, 15, 15, 15, 14, 14, 15, 14, 14, 14, 15, 20, 21, 22, 22, 22, 22, - 22, 22, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 20, 20, 21, 22, 22, 22, 22, 21, 20, 19, - 19, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 14, - 14, 13, 14, 14, 19, 20, 20, 21, 21, 21, 22, 21, 20, 19, 19, 18, 17, 17, - 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 19, 20, 20, 21, 21, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, - 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 18, 19, 19, 20, - 20, 20, 21, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, - 13, 13, 12, 12, 12, 13, 12, 13, 13, 12, 18, 19, 19, 20, 20, 20, 20, 20, - 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 17, 18, 18, 19, 19, 19, 20, 19, 19, 18, 18, 17, - 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 17, 18, 18, 19, 19, 19, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, - 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, - 17, 18, 18, 18, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, - 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 12, 11, 16, 17, 17, 18, 18, 18, - 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 16, 17, 17, 18, 18, 18, 19, 18, 18, 17, - 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, - 11, 11, 11, 
11, 15, 16, 17, 17, 17, 17, 18, 18, 17, 17, 16, 16, 15, 15, - 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 15, 16, 16, 17, 17, 17, 18, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 13, - 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11, 10, 10, 10, 15, 16, 16, 17, - 17, 17, 17, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, - 12, 11, 11, 11, 11, 11, 10, 10, 10, 10, 15, 16, 16, 17, 17, 17, 17, 17, - 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, - 11, 10, 10, 10, 10, 10, 15, 16, 16, 16, 16, 17, 17, 17, 17, 16, 16, 16, - 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, - 10, 10, 14, 15, 15, 16, 16, 17, 17, 17, 17, 16, 16, 15, 15, 15, 15, 14, - 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, - /* Size 4x8 */ - 33, 22, 19, 16, 28, 22, 20, 17, 22, 20, 19, 17, 23, 19, 16, 15, 21, 19, - 14, 13, 19, 18, 13, 12, 17, 17, 13, 11, 16, 16, 13, 11, - /* Size 8x4 */ - 33, 28, 22, 23, 21, 19, 17, 16, 22, 22, 20, 19, 19, 18, 17, 16, 19, 20, - 19, 16, 14, 13, 13, 13, 16, 17, 17, 15, 13, 12, 11, 11, - /* Size 8x16 */ - 32, 31, 23, 21, 20, 18, 16, 15, 33, 30, 23, 22, 21, 19, 17, 16, 31, 28, - 22, 23, 22, 20, 18, 17, 28, 24, 22, 23, 22, 20, 19, 17, 24, 23, 21, 21, - 20, 19, 18, 17, 21, 22, 20, 19, 19, 18, 17, 16, 21, 22, 20, 18, 17, 17, - 16, 15, 20, 22, 20, 17, 16, 16, 14, 14, 20, 22, 19, 17, 16, 14, 14, 14, - 19, 21, 19, 17, 15, 14, 13, 13, 18, 20, 19, 16, 15, 13, 12, 12, 17, 19, - 18, 16, 14, 13, 12, 12, 16, 18, 17, 15, 14, 12, 11, 11, 16, 17, 17, 15, - 13, 12, 11, 11, 15, 17, 17, 15, 13, 12, 11, 11, 15, 16, 17, 15, 14, 12, - 11, 10, - /* Size 16x8 */ - 32, 33, 31, 28, 24, 21, 21, 20, 20, 19, 18, 17, 16, 16, 15, 15, 31, 30, - 28, 24, 23, 22, 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 23, 23, 22, 22, - 21, 20, 20, 20, 19, 19, 19, 18, 17, 17, 17, 17, 21, 22, 23, 23, 21, 19, - 18, 17, 17, 17, 16, 16, 15, 15, 15, 15, 20, 21, 22, 22, 20, 19, 17, 16, - 16, 15, 15, 14, 14, 13, 13, 14, 18, 19, 20, 20, 
19, 18, 17, 16, 14, 14, - 13, 13, 12, 12, 12, 12, 16, 17, 18, 19, 18, 17, 16, 14, 14, 13, 12, 12, - 11, 11, 11, 11, 15, 16, 17, 17, 17, 16, 15, 14, 14, 13, 12, 12, 11, 11, - 11, 10, - /* Size 16x32 */ - 32, 33, 31, 28, 23, 21, 21, 20, 20, 18, 18, 16, 16, 15, 15, 15, 33, 33, - 30, 27, 23, 22, 22, 21, 20, 19, 19, 17, 17, 16, 16, 16, 33, 32, 30, 26, - 23, 22, 22, 22, 21, 20, 19, 17, 17, 17, 16, 16, 34, 32, 29, 26, 23, 22, - 23, 22, 21, 20, 20, 18, 18, 17, 17, 17, 31, 29, 28, 24, 22, 22, 23, 22, - 22, 20, 20, 18, 18, 17, 17, 17, 31, 28, 27, 24, 22, 22, 22, 22, 22, 20, - 20, 18, 18, 17, 17, 17, 28, 26, 24, 22, 22, 22, 23, 22, 22, 21, 20, 19, - 19, 18, 17, 17, 26, 25, 24, 22, 21, 21, 22, 22, 21, 20, 20, 19, 18, 18, - 18, 17, 24, 24, 23, 22, 21, 20, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, - 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 18, 17, 17, 17, 17, 17, 21, 22, - 22, 21, 20, 19, 19, 19, 19, 18, 18, 17, 17, 16, 16, 17, 21, 22, 22, 22, - 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 16, 21, 23, 22, 22, 20, 19, - 18, 18, 17, 17, 17, 16, 16, 16, 15, 16, 21, 23, 23, 22, 20, 19, 18, 17, - 17, 16, 16, 15, 15, 15, 15, 15, 20, 22, 22, 22, 20, 19, 17, 17, 16, 16, - 16, 15, 14, 15, 14, 15, 20, 22, 22, 22, 20, 19, 17, 17, 16, 16, 15, 14, - 14, 14, 14, 14, 20, 21, 22, 22, 19, 19, 17, 16, 16, 15, 14, 14, 14, 14, - 14, 14, 19, 21, 21, 21, 19, 19, 17, 16, 15, 14, 14, 13, 13, 13, 14, 13, - 19, 20, 21, 21, 19, 19, 17, 16, 15, 14, 14, 13, 13, 13, 13, 13, 18, 20, - 20, 20, 19, 18, 16, 16, 15, 14, 13, 13, 12, 13, 13, 13, 18, 20, 20, 20, - 19, 18, 16, 16, 15, 14, 13, 12, 12, 12, 12, 13, 17, 19, 19, 20, 18, 18, - 16, 15, 14, 13, 13, 12, 12, 12, 12, 12, 17, 18, 19, 19, 18, 17, 16, 15, - 14, 13, 13, 12, 12, 12, 12, 12, 16, 18, 18, 19, 17, 17, 15, 15, 14, 13, - 12, 12, 11, 11, 12, 12, 16, 18, 18, 18, 17, 17, 15, 14, 14, 13, 12, 11, - 11, 11, 11, 12, 16, 17, 18, 18, 17, 17, 15, 14, 14, 13, 12, 11, 11, 11, - 11, 11, 16, 17, 17, 18, 17, 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 11, - 15, 17, 17, 18, 17, 
16, 15, 15, 13, 13, 12, 11, 11, 11, 11, 11, 15, 17, - 17, 17, 17, 16, 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, 15, 16, 17, 17, - 17, 16, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 15, 16, 16, 17, 17, 16, - 15, 14, 14, 13, 12, 12, 11, 11, 10, 10, 15, 16, 16, 17, 17, 15, 15, 14, - 14, 12, 12, 11, 11, 10, 10, 10, - /* Size 32x16 */ - 32, 33, 33, 34, 31, 31, 28, 26, 24, 22, 21, 21, 21, 21, 20, 20, 20, 19, - 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 33, 33, 32, 32, - 29, 28, 26, 25, 24, 22, 22, 22, 23, 23, 22, 22, 21, 21, 20, 20, 20, 19, - 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 31, 30, 30, 29, 28, 27, 24, 24, - 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, - 17, 17, 17, 17, 16, 16, 28, 27, 26, 26, 24, 24, 22, 22, 22, 21, 21, 22, - 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17, - 17, 17, 23, 23, 23, 23, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 20, 20, - 19, 19, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 17, 21, 22, - 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, - 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 21, 22, 22, 23, 23, 22, - 23, 22, 21, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 20, 21, 22, 22, 22, 22, 22, 22, 20, 19, - 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 15, - 14, 14, 14, 14, 20, 20, 21, 21, 22, 22, 22, 21, 20, 19, 19, 18, 17, 17, - 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 13, 14, 14, - 18, 19, 20, 20, 20, 20, 21, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 14, - 14, 14, 14, 13, 13, 13, 13, 13, 12, 13, 13, 13, 13, 12, 18, 19, 19, 20, - 20, 20, 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 13, - 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 19, 19, - 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, - 11, 11, 12, 12, 12, 11, 16, 17, 17, 18, 18, 18, 19, 18, 18, 17, 17, 16, - 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 
12, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 15, 16, 17, 17, 17, 17, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14, - 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 10, 15, 16, - 16, 17, 17, 17, 17, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, - 12, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 15, 16, 16, 17, 17, 17, - 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, - 12, 11, 11, 11, 10, 10, 10, 10, - /* Size 4x16 */ - 33, 21, 18, 15, 32, 22, 20, 17, 29, 22, 20, 17, 26, 22, 21, 18, 24, 20, - 19, 17, 22, 19, 18, 16, 23, 19, 17, 16, 22, 19, 16, 15, 21, 19, 15, 14, - 20, 19, 14, 13, 20, 18, 14, 12, 18, 17, 13, 12, 18, 17, 13, 11, 17, 16, - 12, 11, 17, 16, 13, 11, 16, 16, 13, 11, - /* Size 16x4 */ - 33, 32, 29, 26, 24, 22, 23, 22, 21, 20, 20, 18, 18, 17, 17, 16, 21, 22, - 22, 22, 20, 19, 19, 19, 19, 19, 18, 17, 17, 16, 16, 16, 18, 20, 20, 21, - 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 13, 13, 15, 17, 17, 18, 17, 16, - 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, - /* Size 8x32 */ - 32, 31, 23, 21, 20, 18, 16, 15, 33, 30, 23, 22, 20, 19, 17, 16, 33, 30, - 23, 22, 21, 19, 17, 16, 34, 29, 23, 23, 21, 20, 18, 17, 31, 28, 22, 23, - 22, 20, 18, 17, 31, 27, 22, 22, 22, 20, 18, 17, 28, 24, 22, 23, 22, 20, - 19, 17, 26, 24, 21, 22, 21, 20, 18, 18, 24, 23, 21, 21, 20, 19, 18, 17, - 22, 22, 20, 19, 19, 18, 17, 17, 21, 22, 20, 19, 19, 18, 17, 16, 21, 22, - 20, 18, 18, 17, 16, 16, 21, 22, 20, 18, 17, 17, 16, 15, 21, 23, 20, 18, - 17, 16, 15, 15, 20, 22, 20, 17, 16, 16, 14, 14, 20, 22, 20, 17, 16, 15, - 14, 14, 20, 22, 19, 17, 16, 14, 14, 14, 19, 21, 19, 17, 15, 14, 13, 14, - 19, 21, 19, 17, 15, 14, 13, 13, 18, 20, 19, 16, 15, 13, 12, 13, 18, 20, - 19, 16, 15, 13, 12, 12, 17, 19, 18, 16, 14, 13, 12, 12, 17, 19, 18, 16, - 14, 13, 12, 12, 16, 18, 17, 15, 14, 12, 11, 12, 16, 18, 17, 15, 14, 12, - 11, 11, 16, 18, 17, 15, 14, 12, 11, 11, 16, 17, 17, 15, 13, 12, 11, 11, - 15, 17, 17, 15, 13, 12, 11, 11, 15, 17, 17, 15, 13, 12, 11, 11, 15, 17, - 17, 15, 13, 12, 
11, 10, 15, 16, 17, 15, 14, 12, 11, 10, 15, 16, 17, 15, - 14, 12, 11, 10, - /* Size 32x8 */ - 32, 33, 33, 34, 31, 31, 28, 26, 24, 22, 21, 21, 21, 21, 20, 20, 20, 19, - 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 31, 30, 30, 29, - 28, 27, 24, 24, 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, - 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 23, 23, 23, 23, 22, 22, 22, 21, - 21, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 18, 18, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 21, 22, 22, 23, 23, 22, 23, 22, 21, 19, 19, 18, - 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 20, 20, 21, 21, 22, 22, 22, 21, 20, 19, 19, 18, 17, 17, 16, 16, - 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 13, 14, 14, 18, 19, - 19, 20, 20, 20, 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, - 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, - 19, 18, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 17, 18, 17, 17, - 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, - 11, 10, 10, 10 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 30, 19, 14, 30, 21, 16, 13, 19, 16, 11, 9, 14, 13, 9, 7, - /* Size 8x8 */ - 32, 32, 30, 26, 20, 17, 13, 12, 32, 31, 29, 26, 21, 17, 14, 13, 30, 29, - 26, 22, 19, 16, 14, 13, 26, 26, 22, 18, 16, 14, 12, 11, 20, 21, 19, 16, - 13, 11, 10, 10, 17, 17, 16, 14, 11, 10, 9, 8, 13, 14, 14, 12, 10, 9, 8, - 7, 12, 13, 13, 11, 10, 8, 7, 7, - /* Size 16x16 */ - 32, 33, 33, 32, 31, 28, 26, 23, 21, 19, 17, 16, 14, 13, 12, 11, 33, 32, - 32, 32, 31, 29, 27, 24, 22, 20, 18, 16, 15, 13, 13, 12, 33, 32, 32, 31, - 30, 29, 27, 25, 23, 21, 19, 17, 15, 14, 13, 12, 32, 32, 31, 30, 28, 28, - 26, 24, 23, 21, 19, 17, 16, 14, 14, 13, 31, 31, 30, 28, 27, 24, 23, 22, - 20, 19, 18, 16, 15, 14, 13, 13, 28, 29, 29, 28, 24, 21, 20, 19, 18, 17, - 16, 15, 14, 13, 12, 12, 26, 27, 27, 26, 23, 20, 19, 18, 17, 16, 15, 14, - 13, 12, 
12, 11, 23, 24, 25, 24, 22, 19, 18, 16, 15, 14, 14, 13, 12, 11, - 11, 11, 21, 22, 23, 23, 20, 18, 17, 15, 14, 13, 13, 12, 11, 10, 10, 10, - 19, 20, 21, 21, 19, 17, 16, 14, 13, 12, 12, 11, 10, 10, 9, 9, 17, 18, - 19, 19, 18, 16, 15, 14, 13, 12, 11, 10, 10, 9, 9, 9, 16, 16, 17, 17, 16, - 15, 14, 13, 12, 11, 10, 10, 9, 8, 8, 8, 14, 15, 15, 16, 15, 14, 13, 12, - 11, 10, 10, 9, 8, 8, 8, 7, 13, 13, 14, 14, 14, 13, 12, 11, 10, 10, 9, 8, - 8, 7, 7, 7, 12, 13, 13, 14, 13, 12, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, 11, - 12, 12, 13, 13, 12, 11, 11, 10, 9, 9, 8, 7, 7, 7, 6, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 32, 32, 31, 30, 28, 28, 26, 25, 23, 22, 21, 20, - 19, 18, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12, 11, 11, 33, 32, 32, 32, - 32, 32, 32, 32, 31, 30, 29, 29, 27, 26, 24, 23, 22, 20, 20, 18, 18, 17, - 16, 15, 14, 13, 13, 13, 12, 12, 12, 12, 33, 32, 32, 32, 32, 32, 32, 32, - 31, 30, 29, 29, 27, 26, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 15, 14, - 13, 13, 13, 12, 12, 12, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, - 28, 27, 25, 24, 23, 21, 21, 19, 19, 17, 17, 16, 15, 14, 14, 14, 13, 13, - 12, 12, 33, 32, 32, 32, 32, 31, 31, 31, 30, 30, 29, 29, 27, 26, 25, 24, - 23, 21, 21, 19, 19, 17, 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 33, 32, - 32, 32, 31, 31, 31, 30, 29, 29, 28, 28, 26, 26, 24, 23, 23, 21, 20, 19, - 19, 17, 17, 16, 15, 14, 14, 14, 13, 13, 13, 12, 32, 32, 32, 32, 31, 31, - 30, 29, 28, 28, 28, 27, 26, 26, 24, 23, 23, 21, 21, 19, 19, 18, 17, 16, - 16, 15, 14, 14, 14, 13, 13, 12, 32, 32, 32, 32, 31, 30, 29, 29, 28, 28, - 27, 27, 26, 25, 24, 23, 22, 21, 21, 19, 19, 18, 17, 16, 16, 15, 14, 14, - 14, 13, 13, 13, 31, 31, 31, 31, 30, 29, 28, 28, 27, 26, 24, 24, 23, 23, - 22, 21, 20, 20, 19, 18, 18, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, - 30, 30, 30, 31, 30, 29, 28, 28, 26, 26, 24, 24, 23, 22, 22, 21, 20, 19, - 19, 18, 18, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 28, 29, 29, 30, - 29, 28, 28, 27, 24, 24, 21, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, - 15, 14, 
14, 13, 13, 13, 12, 12, 12, 12, 28, 29, 29, 30, 29, 28, 27, 27, - 24, 24, 21, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, - 13, 13, 12, 12, 12, 11, 26, 27, 27, 28, 27, 26, 26, 26, 23, 23, 20, 20, - 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, - 11, 11, 25, 26, 26, 27, 26, 26, 26, 25, 23, 22, 20, 20, 19, 18, 17, 17, - 16, 16, 15, 15, 15, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 23, 24, - 24, 25, 25, 24, 24, 24, 22, 22, 19, 19, 18, 17, 16, 16, 15, 15, 14, 14, - 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 11, 11, 22, 23, 23, 24, 24, 23, - 23, 23, 21, 21, 19, 19, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, - 12, 11, 11, 11, 10, 10, 10, 10, 21, 22, 22, 23, 23, 23, 23, 22, 20, 20, - 18, 18, 17, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10, - 10, 10, 10, 10, 20, 20, 21, 21, 21, 21, 21, 21, 20, 19, 17, 17, 16, 16, - 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 9, - 19, 20, 20, 21, 21, 20, 21, 21, 19, 19, 17, 17, 16, 15, 14, 14, 13, 13, - 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 18, 18, 19, 19, 19, - 19, 19, 19, 18, 18, 16, 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, - 10, 10, 9, 9, 9, 9, 9, 9, 9, 17, 18, 18, 19, 19, 19, 19, 19, 18, 18, 16, - 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, - 9, 9, 16, 17, 17, 17, 17, 17, 18, 18, 17, 16, 15, 15, 14, 14, 13, 12, - 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 16, 16, 16, 17, - 17, 17, 17, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, - 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 14, 15, 15, 16, 16, 16, 16, 16, 15, 15, - 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, - 8, 8, 14, 14, 15, 15, 15, 15, 16, 16, 15, 15, 14, 14, 13, 12, 12, 12, - 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7, 8, 13, 13, 14, 14, 14, - 14, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, - 8, 8, 8, 7, 7, 7, 7, 7, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 13, 13, - 12, 12, 11, 11, 
10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 12, - 13, 13, 14, 14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10, 10, - 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 12, 12, 13, 13, 13, 13, 14, 14, - 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, - 7, 7, 7, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 12, 12, 11, 11, 11, 10, - 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 11, 12, 12, 12, 12, - 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7, - 7, 7, 7, 7, 7, 6, 6, 11, 12, 12, 12, 12, 12, 12, 13, 13, 12, 12, 11, 11, - 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, - /* Size 4x8 */ - 32, 29, 20, 13, 32, 28, 20, 14, 30, 24, 19, 14, 27, 20, 15, 12, 21, 17, - 13, 10, 17, 15, 11, 9, 14, 13, 10, 8, 13, 12, 9, 7, - /* Size 8x4 */ - 32, 32, 30, 27, 21, 17, 14, 13, 29, 28, 24, 20, 17, 15, 13, 12, 20, 20, - 19, 15, 13, 11, 10, 9, 13, 14, 14, 12, 10, 9, 8, 7, - /* Size 8x16 */ - 32, 33, 31, 26, 20, 16, 13, 12, 33, 32, 31, 26, 21, 17, 14, 12, 33, 32, - 30, 27, 22, 17, 14, 13, 32, 31, 28, 26, 21, 18, 15, 13, 31, 30, 27, 23, - 20, 17, 14, 13, 28, 29, 24, 20, 18, 15, 13, 12, 26, 27, 23, 19, 16, 14, - 12, 12, 23, 25, 22, 17, 15, 13, 11, 11, 21, 23, 20, 17, 14, 12, 11, 10, - 19, 21, 19, 16, 13, 11, 10, 9, 18, 19, 18, 15, 12, 10, 9, 9, 16, 17, 16, - 14, 11, 10, 9, 8, 14, 15, 15, 13, 11, 9, 8, 8, 13, 14, 14, 12, 10, 9, 8, - 7, 12, 13, 13, 11, 10, 8, 7, 7, 11, 12, 13, 11, 10, 9, 7, 7, - /* Size 16x8 */ - 32, 33, 33, 32, 31, 28, 26, 23, 21, 19, 18, 16, 14, 13, 12, 11, 33, 32, - 32, 31, 30, 29, 27, 25, 23, 21, 19, 17, 15, 14, 13, 12, 31, 31, 30, 28, - 27, 24, 23, 22, 20, 19, 18, 16, 15, 14, 13, 13, 26, 26, 27, 26, 23, 20, - 19, 17, 17, 16, 15, 14, 13, 12, 11, 11, 20, 21, 22, 21, 20, 18, 16, 15, - 14, 13, 12, 11, 11, 10, 10, 10, 16, 17, 17, 18, 17, 15, 14, 13, 12, 11, - 10, 10, 9, 9, 8, 9, 13, 14, 14, 15, 14, 13, 12, 11, 11, 10, 9, 9, 8, 8, - 7, 7, 12, 12, 13, 13, 13, 12, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, - /* Size 16x32 */ 
- 32, 33, 33, 32, 31, 28, 26, 23, 20, 19, 16, 16, 13, 13, 12, 11, 33, 32, - 32, 32, 31, 29, 26, 24, 21, 20, 17, 16, 14, 13, 12, 12, 33, 32, 32, 32, - 31, 29, 26, 24, 21, 20, 17, 17, 14, 13, 12, 12, 33, 32, 32, 31, 31, 30, - 27, 25, 22, 21, 17, 17, 14, 14, 13, 13, 33, 32, 32, 31, 30, 29, 27, 25, - 22, 21, 17, 17, 14, 14, 13, 13, 32, 32, 31, 30, 29, 28, 26, 24, 21, 20, - 17, 17, 14, 14, 13, 13, 32, 32, 31, 29, 28, 28, 26, 24, 21, 21, 18, 17, - 15, 14, 13, 13, 32, 31, 31, 29, 28, 27, 25, 24, 21, 21, 18, 17, 15, 15, - 14, 13, 31, 31, 30, 28, 27, 25, 23, 22, 20, 19, 17, 16, 14, 14, 13, 13, - 30, 30, 30, 28, 26, 24, 23, 21, 19, 19, 16, 16, 14, 14, 13, 12, 28, 30, - 29, 27, 24, 21, 20, 19, 18, 17, 15, 15, 13, 13, 12, 12, 28, 29, 29, 27, - 24, 21, 20, 19, 17, 17, 15, 15, 13, 13, 12, 12, 26, 28, 27, 26, 23, 20, - 19, 18, 16, 16, 14, 14, 12, 12, 12, 12, 26, 27, 26, 25, 23, 20, 18, 17, - 16, 15, 14, 13, 12, 12, 11, 11, 23, 25, 25, 24, 22, 19, 17, 16, 15, 14, - 13, 13, 11, 11, 11, 11, 22, 24, 24, 23, 21, 19, 17, 16, 14, 14, 12, 12, - 11, 11, 11, 10, 21, 23, 23, 22, 20, 18, 17, 15, 14, 13, 12, 12, 11, 10, - 10, 10, 20, 21, 21, 21, 20, 17, 16, 15, 13, 13, 11, 11, 10, 10, 10, 10, - 19, 21, 21, 20, 19, 17, 16, 14, 13, 12, 11, 11, 10, 10, 9, 10, 18, 19, - 19, 19, 18, 16, 15, 14, 12, 12, 11, 10, 9, 9, 9, 9, 18, 19, 19, 19, 18, - 16, 15, 14, 12, 12, 10, 10, 9, 9, 9, 9, 16, 17, 17, 18, 17, 15, 14, 13, - 12, 11, 10, 10, 9, 9, 8, 8, 16, 17, 17, 17, 16, 15, 14, 13, 11, 11, 10, - 10, 9, 8, 8, 8, 14, 16, 16, 16, 15, 14, 13, 12, 11, 11, 9, 9, 8, 8, 8, - 8, 14, 15, 15, 16, 15, 14, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 13, 14, 14, - 15, 14, 13, 12, 11, 10, 10, 9, 9, 8, 8, 7, 7, 13, 14, 14, 14, 14, 13, - 12, 11, 10, 10, 9, 8, 8, 7, 7, 7, 12, 14, 14, 14, 14, 13, 12, 11, 10, - 10, 8, 8, 8, 7, 7, 7, 12, 13, 13, 14, 13, 12, 11, 11, 10, 9, 8, 8, 7, 7, - 7, 7, 12, 13, 13, 13, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7, 7, 7, 11, 12, - 12, 13, 13, 12, 11, 10, 10, 9, 9, 8, 7, 7, 7, 7, 11, 12, 12, 13, 13, 11, - 11, 
10, 10, 9, 9, 8, 8, 7, 7, 6, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 32, 32, 32, 31, 30, 28, 28, 26, 26, 23, 22, 21, 20, - 19, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12, 11, 11, 33, 32, 32, 32, - 32, 32, 32, 31, 31, 30, 30, 29, 28, 27, 25, 24, 23, 21, 21, 19, 19, 17, - 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 33, 32, 32, 32, 32, 31, 31, 31, - 30, 30, 29, 29, 27, 26, 25, 24, 23, 21, 21, 19, 19, 17, 17, 16, 15, 14, - 14, 14, 13, 13, 12, 12, 32, 32, 32, 31, 31, 30, 29, 29, 28, 28, 27, 27, - 26, 25, 24, 23, 22, 21, 20, 19, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, - 13, 13, 31, 31, 31, 31, 30, 29, 28, 28, 27, 26, 24, 24, 23, 23, 22, 21, - 20, 20, 19, 18, 18, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 28, 29, - 29, 30, 29, 28, 28, 27, 25, 24, 21, 21, 20, 20, 19, 19, 18, 17, 17, 16, - 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 26, 26, 26, 27, 27, 26, - 26, 25, 23, 23, 20, 20, 19, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, - 13, 12, 12, 12, 11, 11, 11, 11, 23, 24, 24, 25, 25, 24, 24, 24, 22, 21, - 19, 19, 18, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 11, 11, 11, - 11, 10, 10, 10, 20, 21, 21, 22, 22, 21, 21, 21, 20, 19, 18, 17, 16, 16, - 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, - 19, 20, 20, 21, 21, 20, 21, 21, 19, 19, 17, 17, 16, 15, 14, 14, 13, 13, - 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 16, 17, 17, 17, 17, - 17, 18, 18, 17, 16, 15, 15, 14, 14, 13, 12, 12, 11, 11, 11, 10, 10, 10, - 9, 9, 9, 9, 8, 8, 8, 9, 9, 16, 16, 17, 17, 17, 17, 17, 17, 16, 16, 15, - 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, - 8, 13, 14, 14, 14, 14, 14, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, - 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 8, 13, 13, 13, 14, 14, 14, - 14, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, - 7, 7, 7, 7, 7, 7, 12, 12, 12, 13, 13, 13, 13, 14, 13, 13, 12, 12, 12, - 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 11, 12, - 12, 13, 13, 13, 13, 13, 13, 12, 
12, 12, 12, 11, 11, 10, 10, 10, 10, 9, - 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, - /* Size 4x16 */ - 33, 28, 19, 13, 32, 29, 20, 13, 32, 29, 21, 14, 32, 28, 21, 14, 31, 25, - 19, 14, 30, 21, 17, 13, 28, 20, 16, 12, 25, 19, 14, 11, 23, 18, 13, 10, - 21, 17, 12, 10, 19, 16, 12, 9, 17, 15, 11, 8, 15, 14, 10, 8, 14, 13, 10, - 7, 13, 12, 9, 7, 12, 12, 9, 7, - /* Size 16x4 */ - 33, 32, 32, 32, 31, 30, 28, 25, 23, 21, 19, 17, 15, 14, 13, 12, 28, 29, - 29, 28, 25, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 12, 19, 20, 21, 21, - 19, 17, 16, 14, 13, 12, 12, 11, 10, 10, 9, 9, 13, 13, 14, 14, 14, 13, - 12, 11, 10, 10, 9, 8, 8, 7, 7, 7, - /* Size 8x32 */ - 32, 33, 31, 26, 20, 16, 13, 12, 33, 32, 31, 26, 21, 17, 14, 12, 33, 32, - 31, 26, 21, 17, 14, 12, 33, 32, 31, 27, 22, 17, 14, 13, 33, 32, 30, 27, - 22, 17, 14, 13, 32, 31, 29, 26, 21, 17, 14, 13, 32, 31, 28, 26, 21, 18, - 15, 13, 32, 31, 28, 25, 21, 18, 15, 14, 31, 30, 27, 23, 20, 17, 14, 13, - 30, 30, 26, 23, 19, 16, 14, 13, 28, 29, 24, 20, 18, 15, 13, 12, 28, 29, - 24, 20, 17, 15, 13, 12, 26, 27, 23, 19, 16, 14, 12, 12, 26, 26, 23, 18, - 16, 14, 12, 11, 23, 25, 22, 17, 15, 13, 11, 11, 22, 24, 21, 17, 14, 12, - 11, 11, 21, 23, 20, 17, 14, 12, 11, 10, 20, 21, 20, 16, 13, 11, 10, 10, - 19, 21, 19, 16, 13, 11, 10, 9, 18, 19, 18, 15, 12, 11, 9, 9, 18, 19, 18, - 15, 12, 10, 9, 9, 16, 17, 17, 14, 12, 10, 9, 8, 16, 17, 16, 14, 11, 10, - 9, 8, 14, 16, 15, 13, 11, 9, 8, 8, 14, 15, 15, 13, 11, 9, 8, 8, 13, 14, - 14, 12, 10, 9, 8, 7, 13, 14, 14, 12, 10, 9, 8, 7, 12, 14, 14, 12, 10, 8, - 8, 7, 12, 13, 13, 11, 10, 8, 7, 7, 12, 13, 13, 11, 10, 8, 7, 7, 11, 12, - 13, 11, 10, 9, 7, 7, 11, 12, 13, 11, 10, 9, 8, 7, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 32, 32, 32, 31, 30, 28, 28, 26, 26, 23, 22, 21, 20, - 19, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12, 11, 11, 33, 32, 32, 32, - 32, 31, 31, 31, 30, 30, 29, 29, 27, 26, 25, 24, 23, 21, 21, 19, 19, 17, - 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 31, 31, 31, 31, 30, 29, 28, 28, - 27, 26, 24, 24, 23, 23, 
22, 21, 20, 20, 19, 18, 18, 17, 16, 15, 15, 14, - 14, 14, 13, 13, 13, 13, 26, 26, 26, 27, 27, 26, 26, 25, 23, 23, 20, 20, - 19, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, - 11, 11, 20, 21, 21, 22, 22, 21, 21, 21, 20, 19, 18, 17, 16, 16, 15, 14, - 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 16, 17, - 17, 17, 17, 17, 18, 18, 17, 16, 15, 15, 14, 14, 13, 12, 12, 11, 11, 11, - 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 9, 9, 13, 14, 14, 14, 14, 14, 15, 15, - 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, - 7, 7, 7, 8, 12, 12, 12, 13, 13, 13, 13, 14, 13, 13, 12, 12, 12, 11, 11, - 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7 }, - { /* Chroma */ - /* Size 4x4 */ - 32, 22, 21, 18, 22, 19, 19, 17, 21, 19, 15, 13, 18, 17, 13, 11, - /* Size 8x8 */ - 33, 30, 24, 22, 21, 19, 17, 16, 30, 26, 23, 22, 22, 20, 18, 17, 24, 23, - 21, 21, 20, 19, 18, 17, 22, 22, 21, 19, 18, 17, 16, 16, 21, 22, 20, 18, - 16, 15, 14, 14, 19, 20, 19, 17, 15, 13, 12, 12, 17, 18, 18, 16, 14, 12, - 12, 11, 16, 17, 17, 16, 14, 12, 11, 11, - /* Size 16x16 */ - 32, 33, 33, 29, 26, 21, 21, 21, 20, 20, 19, 18, 17, 16, 16, 15, 33, 33, - 32, 28, 25, 22, 22, 22, 21, 21, 20, 19, 18, 17, 17, 16, 33, 32, 30, 26, - 24, 22, 22, 23, 22, 22, 21, 20, 19, 18, 17, 17, 29, 28, 26, 23, 22, 22, - 22, 23, 22, 22, 21, 20, 19, 18, 18, 17, 26, 25, 24, 22, 21, 20, 21, 21, - 21, 21, 20, 19, 19, 18, 17, 17, 21, 22, 22, 22, 20, 19, 19, 19, 19, 19, - 19, 18, 17, 17, 17, 17, 21, 22, 22, 22, 21, 19, 19, 19, 18, 18, 18, 17, - 17, 16, 16, 16, 21, 22, 23, 23, 21, 19, 19, 18, 17, 17, 17, 16, 16, 15, - 15, 15, 20, 21, 22, 22, 21, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, - 20, 21, 22, 22, 21, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 13, 19, 20, - 21, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 13, 18, 19, 20, 20, - 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 17, 18, 19, 19, 19, 17, - 17, 16, 15, 14, 13, 13, 12, 12, 12, 12, 16, 17, 18, 18, 18, 17, 16, 15, - 14, 14, 13, 
12, 12, 11, 11, 11, 16, 17, 17, 18, 17, 17, 16, 15, 14, 13, - 13, 12, 12, 11, 11, 11, 15, 16, 17, 17, 17, 17, 16, 15, 14, 13, 13, 12, - 12, 11, 11, 10, - /* Size 32x32 */ - 32, 33, 33, 34, 33, 31, 29, 28, 26, 25, 21, 21, 21, 21, 21, 20, 20, 20, - 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 33, 33, 33, 33, - 32, 30, 28, 27, 25, 24, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, - 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 33, 33, 33, 33, 32, 29, 28, 26, - 25, 24, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 19, 19, 18, 18, 17, - 17, 17, 17, 16, 16, 16, 34, 33, 33, 32, 31, 29, 27, 26, 24, 24, 22, 22, - 23, 23, 23, 23, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, - 17, 17, 33, 32, 32, 31, 30, 28, 26, 25, 24, 24, 22, 22, 22, 23, 23, 22, - 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 31, 30, - 29, 29, 28, 26, 25, 24, 23, 23, 22, 22, 22, 22, 23, 22, 22, 22, 22, 21, - 21, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17, 17, 29, 28, 28, 27, 26, 25, - 23, 22, 22, 22, 22, 22, 22, 22, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, - 19, 19, 18, 18, 18, 18, 17, 17, 28, 27, 26, 26, 25, 24, 22, 22, 22, 22, - 21, 22, 22, 22, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, - 18, 18, 18, 18, 26, 25, 25, 24, 24, 23, 22, 22, 21, 21, 20, 21, 21, 21, - 21, 21, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, - 25, 24, 24, 24, 24, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21, 20, 20, - 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 21, 22, 22, 22, - 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, - 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 21, 22, 22, 22, 22, 22, 22, 22, - 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, - 17, 17, 16, 16, 16, 16, 21, 22, 22, 23, 22, 22, 22, 22, 21, 21, 19, 19, - 19, 19, 19, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, - 16, 16, 21, 22, 22, 23, 23, 22, 22, 22, 21, 21, 19, 19, 19, 19, 18, 18, - 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 
16, 16, 15, 15, 21, 22, - 22, 23, 23, 23, 23, 23, 21, 21, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, - 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 20, 22, 22, 23, 22, 22, - 22, 22, 21, 21, 19, 19, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, - 15, 15, 15, 15, 15, 15, 14, 14, 20, 21, 21, 22, 22, 22, 22, 22, 21, 20, - 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, - 14, 14, 14, 14, 20, 21, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 18, - 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 20, 20, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 18, 17, 17, 16, 16, - 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 14, 19, 20, 20, 21, - 21, 21, 21, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 15, 15, 14, - 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 19, 20, 20, 21, 21, 21, 21, 21, - 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13, - 13, 13, 13, 13, 13, 13, 18, 19, 19, 20, 20, 20, 20, 20, 20, 19, 18, 18, - 17, 17, 16, 16, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 12, - 12, 12, 18, 19, 19, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, - 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 17, 18, - 18, 19, 19, 19, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, - 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 17, 18, 18, 19, 19, 19, - 19, 19, 19, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 19, 19, 18, 18, - 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, - 11, 11, 11, 11, 16, 17, 17, 18, 18, 18, 18, 19, 18, 18, 17, 17, 16, 16, - 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, - 16, 17, 17, 18, 18, 18, 18, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, - 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 16, 16, 17, 17, - 17, 18, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, - 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 15, 16, 
16, 17, 17, 17, 18, 18, - 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, - 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 17, 18, 17, 17, 17, 16, - 16, 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, - 10, 11, 15, 16, 16, 17, 17, 17, 17, 18, 17, 17, 17, 16, 16, 15, 15, 14, - 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 10, - /* Size 4x8 */ - 33, 22, 20, 17, 28, 22, 22, 18, 24, 20, 20, 18, 23, 19, 18, 16, 22, 19, - 16, 14, 20, 18, 15, 12, 18, 17, 14, 11, 17, 16, 13, 11, - /* Size 8x4 */ - 33, 28, 24, 23, 22, 20, 18, 17, 22, 22, 20, 19, 19, 18, 17, 16, 20, 22, - 20, 18, 16, 15, 14, 13, 17, 18, 18, 16, 14, 12, 11, 11, - /* Size 8x16 */ - 32, 32, 26, 21, 20, 18, 16, 15, 33, 31, 25, 22, 21, 19, 17, 16, 33, 29, - 24, 22, 22, 20, 18, 17, 29, 26, 22, 22, 22, 20, 19, 18, 25, 24, 21, 21, - 21, 20, 18, 17, 21, 22, 20, 19, 19, 18, 17, 17, 21, 22, 21, 19, 18, 17, - 16, 16, 21, 23, 21, 18, 17, 16, 15, 15, 20, 22, 21, 18, 16, 15, 14, 14, - 20, 21, 20, 18, 16, 14, 14, 13, 19, 20, 20, 17, 15, 14, 13, 13, 18, 20, - 19, 17, 15, 13, 12, 12, 17, 19, 18, 16, 14, 13, 12, 12, 16, 18, 18, 16, - 14, 12, 12, 11, 16, 17, 17, 16, 14, 12, 11, 11, 15, 17, 17, 16, 14, 13, - 12, 11, - /* Size 16x8 */ - 32, 33, 33, 29, 25, 21, 21, 21, 20, 20, 19, 18, 17, 16, 16, 15, 32, 31, - 29, 26, 24, 22, 22, 23, 22, 21, 20, 20, 19, 18, 17, 17, 26, 25, 24, 22, - 21, 20, 21, 21, 21, 20, 20, 19, 18, 18, 17, 17, 21, 22, 22, 22, 21, 19, - 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 20, 21, 22, 22, 21, 19, 18, 17, - 16, 16, 15, 15, 14, 14, 14, 14, 18, 19, 20, 20, 20, 18, 17, 16, 15, 14, - 14, 13, 13, 12, 12, 13, 16, 17, 18, 19, 18, 17, 16, 15, 14, 14, 13, 12, - 12, 12, 11, 12, 15, 16, 17, 18, 17, 17, 16, 15, 14, 13, 13, 12, 12, 11, - 11, 11, - /* Size 16x32 */ - 32, 33, 32, 28, 26, 21, 21, 21, 20, 20, 18, 18, 16, 16, 15, 15, 33, 33, - 31, 27, 25, 22, 22, 22, 21, 20, 19, 19, 17, 17, 16, 16, 33, 33, 31, 27, - 25, 22, 22, 22, 21, 21, 19, 19, 17, 17, 16, 16, 34, 
32, 31, 26, 24, 22, - 23, 23, 22, 21, 20, 20, 18, 18, 17, 17, 33, 31, 29, 25, 24, 22, 22, 23, - 22, 21, 20, 20, 18, 18, 17, 17, 31, 28, 28, 24, 23, 22, 22, 22, 22, 22, - 20, 20, 18, 18, 17, 17, 29, 27, 26, 23, 22, 22, 22, 23, 22, 22, 20, 20, - 19, 18, 18, 17, 28, 26, 25, 22, 22, 22, 22, 23, 22, 22, 20, 20, 19, 19, - 18, 18, 25, 24, 24, 22, 21, 21, 21, 21, 21, 20, 20, 19, 18, 18, 17, 18, - 24, 24, 24, 22, 21, 20, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 21, 22, - 22, 21, 20, 19, 19, 19, 19, 19, 18, 18, 17, 17, 17, 17, 21, 22, 22, 21, - 20, 19, 19, 19, 19, 19, 18, 18, 17, 17, 16, 16, 21, 22, 22, 22, 21, 19, - 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 21, 23, 22, 22, 21, 19, 19, 18, - 18, 18, 17, 17, 16, 16, 16, 15, 21, 23, 23, 22, 21, 19, 18, 18, 17, 17, - 16, 16, 15, 15, 15, 15, 21, 22, 22, 22, 21, 19, 18, 17, 17, 17, 16, 16, - 15, 15, 15, 15, 20, 22, 22, 22, 21, 19, 18, 17, 16, 16, 15, 15, 14, 14, - 14, 14, 20, 22, 22, 22, 21, 19, 18, 17, 16, 16, 15, 15, 14, 14, 14, 14, - 20, 21, 21, 22, 20, 19, 18, 17, 16, 16, 14, 14, 14, 14, 13, 14, 19, 20, - 21, 21, 20, 19, 17, 17, 15, 15, 14, 14, 13, 13, 13, 13, 19, 20, 20, 21, - 20, 19, 17, 17, 15, 15, 14, 14, 13, 13, 13, 13, 18, 20, 20, 20, 20, 18, - 17, 16, 15, 15, 13, 13, 12, 12, 12, 12, 18, 20, 20, 20, 19, 18, 17, 16, - 15, 14, 13, 13, 12, 12, 12, 12, 17, 19, 19, 20, 19, 18, 17, 16, 14, 14, - 13, 13, 12, 12, 12, 12, 17, 18, 19, 19, 18, 17, 16, 16, 14, 14, 13, 13, - 12, 12, 12, 12, 16, 18, 18, 19, 18, 17, 16, 15, 14, 14, 12, 12, 12, 11, - 11, 11, 16, 18, 18, 19, 18, 17, 16, 15, 14, 14, 12, 12, 12, 11, 11, 11, - 16, 17, 18, 18, 18, 17, 16, 15, 14, 14, 12, 12, 11, 11, 11, 11, 16, 17, - 17, 18, 17, 17, 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 15, 17, 17, 18, - 17, 16, 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 15, 17, 17, 18, 17, 16, - 16, 14, 14, 13, 13, 12, 12, 11, 11, 11, 15, 17, 17, 17, 17, 16, 16, 14, - 14, 13, 13, 12, 12, 11, 11, 10, - /* Size 32x16 */ - 32, 33, 33, 34, 33, 31, 29, 28, 25, 24, 21, 21, 21, 21, 21, 21, 20, 20, - 
20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 33, 33, 33, 32, - 31, 28, 27, 26, 24, 24, 22, 22, 22, 23, 23, 22, 22, 22, 21, 20, 20, 20, - 20, 19, 18, 18, 18, 17, 17, 17, 17, 17, 32, 31, 31, 31, 29, 28, 26, 25, - 24, 24, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, - 18, 18, 17, 17, 17, 17, 28, 27, 27, 26, 25, 24, 23, 22, 22, 22, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, - 18, 17, 26, 25, 25, 24, 24, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21, - 21, 21, 20, 20, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 21, 22, - 22, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 21, 22, 22, 23, 22, 22, - 22, 22, 21, 21, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, - 16, 16, 16, 16, 16, 16, 16, 16, 21, 22, 22, 23, 23, 22, 23, 23, 21, 21, - 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, - 15, 15, 14, 14, 20, 21, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 18, - 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 20, 20, 21, 21, 21, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, - 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 18, 19, 19, 20, - 20, 20, 20, 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, - 13, 13, 13, 12, 12, 12, 12, 12, 13, 13, 18, 19, 19, 20, 20, 20, 20, 20, - 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, - 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 19, 19, 18, 18, 17, 17, - 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, - 12, 12, 16, 17, 17, 18, 18, 18, 18, 19, 18, 18, 17, 17, 16, 16, 15, 15, - 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 15, 16, - 16, 17, 17, 17, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, - 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, - 17, 18, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, - 
12, 11, 11, 11, 11, 11, 11, 10, - /* Size 4x16 */ - 33, 21, 20, 16, 33, 22, 21, 17, 31, 22, 21, 18, 27, 22, 22, 18, 24, 21, - 20, 18, 22, 19, 19, 17, 22, 19, 18, 16, 23, 19, 17, 15, 22, 19, 16, 14, - 21, 19, 16, 14, 20, 19, 15, 13, 20, 18, 14, 12, 18, 17, 14, 12, 18, 17, - 14, 11, 17, 17, 13, 11, 17, 16, 13, 11, - /* Size 16x4 */ - 33, 33, 31, 27, 24, 22, 22, 23, 22, 21, 20, 20, 18, 18, 17, 17, 21, 22, - 22, 22, 21, 19, 19, 19, 19, 19, 19, 18, 17, 17, 17, 16, 20, 21, 21, 22, - 20, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 13, 16, 17, 18, 18, 18, 17, - 16, 15, 14, 14, 13, 12, 12, 11, 11, 11, - /* Size 8x32 */ - 32, 32, 26, 21, 20, 18, 16, 15, 33, 31, 25, 22, 21, 19, 17, 16, 33, 31, - 25, 22, 21, 19, 17, 16, 34, 31, 24, 23, 22, 20, 18, 17, 33, 29, 24, 22, - 22, 20, 18, 17, 31, 28, 23, 22, 22, 20, 18, 17, 29, 26, 22, 22, 22, 20, - 19, 18, 28, 25, 22, 22, 22, 20, 19, 18, 25, 24, 21, 21, 21, 20, 18, 17, - 24, 24, 21, 21, 20, 19, 18, 17, 21, 22, 20, 19, 19, 18, 17, 17, 21, 22, - 20, 19, 19, 18, 17, 16, 21, 22, 21, 19, 18, 17, 16, 16, 21, 22, 21, 19, - 18, 17, 16, 16, 21, 23, 21, 18, 17, 16, 15, 15, 21, 22, 21, 18, 17, 16, - 15, 15, 20, 22, 21, 18, 16, 15, 14, 14, 20, 22, 21, 18, 16, 15, 14, 14, - 20, 21, 20, 18, 16, 14, 14, 13, 19, 21, 20, 17, 15, 14, 13, 13, 19, 20, - 20, 17, 15, 14, 13, 13, 18, 20, 20, 17, 15, 13, 12, 12, 18, 20, 19, 17, - 15, 13, 12, 12, 17, 19, 19, 17, 14, 13, 12, 12, 17, 19, 18, 16, 14, 13, - 12, 12, 16, 18, 18, 16, 14, 12, 12, 11, 16, 18, 18, 16, 14, 12, 12, 11, - 16, 18, 18, 16, 14, 12, 11, 11, 16, 17, 17, 16, 14, 12, 11, 11, 15, 17, - 17, 16, 14, 12, 11, 11, 15, 17, 17, 16, 14, 13, 12, 11, 15, 17, 17, 16, - 14, 13, 12, 11, - /* Size 32x8 */ - 32, 33, 33, 34, 33, 31, 29, 28, 25, 24, 21, 21, 21, 21, 21, 21, 20, 20, - 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 32, 31, 31, 31, - 29, 28, 26, 25, 24, 24, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, - 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 26, 25, 25, 24, 24, 23, 22, 22, - 21, 21, 20, 20, 
21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 18, 18, - 18, 18, 17, 17, 17, 17, 21, 22, 22, 23, 22, 22, 22, 22, 21, 21, 19, 19, - 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, - 16, 16, 20, 21, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 18, 17, 17, - 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 18, 19, - 19, 20, 20, 20, 20, 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, - 14, 13, 13, 13, 13, 12, 12, 12, 12, 12, 13, 13, 16, 17, 17, 18, 18, 18, - 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, - 12, 12, 12, 11, 11, 11, 12, 12, 15, 16, 16, 17, 17, 17, 18, 18, 17, 17, - 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, - 11, 11, 11, 11 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 30, 21, 14, 30, 21, 17, 13, 21, 17, 12, 10, 14, 13, 10, 8, - /* Size 8x8 */ - 32, 32, 30, 27, 22, 18, 15, 13, 32, 31, 29, 26, 23, 19, 16, 14, 30, 29, - 26, 23, 20, 18, 15, 13, 27, 26, 23, 19, 17, 15, 13, 12, 22, 23, 20, 17, - 14, 13, 11, 10, 18, 19, 18, 15, 13, 11, 10, 9, 15, 16, 15, 13, 11, 10, - 9, 8, 13, 14, 13, 12, 10, 9, 8, 7, - /* Size 16x16 */ - 32, 33, 33, 33, 32, 30, 28, 26, 23, 21, 19, 17, 16, 14, 13, 12, 33, 32, - 32, 32, 32, 30, 29, 27, 24, 22, 20, 18, 17, 15, 13, 13, 33, 32, 32, 32, - 32, 31, 30, 28, 25, 23, 21, 19, 17, 16, 14, 14, 33, 32, 32, 31, 30, 29, - 28, 26, 24, 23, 20, 19, 17, 16, 14, 14, 32, 32, 32, 30, 29, 28, 27, 26, - 24, 22, 21, 19, 18, 16, 15, 14, 30, 30, 31, 29, 28, 26, 24, 23, 22, 20, - 19, 18, 16, 15, 14, 13, 28, 29, 30, 28, 27, 24, 21, 20, 19, 18, 17, 16, - 15, 14, 13, 13, 26, 27, 28, 26, 26, 23, 20, 19, 18, 17, 16, 15, 14, 13, - 12, 12, 23, 24, 25, 24, 24, 22, 19, 18, 16, 15, 14, 14, 13, 12, 11, 11, - 21, 22, 23, 23, 22, 20, 18, 17, 15, 14, 13, 13, 12, 11, 11, 10, 19, 20, - 21, 20, 21, 19, 17, 16, 14, 13, 12, 12, 11, 11, 10, 10, 17, 18, 19, 19, - 19, 18, 16, 15, 14, 13, 12, 11, 10, 10, 9, 9, 16, 17, 17, 17, 18, 16, - 15, 14, 13, 12, 11, 10, 10, 9, 9, 8, 14, 
15, 16, 16, 16, 15, 14, 13, 12, - 11, 11, 10, 9, 9, 8, 8, 13, 13, 14, 14, 15, 14, 13, 12, 11, 11, 10, 9, - 9, 8, 8, 7, 12, 13, 14, 14, 14, 13, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 32, 32, 30, 30, 28, 28, 26, 26, 23, 23, 21, - 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 30, 30, 29, 29, 27, 27, 24, 24, 22, 22, 20, 20, 18, - 18, 17, 17, 15, 15, 13, 13, 13, 13, 12, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 30, 30, 29, 29, 27, 27, 24, 24, 22, 22, 20, 20, 18, 18, 17, 17, 15, - 15, 13, 13, 13, 13, 12, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, - 30, 28, 28, 25, 25, 23, 23, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 14, - 14, 13, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 28, 28, 25, - 25, 23, 23, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 33, 32, - 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 20, - 20, 19, 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 33, 32, 32, 32, 32, 31, - 31, 30, 30, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 20, 20, 19, 19, 17, - 17, 16, 16, 14, 14, 14, 14, 13, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28, - 28, 27, 27, 26, 26, 24, 24, 22, 22, 21, 21, 19, 19, 18, 18, 16, 16, 15, - 15, 14, 14, 14, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28, 28, 27, 27, 26, - 26, 24, 24, 22, 22, 21, 21, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14, - 30, 30, 30, 31, 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 22, 20, - 20, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 13, 13, 13, 30, 30, 30, 31, - 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 22, 20, 20, 19, 19, 18, - 18, 16, 16, 15, 15, 14, 14, 13, 13, 13, 28, 29, 29, 30, 30, 28, 28, 27, - 27, 24, 24, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, - 14, 13, 13, 13, 13, 12, 28, 29, 29, 30, 30, 28, 28, 27, 27, 24, 24, 21, - 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, - 13, 12, 26, 27, 27, 28, 28, 26, 26, 26, 26, 23, 23, 20, 20, 19, 19, 18, - 18, 17, 17, 16, 16, 15, 
15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 26, 27, - 27, 28, 28, 26, 26, 26, 26, 23, 23, 20, 20, 19, 19, 18, 18, 17, 17, 16, - 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 23, 24, 24, 25, 25, 24, - 24, 24, 24, 22, 22, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14, 14, 13, - 13, 12, 12, 11, 11, 11, 11, 11, 23, 24, 24, 25, 25, 24, 24, 24, 24, 22, - 22, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 11, - 11, 11, 11, 11, 21, 22, 22, 23, 23, 23, 23, 22, 22, 20, 20, 18, 18, 17, - 17, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, - 21, 22, 22, 23, 23, 23, 23, 22, 22, 20, 20, 18, 18, 17, 17, 15, 15, 14, - 14, 13, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 19, 20, 20, 21, - 21, 20, 20, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12, - 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 19, 20, 20, 21, 21, 20, 20, 21, - 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, - 11, 10, 10, 10, 10, 9, 17, 18, 18, 19, 19, 19, 19, 19, 19, 18, 18, 16, - 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, - 9, 17, 18, 18, 19, 19, 19, 19, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, - 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 16, 17, 17, 17, - 17, 17, 17, 18, 18, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, - 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 16, 17, 17, 17, 17, 17, 17, 18, 18, 16, - 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, - 8, 8, 8, 14, 15, 15, 16, 16, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 12, - 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 14, 15, 15, 16, - 16, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, - 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 13, 13, 13, 14, 14, 14, 14, 15, 15, 14, - 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 7, - 7, 7, 13, 13, 13, 14, 14, 14, 14, 15, 15, 14, 14, 13, 13, 12, 12, 11, - 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 12, 13, 13, 14, 14, - 14, 14, 14, 14, 13, 13, 13, 13, 
12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 8, - 8, 8, 8, 7, 7, 7, 7, 7, 12, 13, 13, 14, 14, 14, 14, 14, 14, 13, 13, 13, - 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 12, - 12, 12, 13, 13, 13, 13, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, - 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, - /* Size 4x8 */ - 32, 29, 20, 14, 32, 28, 20, 14, 30, 24, 19, 14, 28, 20, 16, 12, 23, 18, - 13, 11, 19, 16, 12, 9, 16, 14, 11, 8, 14, 13, 10, 8, - /* Size 8x4 */ - 32, 32, 30, 28, 23, 19, 16, 14, 29, 28, 24, 20, 18, 16, 14, 13, 20, 20, - 19, 16, 13, 12, 11, 10, 14, 14, 14, 12, 11, 9, 8, 8, - /* Size 8x16 */ - 32, 33, 32, 28, 23, 19, 16, 13, 33, 32, 32, 29, 24, 20, 17, 14, 33, 32, - 31, 30, 25, 21, 17, 14, 32, 32, 30, 28, 24, 20, 17, 14, 32, 31, 29, 27, - 24, 21, 18, 15, 30, 30, 28, 24, 21, 19, 16, 14, 28, 30, 27, 21, 19, 17, - 15, 13, 26, 28, 26, 20, 18, 16, 14, 12, 23, 25, 24, 19, 16, 14, 13, 11, - 21, 23, 22, 18, 15, 13, 12, 11, 19, 21, 20, 17, 14, 12, 11, 10, 18, 19, - 19, 16, 14, 12, 10, 9, 16, 17, 18, 15, 13, 11, 10, 9, 14, 16, 16, 14, - 12, 11, 9, 8, 13, 14, 15, 13, 11, 10, 9, 8, 12, 14, 14, 13, 11, 10, 8, - 8, - /* Size 16x8 */ - 32, 33, 33, 32, 32, 30, 28, 26, 23, 21, 19, 18, 16, 14, 13, 12, 33, 32, - 32, 32, 31, 30, 30, 28, 25, 23, 21, 19, 17, 16, 14, 14, 32, 32, 31, 30, - 29, 28, 27, 26, 24, 22, 20, 19, 18, 16, 15, 14, 28, 29, 30, 28, 27, 24, - 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 23, 24, 25, 24, 24, 21, 19, 18, - 16, 15, 14, 14, 13, 12, 11, 11, 19, 20, 21, 20, 21, 19, 17, 16, 14, 13, - 12, 12, 11, 11, 10, 10, 16, 17, 17, 17, 18, 16, 15, 14, 13, 12, 11, 10, - 10, 9, 9, 8, 13, 14, 14, 14, 15, 14, 13, 12, 11, 11, 10, 9, 9, 8, 8, 8, - /* Size 16x32 */ - 32, 33, 33, 32, 32, 28, 28, 23, 23, 19, 19, 16, 16, 13, 13, 12, 33, 32, - 32, 32, 32, 29, 29, 24, 24, 20, 20, 17, 17, 14, 14, 12, 33, 32, 32, 32, - 32, 29, 29, 24, 24, 20, 20, 17, 17, 14, 14, 12, 33, 32, 32, 31, 31, 30, - 30, 25, 25, 21, 21, 17, 17, 14, 14, 13, 33, 32, 32, 31, 31, 30, 30, 25, - 25, 21, 21, 
17, 17, 14, 14, 13, 32, 32, 32, 30, 30, 28, 28, 24, 24, 20, - 20, 17, 17, 14, 14, 13, 32, 32, 32, 30, 30, 28, 28, 24, 24, 20, 20, 17, - 17, 14, 14, 13, 32, 31, 31, 29, 29, 27, 27, 24, 24, 21, 21, 18, 18, 15, - 15, 14, 32, 31, 31, 29, 29, 27, 27, 24, 24, 21, 21, 18, 18, 15, 15, 14, - 30, 30, 30, 28, 28, 24, 24, 21, 21, 19, 19, 16, 16, 14, 14, 13, 30, 30, - 30, 28, 28, 24, 24, 21, 21, 19, 19, 16, 16, 14, 14, 13, 28, 30, 30, 27, - 27, 21, 21, 19, 19, 17, 17, 15, 15, 13, 13, 12, 28, 30, 30, 27, 27, 21, - 21, 19, 19, 17, 17, 15, 15, 13, 13, 12, 26, 28, 28, 26, 26, 20, 20, 18, - 18, 16, 16, 14, 14, 12, 12, 12, 26, 28, 28, 26, 26, 20, 20, 18, 18, 16, - 16, 14, 14, 12, 12, 12, 23, 25, 25, 24, 24, 19, 19, 16, 16, 14, 14, 13, - 13, 11, 11, 11, 23, 25, 25, 24, 24, 19, 19, 16, 16, 14, 14, 13, 13, 11, - 11, 11, 21, 23, 23, 22, 22, 18, 18, 15, 15, 13, 13, 12, 12, 11, 11, 10, - 21, 23, 23, 22, 22, 18, 18, 15, 15, 13, 13, 12, 12, 11, 11, 10, 19, 21, - 21, 20, 20, 17, 17, 14, 14, 12, 12, 11, 11, 10, 10, 9, 19, 21, 21, 20, - 20, 17, 17, 14, 14, 12, 12, 11, 11, 10, 10, 9, 18, 19, 19, 19, 19, 16, - 16, 14, 14, 12, 12, 10, 10, 9, 9, 9, 18, 19, 19, 19, 19, 16, 16, 14, 14, - 12, 12, 10, 10, 9, 9, 9, 16, 17, 17, 18, 18, 15, 15, 13, 13, 11, 11, 10, - 10, 9, 9, 8, 16, 17, 17, 18, 18, 15, 15, 13, 13, 11, 11, 10, 10, 9, 9, - 8, 14, 16, 16, 16, 16, 14, 14, 12, 12, 11, 11, 9, 9, 8, 8, 8, 14, 16, - 16, 16, 16, 14, 14, 12, 12, 11, 11, 9, 9, 8, 8, 8, 13, 14, 14, 15, 15, - 13, 13, 11, 11, 10, 10, 9, 9, 8, 8, 7, 13, 14, 14, 15, 15, 13, 13, 11, - 11, 10, 10, 9, 9, 8, 8, 7, 12, 14, 14, 14, 14, 13, 13, 11, 11, 10, 10, - 8, 8, 8, 8, 7, 12, 14, 14, 14, 14, 13, 13, 11, 11, 10, 10, 8, 8, 8, 8, - 7, 12, 13, 13, 13, 13, 12, 12, 11, 11, 9, 9, 8, 8, 7, 7, 7, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 28, 28, 26, 26, 23, 23, 21, - 21, 19, 19, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12, 33, 32, 32, 32, - 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 25, 25, 23, 23, 21, 21, 19, - 19, 17, 17, 
16, 16, 14, 14, 14, 14, 13, 33, 32, 32, 32, 32, 32, 32, 31, - 31, 30, 30, 30, 30, 28, 28, 25, 25, 23, 23, 21, 21, 19, 19, 17, 17, 16, - 16, 14, 14, 14, 14, 13, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, - 27, 26, 26, 24, 24, 22, 22, 20, 20, 19, 19, 18, 18, 16, 16, 15, 15, 14, - 14, 13, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 24, - 24, 22, 22, 20, 20, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 13, 28, 29, - 29, 30, 30, 28, 28, 27, 27, 24, 24, 21, 21, 20, 20, 19, 19, 18, 18, 17, - 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 28, 29, 29, 30, 30, 28, - 28, 27, 27, 24, 24, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, - 15, 14, 14, 13, 13, 13, 13, 12, 23, 24, 24, 25, 25, 24, 24, 24, 24, 21, - 21, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 11, - 11, 11, 11, 11, 23, 24, 24, 25, 25, 24, 24, 24, 24, 21, 21, 19, 19, 18, - 18, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, - 19, 20, 20, 21, 21, 20, 20, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, - 13, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 19, 20, 20, 21, - 21, 20, 20, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12, - 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 16, 17, 17, 17, 17, 17, 17, 18, - 18, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, - 9, 9, 9, 8, 8, 8, 16, 17, 17, 17, 17, 17, 17, 18, 18, 16, 16, 15, 15, - 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 13, - 14, 14, 14, 14, 14, 14, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, - 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7, 13, 14, 14, 14, 14, 14, 14, 15, - 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, - 8, 8, 8, 7, 12, 12, 12, 13, 13, 13, 13, 14, 14, 13, 13, 12, 12, 12, 12, - 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, - /* Size 4x16 */ - 33, 28, 19, 13, 32, 29, 20, 14, 32, 30, 21, 14, 32, 28, 20, 14, 31, 27, - 21, 15, 30, 24, 19, 14, 30, 21, 17, 13, 28, 20, 16, 12, 25, 19, 14, 11, - 23, 18, 13, 
11, 21, 17, 12, 10, 19, 16, 12, 9, 17, 15, 11, 9, 16, 14, - 11, 8, 14, 13, 10, 8, 14, 13, 10, 8, - /* Size 16x4 */ - 33, 32, 32, 32, 31, 30, 30, 28, 25, 23, 21, 19, 17, 16, 14, 14, 28, 29, - 30, 28, 27, 24, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 19, 20, 21, 20, - 21, 19, 17, 16, 14, 13, 12, 12, 11, 11, 10, 10, 13, 14, 14, 14, 15, 14, - 13, 12, 11, 11, 10, 9, 9, 8, 8, 8, - /* Size 8x32 */ - 32, 33, 32, 28, 23, 19, 16, 13, 33, 32, 32, 29, 24, 20, 17, 14, 33, 32, - 32, 29, 24, 20, 17, 14, 33, 32, 31, 30, 25, 21, 17, 14, 33, 32, 31, 30, - 25, 21, 17, 14, 32, 32, 30, 28, 24, 20, 17, 14, 32, 32, 30, 28, 24, 20, - 17, 14, 32, 31, 29, 27, 24, 21, 18, 15, 32, 31, 29, 27, 24, 21, 18, 15, - 30, 30, 28, 24, 21, 19, 16, 14, 30, 30, 28, 24, 21, 19, 16, 14, 28, 30, - 27, 21, 19, 17, 15, 13, 28, 30, 27, 21, 19, 17, 15, 13, 26, 28, 26, 20, - 18, 16, 14, 12, 26, 28, 26, 20, 18, 16, 14, 12, 23, 25, 24, 19, 16, 14, - 13, 11, 23, 25, 24, 19, 16, 14, 13, 11, 21, 23, 22, 18, 15, 13, 12, 11, - 21, 23, 22, 18, 15, 13, 12, 11, 19, 21, 20, 17, 14, 12, 11, 10, 19, 21, - 20, 17, 14, 12, 11, 10, 18, 19, 19, 16, 14, 12, 10, 9, 18, 19, 19, 16, - 14, 12, 10, 9, 16, 17, 18, 15, 13, 11, 10, 9, 16, 17, 18, 15, 13, 11, - 10, 9, 14, 16, 16, 14, 12, 11, 9, 8, 14, 16, 16, 14, 12, 11, 9, 8, 13, - 14, 15, 13, 11, 10, 9, 8, 13, 14, 15, 13, 11, 10, 9, 8, 12, 14, 14, 13, - 11, 10, 8, 8, 12, 14, 14, 13, 11, 10, 8, 8, 12, 13, 13, 12, 11, 9, 8, 7, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 28, 28, 26, 26, 23, 23, 21, - 21, 19, 19, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12, 33, 32, 32, 32, - 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 25, 25, 23, 23, 21, 21, 19, - 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 32, 32, 32, 31, 31, 30, 30, 29, - 29, 28, 28, 27, 27, 26, 26, 24, 24, 22, 22, 20, 20, 19, 19, 18, 18, 16, - 16, 15, 15, 14, 14, 13, 28, 29, 29, 30, 30, 28, 28, 27, 27, 24, 24, 21, - 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, - 13, 12, 23, 24, 24, 25, 25, 24, 24, 
24, 24, 21, 21, 19, 19, 18, 18, 16, - 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 19, 20, - 20, 21, 21, 20, 20, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, - 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 16, 17, 17, 17, 17, 17, - 17, 18, 18, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, - 10, 9, 9, 9, 9, 8, 8, 8, 13, 14, 14, 14, 14, 14, 14, 15, 15, 14, 14, 13, - 13, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7 }, - { /* Chroma */ - /* Size 4x4 */ - 32, 22, 22, 18, 22, 19, 19, 17, 22, 19, 16, 14, 18, 17, 14, 12, - /* Size 8x8 */ - 33, 30, 24, 22, 21, 20, 18, 17, 30, 26, 23, 22, 22, 21, 19, 18, 24, 23, - 21, 21, 20, 20, 19, 18, 22, 22, 21, 19, 18, 18, 17, 16, 21, 22, 20, 18, - 17, 16, 15, 14, 20, 21, 20, 18, 16, 14, 14, 13, 18, 19, 19, 17, 15, 14, - 12, 12, 17, 18, 18, 16, 14, 13, 12, 11, - /* Size 16x16 */ - 32, 33, 34, 31, 28, 25, 21, 21, 21, 20, 20, 19, 18, 17, 16, 16, 33, 33, - 33, 30, 27, 24, 22, 22, 22, 21, 20, 20, 19, 18, 17, 17, 34, 33, 32, 29, - 26, 24, 22, 23, 23, 22, 22, 21, 20, 19, 18, 18, 31, 30, 29, 26, 24, 23, - 22, 22, 23, 22, 22, 21, 20, 19, 18, 18, 28, 27, 26, 24, 22, 22, 21, 22, - 23, 22, 22, 21, 20, 20, 19, 19, 25, 24, 24, 23, 22, 21, 20, 21, 21, 20, - 20, 20, 19, 19, 18, 18, 21, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, - 18, 18, 17, 17, 21, 22, 23, 22, 22, 21, 19, 19, 19, 18, 18, 18, 17, 17, - 16, 16, 21, 22, 23, 23, 23, 21, 19, 19, 18, 17, 17, 17, 16, 16, 15, 15, - 20, 21, 22, 22, 22, 20, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 20, 20, - 22, 22, 22, 20, 19, 18, 17, 16, 16, 15, 15, 14, 14, 14, 19, 20, 21, 21, - 21, 20, 19, 18, 17, 16, 15, 14, 14, 14, 13, 13, 18, 19, 20, 20, 20, 19, - 18, 17, 16, 15, 15, 14, 13, 13, 12, 12, 17, 18, 19, 19, 20, 19, 18, 17, - 16, 15, 14, 14, 13, 12, 12, 12, 16, 17, 18, 18, 19, 18, 17, 16, 15, 14, - 14, 13, 12, 12, 12, 11, 16, 17, 18, 18, 19, 18, 17, 16, 15, 14, 14, 13, - 12, 12, 11, 11, - /* Size 32x32 */ - 32, 33, 33, 34, 34, 31, 31, 28, 28, 25, 25, 
21, 21, 21, 21, 21, 21, 20, - 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 33, 33, 33, 33, - 33, 30, 30, 27, 27, 24, 24, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, - 20, 19, 19, 18, 18, 17, 17, 17, 17, 16, 33, 33, 33, 33, 33, 30, 30, 27, - 27, 24, 24, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 18, - 18, 17, 17, 17, 17, 16, 34, 33, 33, 32, 32, 29, 29, 26, 26, 24, 24, 22, - 22, 23, 23, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, - 18, 17, 34, 33, 33, 32, 32, 29, 29, 26, 26, 24, 24, 22, 22, 23, 23, 23, - 23, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 18, 17, 31, 30, - 30, 29, 29, 26, 26, 24, 24, 23, 23, 22, 22, 22, 22, 23, 23, 22, 22, 22, - 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 18, 17, 31, 30, 30, 29, 29, 26, - 26, 24, 24, 23, 23, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 20, - 20, 19, 19, 18, 18, 18, 18, 17, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, - 22, 21, 21, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, - 19, 19, 19, 18, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 21, 22, - 22, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 18, - 25, 24, 24, 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21, 20, - 20, 20, 20, 20, 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, 25, 24, 24, 24, - 24, 23, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21, 20, 20, 20, 20, 20, - 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, 21, 22, 22, 22, 22, 22, 22, 21, - 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, - 18, 17, 17, 17, 17, 17, 21, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, - 17, 17, 21, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 19, 19, 19, 19, 19, - 19, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 21, 22, - 22, 23, 23, 22, 22, 22, 22, 21, 21, 19, 19, 19, 19, 19, 19, 18, 18, 18, - 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 21, 22, 22, 23, 23, 23, - 23, 23, 23, 21, 21, 19, 19, 19, 19, 18, 18, 
17, 17, 17, 17, 17, 17, 16, - 16, 16, 16, 15, 15, 15, 15, 15, 21, 22, 22, 23, 23, 23, 23, 23, 23, 21, - 21, 19, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, - 15, 15, 15, 15, 20, 21, 21, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 18, - 18, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, - 20, 21, 21, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 17, - 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 20, 20, 20, 22, - 22, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, - 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 20, 20, 20, 22, 22, 22, 22, 22, - 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, - 14, 14, 14, 14, 14, 13, 19, 20, 20, 21, 21, 21, 21, 21, 21, 20, 20, 19, - 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, - 13, 13, 19, 20, 20, 21, 21, 21, 21, 21, 21, 20, 20, 19, 19, 18, 18, 17, - 17, 16, 16, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 18, 19, - 19, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 15, - 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 12, 18, 19, 19, 20, 20, 20, - 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 15, 15, 14, 14, 13, - 13, 13, 13, 12, 12, 12, 12, 12, 17, 18, 18, 19, 19, 19, 19, 20, 20, 19, - 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, - 12, 12, 12, 12, 17, 18, 18, 19, 19, 19, 19, 20, 20, 19, 19, 18, 18, 17, - 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, - 16, 17, 17, 18, 18, 18, 18, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, - 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 16, 17, 17, 18, - 18, 18, 18, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, - 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 16, 17, 17, 18, 18, 18, 18, 19, - 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, - 12, 11, 11, 11, 11, 11, 16, 17, 17, 18, 18, 18, 18, 19, 19, 18, 18, 17, - 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 
12, 12, 12, 12, 11, 11, 11, - 11, 11, 15, 16, 16, 17, 17, 17, 17, 18, 18, 17, 17, 17, 17, 16, 16, 15, - 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, - /* Size 4x8 */ - 33, 22, 20, 17, 28, 22, 22, 18, 24, 20, 20, 18, 22, 19, 18, 16, 22, 19, - 16, 14, 20, 19, 15, 13, 19, 18, 14, 12, 17, 17, 14, 11, - /* Size 8x4 */ - 33, 28, 24, 22, 22, 20, 19, 17, 22, 22, 20, 19, 19, 19, 18, 17, 20, 22, - 20, 18, 16, 15, 14, 14, 17, 18, 18, 16, 14, 13, 12, 11, - /* Size 8x16 */ - 32, 33, 28, 21, 21, 20, 18, 16, 33, 33, 27, 22, 22, 20, 19, 17, 34, 32, - 26, 22, 23, 21, 20, 18, 31, 28, 24, 22, 22, 22, 20, 18, 28, 26, 22, 22, - 23, 22, 20, 19, 24, 24, 22, 20, 21, 20, 19, 18, 21, 22, 21, 19, 19, 19, - 18, 17, 21, 22, 22, 19, 18, 18, 17, 16, 21, 23, 22, 19, 18, 17, 16, 15, - 20, 22, 22, 19, 17, 16, 15, 14, 20, 21, 22, 19, 17, 16, 14, 14, 19, 20, - 21, 19, 17, 15, 14, 13, 18, 20, 20, 18, 16, 15, 13, 12, 17, 19, 20, 18, - 16, 14, 13, 12, 16, 18, 19, 17, 15, 14, 12, 12, 16, 17, 18, 17, 15, 14, - 12, 11, - /* Size 16x8 */ - 32, 33, 34, 31, 28, 24, 21, 21, 21, 20, 20, 19, 18, 17, 16, 16, 33, 33, - 32, 28, 26, 24, 22, 22, 23, 22, 21, 20, 20, 19, 18, 17, 28, 27, 26, 24, - 22, 22, 21, 22, 22, 22, 22, 21, 20, 20, 19, 18, 21, 22, 22, 22, 22, 20, - 19, 19, 19, 19, 19, 19, 18, 18, 17, 17, 21, 22, 23, 22, 23, 21, 19, 18, - 18, 17, 17, 17, 16, 16, 15, 15, 20, 20, 21, 22, 22, 20, 19, 18, 17, 16, - 16, 15, 15, 14, 14, 14, 18, 19, 20, 20, 20, 19, 18, 17, 16, 15, 14, 14, - 13, 13, 12, 12, 16, 17, 18, 18, 19, 18, 17, 16, 15, 14, 14, 13, 12, 12, - 12, 11, - /* Size 16x32 */ - 32, 33, 33, 28, 28, 21, 21, 21, 21, 20, 20, 18, 18, 16, 16, 16, 33, 33, - 33, 27, 27, 22, 22, 22, 22, 20, 20, 19, 19, 17, 17, 16, 33, 33, 33, 27, - 27, 22, 22, 22, 22, 20, 20, 19, 19, 17, 17, 16, 34, 32, 32, 26, 26, 22, - 22, 23, 23, 21, 21, 20, 20, 18, 18, 17, 34, 32, 32, 26, 26, 22, 22, 23, - 23, 21, 21, 20, 20, 18, 18, 17, 31, 28, 28, 24, 24, 22, 22, 22, 22, 22, - 22, 20, 20, 18, 18, 17, 31, 28, 28, 24, 24, 22, 
22, 22, 22, 22, 22, 20, - 20, 18, 18, 17, 28, 26, 26, 22, 22, 22, 22, 23, 23, 22, 22, 20, 20, 19, - 19, 18, 28, 26, 26, 22, 22, 22, 22, 23, 23, 22, 22, 20, 20, 19, 19, 18, - 24, 24, 24, 22, 22, 20, 20, 21, 21, 20, 20, 19, 19, 18, 18, 17, 24, 24, - 24, 22, 22, 20, 20, 21, 21, 20, 20, 19, 19, 18, 18, 17, 21, 22, 22, 21, - 21, 19, 19, 19, 19, 19, 19, 18, 18, 17, 17, 17, 21, 22, 22, 21, 21, 19, - 19, 19, 19, 19, 19, 18, 18, 17, 17, 17, 21, 22, 22, 22, 22, 19, 19, 18, - 18, 18, 18, 17, 17, 16, 16, 16, 21, 22, 22, 22, 22, 19, 19, 18, 18, 18, - 18, 17, 17, 16, 16, 16, 21, 23, 23, 22, 22, 19, 19, 18, 18, 17, 17, 16, - 16, 15, 15, 15, 21, 23, 23, 22, 22, 19, 19, 18, 18, 17, 17, 16, 16, 15, - 15, 15, 20, 22, 22, 22, 22, 19, 19, 17, 17, 16, 16, 15, 15, 14, 14, 14, - 20, 22, 22, 22, 22, 19, 19, 17, 17, 16, 16, 15, 15, 14, 14, 14, 20, 21, - 21, 22, 22, 19, 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 20, 21, 21, 22, - 22, 19, 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 19, 20, 20, 21, 21, 19, - 19, 17, 17, 15, 15, 14, 14, 13, 13, 13, 19, 20, 20, 21, 21, 19, 19, 17, - 17, 15, 15, 14, 14, 13, 13, 13, 18, 20, 20, 20, 20, 18, 18, 16, 16, 15, - 15, 13, 13, 12, 12, 12, 18, 20, 20, 20, 20, 18, 18, 16, 16, 15, 15, 13, - 13, 12, 12, 12, 17, 19, 19, 20, 20, 18, 18, 16, 16, 14, 14, 13, 13, 12, - 12, 12, 17, 19, 19, 20, 20, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12, - 16, 18, 18, 19, 19, 17, 17, 15, 15, 14, 14, 12, 12, 12, 12, 11, 16, 18, - 18, 19, 19, 17, 17, 15, 15, 14, 14, 12, 12, 12, 12, 11, 16, 17, 17, 18, - 18, 17, 17, 15, 15, 14, 14, 12, 12, 11, 11, 11, 16, 17, 17, 18, 18, 17, - 17, 15, 15, 14, 14, 12, 12, 11, 11, 11, 16, 17, 17, 18, 18, 16, 16, 15, - 15, 13, 13, 12, 12, 11, 11, 11, - /* Size 32x16 */ - 32, 33, 33, 34, 34, 31, 31, 28, 28, 24, 24, 21, 21, 21, 21, 21, 21, 20, - 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 33, 33, 33, 32, - 32, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 23, 22, 22, 21, 21, 20, - 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 33, 33, 33, 32, 32, 28, 28, 
26, - 26, 24, 24, 22, 22, 22, 22, 23, 23, 22, 22, 21, 21, 20, 20, 20, 20, 19, - 19, 18, 18, 17, 17, 17, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, - 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 18, - 18, 18, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 21, 22, 22, 22, - 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 18, 18, 18, 21, 22, - 22, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16, 21, 22, 22, 22, 22, 22, - 22, 22, 22, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, - 18, 18, 18, 17, 17, 17, 17, 16, 21, 22, 22, 23, 23, 22, 22, 23, 23, 21, - 21, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, - 15, 15, 15, 15, 21, 22, 22, 23, 23, 22, 22, 23, 23, 21, 21, 19, 19, 18, - 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, - 20, 20, 20, 21, 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, - 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 20, 20, 20, 21, - 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, - 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 18, 19, 19, 20, 20, 20, 20, 20, - 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, - 13, 12, 12, 12, 12, 12, 18, 19, 19, 20, 20, 20, 20, 20, 20, 19, 19, 18, - 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, - 12, 12, 16, 17, 17, 18, 18, 18, 18, 19, 19, 18, 18, 17, 17, 16, 16, 15, - 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 16, 17, - 17, 18, 18, 18, 18, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, - 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 16, 16, 16, 17, 17, 17, - 17, 18, 18, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, - 12, 12, 12, 11, 11, 11, 11, 11, - /* Size 4x16 */ - 33, 21, 20, 16, 33, 22, 20, 17, 32, 22, 21, 18, 28, 22, 22, 18, 26, 22, - 22, 19, 24, 20, 20, 18, 22, 19, 19, 17, 22, 19, 18, 16, 23, 19, 17, 15, - 22, 19, 16, 14, 
21, 19, 16, 14, 20, 19, 15, 13, 20, 18, 15, 12, 19, 18, - 14, 12, 18, 17, 14, 12, 17, 17, 14, 11, - /* Size 16x4 */ - 33, 33, 32, 28, 26, 24, 22, 22, 23, 22, 21, 20, 20, 19, 18, 17, 21, 22, - 22, 22, 22, 20, 19, 19, 19, 19, 19, 19, 18, 18, 17, 17, 20, 20, 21, 22, - 22, 20, 19, 18, 17, 16, 16, 15, 15, 14, 14, 14, 16, 17, 18, 18, 19, 18, - 17, 16, 15, 14, 14, 13, 12, 12, 12, 11, - /* Size 8x32 */ - 32, 33, 28, 21, 21, 20, 18, 16, 33, 33, 27, 22, 22, 20, 19, 17, 33, 33, - 27, 22, 22, 20, 19, 17, 34, 32, 26, 22, 23, 21, 20, 18, 34, 32, 26, 22, - 23, 21, 20, 18, 31, 28, 24, 22, 22, 22, 20, 18, 31, 28, 24, 22, 22, 22, - 20, 18, 28, 26, 22, 22, 23, 22, 20, 19, 28, 26, 22, 22, 23, 22, 20, 19, - 24, 24, 22, 20, 21, 20, 19, 18, 24, 24, 22, 20, 21, 20, 19, 18, 21, 22, - 21, 19, 19, 19, 18, 17, 21, 22, 21, 19, 19, 19, 18, 17, 21, 22, 22, 19, - 18, 18, 17, 16, 21, 22, 22, 19, 18, 18, 17, 16, 21, 23, 22, 19, 18, 17, - 16, 15, 21, 23, 22, 19, 18, 17, 16, 15, 20, 22, 22, 19, 17, 16, 15, 14, - 20, 22, 22, 19, 17, 16, 15, 14, 20, 21, 22, 19, 17, 16, 14, 14, 20, 21, - 22, 19, 17, 16, 14, 14, 19, 20, 21, 19, 17, 15, 14, 13, 19, 20, 21, 19, - 17, 15, 14, 13, 18, 20, 20, 18, 16, 15, 13, 12, 18, 20, 20, 18, 16, 15, - 13, 12, 17, 19, 20, 18, 16, 14, 13, 12, 17, 19, 20, 18, 16, 14, 13, 12, - 16, 18, 19, 17, 15, 14, 12, 12, 16, 18, 19, 17, 15, 14, 12, 12, 16, 17, - 18, 17, 15, 14, 12, 11, 16, 17, 18, 17, 15, 14, 12, 11, 16, 17, 18, 16, - 15, 13, 12, 11, - /* Size 32x8 */ - 32, 33, 33, 34, 34, 31, 31, 28, 28, 24, 24, 21, 21, 21, 21, 21, 21, 20, - 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 33, 33, 33, 32, - 32, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 23, 22, 22, 21, 21, 20, - 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 28, 27, 27, 26, 26, 24, 24, 22, - 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, - 20, 19, 19, 18, 18, 18, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, - 17, 16, 
21, 22, 22, 23, 23, 22, 22, 23, 23, 21, 21, 19, 19, 18, 18, 18, - 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 20, 20, - 20, 21, 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, - 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 18, 19, 19, 20, 20, 20, - 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, - 13, 13, 13, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 18, 19, 19, 18, - 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, - 12, 11, 11, 11 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 31, 23, 17, 31, 26, 20, 16, 23, 20, 14, 12, 17, 16, 12, 9, - /* Size 8x8 */ - 33, 32, 32, 29, 24, 20, 17, 15, 32, 32, 31, 29, 25, 21, 18, 16, 32, 31, - 29, 27, 24, 21, 18, 16, 29, 29, 27, 21, 19, 17, 16, 14, 24, 25, 24, 19, - 16, 14, 13, 12, 20, 21, 21, 17, 14, 13, 12, 11, 17, 18, 18, 16, 13, 12, - 10, 9, 15, 16, 16, 14, 12, 11, 9, 9, - /* Size 16x16 */ - 32, 33, 33, 33, 32, 30, 29, 27, 25, 23, 21, 19, 17, 16, 14, 13, 33, 32, - 32, 32, 32, 30, 29, 28, 26, 24, 22, 20, 18, 17, 15, 13, 33, 32, 32, 32, - 32, 31, 30, 28, 27, 25, 23, 21, 19, 17, 16, 14, 33, 32, 32, 31, 30, 29, - 28, 27, 26, 24, 23, 20, 19, 17, 16, 14, 32, 32, 32, 30, 29, 28, 27, 26, - 25, 24, 22, 21, 19, 18, 16, 15, 30, 30, 31, 29, 28, 26, 24, 23, 22, 21, - 20, 19, 18, 16, 15, 14, 29, 29, 30, 28, 27, 24, 22, 21, 20, 19, 19, 17, - 17, 15, 14, 13, 27, 28, 28, 27, 26, 23, 21, 20, 19, 18, 17, 16, 15, 14, - 13, 12, 25, 26, 27, 26, 25, 22, 20, 19, 18, 17, 16, 15, 14, 14, 13, 12, - 23, 24, 25, 24, 24, 21, 19, 18, 17, 16, 15, 14, 13, 13, 12, 11, 21, 22, - 23, 23, 22, 20, 19, 17, 16, 15, 14, 13, 13, 12, 11, 11, 19, 20, 21, 20, - 21, 19, 17, 16, 15, 14, 13, 12, 12, 11, 11, 10, 17, 18, 19, 19, 19, 18, - 17, 15, 14, 13, 13, 12, 11, 10, 10, 9, 16, 17, 17, 17, 18, 16, 15, 14, - 14, 13, 12, 11, 10, 10, 9, 9, 14, 15, 16, 16, 16, 15, 14, 13, 13, 12, - 11, 11, 10, 9, 9, 8, 13, 13, 14, 14, 15, 14, 13, 12, 12, 11, 11, 10, 9, - 9, 8, 8, - /* Size 32x32 */ - 32, 
33, 33, 33, 33, 33, 33, 32, 32, 32, 30, 30, 29, 28, 27, 26, 25, 23, - 23, 21, 21, 19, 19, 18, 17, 17, 16, 15, 14, 14, 13, 13, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28, 27, 26, 24, 24, 22, 22, 20, - 20, 19, 18, 17, 17, 16, 15, 15, 13, 13, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 30, 30, 29, 29, 28, 27, 26, 24, 24, 22, 22, 20, 20, 19, 18, 17, - 17, 16, 15, 15, 13, 13, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 30, 30, 28, 27, 26, 25, 24, 23, 23, 21, 20, 19, 19, 18, 17, 17, 16, 16, - 14, 14, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 28, 28, - 27, 25, 25, 23, 23, 21, 21, 20, 19, 18, 17, 17, 16, 16, 14, 14, 33, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 29, 29, 28, 27, 26, 25, 24, 23, - 23, 21, 21, 20, 19, 18, 17, 17, 16, 16, 14, 14, 33, 32, 32, 32, 32, 31, - 31, 31, 30, 30, 29, 29, 28, 28, 27, 26, 26, 24, 24, 23, 23, 21, 20, 20, - 19, 18, 17, 17, 16, 16, 14, 14, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, - 29, 29, 28, 28, 27, 26, 26, 24, 24, 23, 23, 21, 21, 20, 19, 18, 17, 17, - 16, 16, 15, 15, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 28, 27, 27, - 26, 26, 25, 24, 24, 22, 22, 21, 21, 20, 19, 19, 18, 17, 16, 16, 15, 15, - 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 25, 25, 24, - 24, 22, 22, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 15, 30, 30, 30, 31, - 31, 30, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 22, 21, 20, 20, 19, - 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 30, 30, 30, 31, 31, 30, 29, 29, - 28, 28, 26, 26, 24, 24, 23, 23, 22, 22, 21, 20, 20, 19, 19, 18, 18, 17, - 16, 16, 15, 15, 14, 14, 29, 29, 29, 30, 30, 29, 28, 28, 27, 27, 24, 24, - 22, 22, 21, 21, 20, 20, 19, 19, 19, 18, 17, 17, 17, 16, 15, 15, 14, 14, - 13, 13, 28, 29, 29, 30, 30, 29, 28, 28, 27, 27, 24, 24, 22, 21, 20, 20, - 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 27, 28, - 28, 28, 28, 28, 27, 27, 26, 26, 23, 23, 21, 20, 20, 20, 19, 18, 18, 17, - 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 26, 27, 27, 27, 28, 27, - 26, 
26, 26, 25, 23, 23, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, - 15, 14, 14, 14, 13, 13, 12, 12, 25, 26, 26, 26, 27, 26, 26, 26, 25, 25, - 22, 22, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 13, - 13, 13, 12, 12, 23, 24, 24, 25, 25, 25, 24, 24, 24, 24, 22, 22, 20, 19, - 18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, - 23, 24, 24, 24, 25, 24, 24, 24, 24, 24, 21, 21, 19, 19, 18, 18, 17, 16, - 16, 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 21, 22, 22, 23, - 23, 23, 23, 23, 22, 22, 20, 20, 19, 18, 17, 17, 16, 15, 15, 14, 14, 14, - 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 21, 22, 22, 23, 23, 23, 23, 23, - 22, 22, 20, 20, 19, 18, 17, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 12, - 12, 12, 11, 11, 11, 11, 19, 20, 20, 21, 21, 21, 21, 21, 21, 21, 19, 19, - 18, 17, 17, 16, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, - 10, 10, 19, 20, 20, 20, 21, 21, 20, 21, 21, 20, 19, 19, 17, 17, 16, 16, - 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 18, 19, - 19, 19, 20, 20, 20, 20, 20, 20, 18, 18, 17, 17, 16, 15, 15, 14, 14, 13, - 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 17, 18, 18, 19, 19, 19, - 19, 19, 19, 19, 18, 18, 17, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, - 11, 11, 10, 10, 10, 10, 9, 9, 17, 17, 17, 18, 18, 18, 18, 18, 19, 18, - 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 10, 10, 10, - 10, 9, 9, 9, 16, 17, 17, 17, 17, 17, 17, 17, 18, 18, 16, 16, 15, 15, 14, - 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 15, 16, - 16, 17, 17, 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, - 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 14, 15, 15, 16, 16, 16, 16, - 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, - 10, 9, 9, 9, 9, 8, 8, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 15, 15, - 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 8, 8, - 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, - 11, 11, 
11, 10, 10, 9, 9, 9, 9, 9, 8, 8, 8, 8, 13, 13, 13, 14, 14, 14, - 14, 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 10, 9, - 9, 9, 9, 9, 8, 8, 8, 8, - /* Size 4x8 */ - 32, 30, 24, 17, 32, 30, 24, 17, 31, 28, 23, 18, 29, 24, 19, 15, 25, 21, - 16, 13, 21, 19, 14, 11, 18, 17, 13, 10, 16, 15, 12, 9, - /* Size 8x4 */ - 32, 32, 31, 29, 25, 21, 18, 16, 30, 30, 28, 24, 21, 19, 17, 15, 24, 24, - 23, 19, 16, 14, 13, 12, 17, 17, 18, 15, 13, 11, 10, 9, - /* Size 8x16 */ - 32, 33, 32, 28, 23, 19, 17, 14, 33, 32, 32, 29, 24, 20, 17, 15, 33, 32, - 31, 30, 25, 21, 18, 16, 32, 32, 30, 28, 24, 20, 18, 16, 32, 31, 29, 27, - 24, 21, 18, 16, 30, 30, 28, 24, 21, 19, 17, 15, 29, 30, 27, 22, 20, 17, - 16, 14, 27, 28, 26, 21, 18, 16, 15, 13, 25, 26, 25, 20, 17, 15, 14, 13, - 23, 24, 24, 19, 16, 14, 13, 12, 21, 23, 22, 18, 15, 13, 12, 11, 19, 21, - 20, 17, 14, 12, 11, 10, 18, 19, 19, 16, 14, 12, 11, 10, 16, 17, 18, 15, - 13, 11, 10, 9, 14, 16, 16, 14, 12, 11, 9, 9, 13, 14, 15, 13, 11, 10, 9, - 8, - /* Size 16x8 */ - 32, 33, 33, 32, 32, 30, 29, 27, 25, 23, 21, 19, 18, 16, 14, 13, 33, 32, - 32, 32, 31, 30, 30, 28, 26, 24, 23, 21, 19, 17, 16, 14, 32, 32, 31, 30, - 29, 28, 27, 26, 25, 24, 22, 20, 19, 18, 16, 15, 28, 29, 30, 28, 27, 24, - 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 23, 24, 25, 24, 24, 21, 20, 18, - 17, 16, 15, 14, 14, 13, 12, 11, 19, 20, 21, 20, 21, 19, 17, 16, 15, 14, - 13, 12, 12, 11, 11, 10, 17, 17, 18, 18, 18, 17, 16, 15, 14, 13, 12, 11, - 11, 10, 9, 9, 14, 15, 16, 16, 16, 15, 14, 13, 13, 12, 11, 10, 10, 9, 9, - 8, - /* Size 16x32 */ - 32, 33, 33, 32, 32, 30, 28, 27, 23, 23, 19, 19, 17, 16, 14, 13, 33, 32, - 32, 32, 32, 30, 29, 28, 24, 24, 20, 20, 17, 17, 15, 14, 33, 32, 32, 32, - 32, 30, 29, 28, 24, 24, 20, 20, 17, 17, 15, 14, 33, 32, 32, 32, 32, 31, - 29, 28, 25, 24, 20, 20, 18, 17, 15, 14, 33, 32, 32, 32, 31, 31, 30, 28, - 25, 25, 21, 21, 18, 17, 16, 14, 33, 32, 32, 31, 31, 30, 29, 28, 25, 24, - 21, 21, 18, 17, 16, 14, 32, 32, 32, 31, 30, 29, 28, 27, 24, 24, 20, 
20, - 18, 17, 16, 14, 32, 32, 32, 30, 30, 29, 28, 27, 24, 24, 21, 21, 18, 17, - 16, 15, 32, 32, 31, 30, 29, 28, 27, 26, 24, 24, 21, 21, 18, 18, 16, 15, - 32, 31, 31, 30, 29, 28, 26, 26, 24, 23, 20, 20, 18, 18, 16, 15, 30, 30, - 30, 28, 28, 26, 24, 23, 21, 21, 19, 19, 17, 16, 15, 14, 30, 30, 30, 28, - 28, 26, 24, 23, 21, 21, 19, 19, 17, 16, 15, 14, 29, 30, 30, 28, 27, 24, - 22, 21, 20, 19, 17, 17, 16, 15, 14, 13, 28, 29, 30, 28, 27, 24, 21, 21, - 19, 19, 17, 17, 16, 15, 14, 13, 27, 28, 28, 27, 26, 23, 21, 20, 18, 18, - 16, 16, 15, 14, 13, 13, 26, 27, 28, 26, 26, 23, 20, 20, 18, 18, 16, 16, - 14, 14, 13, 12, 25, 26, 26, 25, 25, 22, 20, 19, 17, 17, 15, 15, 14, 13, - 13, 12, 23, 25, 25, 24, 24, 21, 19, 18, 16, 16, 14, 14, 13, 13, 12, 11, - 23, 24, 24, 24, 24, 21, 19, 18, 16, 16, 14, 14, 13, 13, 12, 11, 21, 23, - 23, 22, 22, 20, 18, 17, 15, 15, 13, 13, 12, 12, 11, 11, 21, 23, 23, 22, - 22, 20, 18, 17, 15, 15, 13, 13, 12, 12, 11, 11, 19, 21, 21, 21, 21, 19, - 17, 17, 14, 14, 13, 13, 12, 11, 10, 10, 19, 20, 21, 20, 20, 19, 17, 16, - 14, 14, 12, 12, 11, 11, 10, 10, 18, 19, 20, 20, 20, 18, 17, 16, 14, 14, - 12, 12, 11, 11, 10, 9, 18, 19, 19, 19, 19, 18, 16, 15, 14, 13, 12, 12, - 11, 10, 10, 9, 17, 18, 18, 18, 18, 17, 16, 15, 13, 13, 12, 12, 10, 10, - 9, 9, 16, 17, 17, 17, 18, 16, 15, 14, 13, 13, 11, 11, 10, 10, 9, 9, 15, - 17, 17, 17, 17, 16, 15, 14, 13, 12, 11, 11, 10, 10, 9, 9, 14, 16, 16, - 16, 16, 15, 14, 13, 12, 12, 11, 11, 9, 9, 9, 8, 14, 16, 16, 16, 16, 15, - 14, 13, 12, 12, 10, 10, 9, 9, 9, 8, 13, 14, 14, 14, 15, 14, 13, 12, 11, - 11, 10, 10, 9, 9, 8, 8, 13, 14, 14, 14, 15, 14, 13, 12, 11, 11, 10, 10, - 9, 9, 8, 8, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 29, 28, 27, 26, 25, 23, - 23, 21, 21, 19, 19, 18, 18, 17, 16, 15, 14, 14, 13, 13, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 28, 27, 26, 25, 24, 23, 23, 21, - 20, 19, 19, 18, 17, 17, 16, 16, 14, 14, 33, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 30, 30, 30, 30, 28, 28, 26, 25, 
24, 23, 23, 21, 21, 20, 19, 18, - 17, 17, 16, 16, 14, 14, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 28, 28, - 28, 28, 27, 26, 25, 24, 24, 22, 22, 21, 20, 20, 19, 18, 17, 17, 16, 16, - 14, 14, 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, - 25, 24, 24, 22, 22, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 15, 30, 30, - 30, 31, 31, 30, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 21, 21, 20, - 20, 19, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 28, 29, 29, 29, 30, 29, - 28, 28, 27, 26, 24, 24, 22, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, - 16, 16, 15, 15, 14, 14, 13, 13, 27, 28, 28, 28, 28, 28, 27, 27, 26, 26, - 23, 23, 21, 21, 20, 20, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, - 13, 13, 12, 12, 23, 24, 24, 25, 25, 25, 24, 24, 24, 24, 21, 21, 20, 19, - 18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, - 23, 24, 24, 24, 25, 24, 24, 24, 24, 23, 21, 21, 19, 19, 18, 18, 17, 16, - 16, 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 19, 20, 20, 20, - 21, 21, 20, 21, 21, 20, 19, 19, 17, 17, 16, 16, 15, 14, 14, 13, 13, 13, - 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 19, 20, 20, 20, 21, 21, 20, 21, - 21, 20, 19, 19, 17, 17, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, - 11, 11, 11, 10, 10, 10, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 17, 17, - 16, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, - 9, 16, 17, 17, 17, 17, 17, 17, 17, 18, 18, 16, 16, 15, 15, 14, 14, 13, - 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 14, 15, 15, 15, - 16, 16, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, - 10, 10, 10, 9, 9, 9, 9, 9, 8, 8, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, - 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 8, 8, - 8, 8, - /* Size 4x16 */ - 33, 30, 23, 16, 32, 30, 24, 17, 32, 31, 25, 17, 32, 29, 24, 17, 32, 28, - 24, 18, 30, 26, 21, 16, 30, 24, 19, 15, 28, 23, 18, 14, 26, 22, 17, 13, - 24, 21, 16, 13, 23, 20, 15, 12, 20, 19, 14, 11, 19, 18, 13, 10, 17, 16, - 13, 10, 16, 
15, 12, 9, 14, 14, 11, 9, - /* Size 16x4 */ - 33, 32, 32, 32, 32, 30, 30, 28, 26, 24, 23, 20, 19, 17, 16, 14, 30, 30, - 31, 29, 28, 26, 24, 23, 22, 21, 20, 19, 18, 16, 15, 14, 23, 24, 25, 24, - 24, 21, 19, 18, 17, 16, 15, 14, 13, 13, 12, 11, 16, 17, 17, 17, 18, 16, - 15, 14, 13, 13, 12, 11, 10, 10, 9, 9, - /* Size 8x32 */ - 32, 33, 32, 28, 23, 19, 17, 14, 33, 32, 32, 29, 24, 20, 17, 15, 33, 32, - 32, 29, 24, 20, 17, 15, 33, 32, 32, 29, 25, 20, 18, 15, 33, 32, 31, 30, - 25, 21, 18, 16, 33, 32, 31, 29, 25, 21, 18, 16, 32, 32, 30, 28, 24, 20, - 18, 16, 32, 32, 30, 28, 24, 21, 18, 16, 32, 31, 29, 27, 24, 21, 18, 16, - 32, 31, 29, 26, 24, 20, 18, 16, 30, 30, 28, 24, 21, 19, 17, 15, 30, 30, - 28, 24, 21, 19, 17, 15, 29, 30, 27, 22, 20, 17, 16, 14, 28, 30, 27, 21, - 19, 17, 16, 14, 27, 28, 26, 21, 18, 16, 15, 13, 26, 28, 26, 20, 18, 16, - 14, 13, 25, 26, 25, 20, 17, 15, 14, 13, 23, 25, 24, 19, 16, 14, 13, 12, - 23, 24, 24, 19, 16, 14, 13, 12, 21, 23, 22, 18, 15, 13, 12, 11, 21, 23, - 22, 18, 15, 13, 12, 11, 19, 21, 21, 17, 14, 13, 12, 10, 19, 21, 20, 17, - 14, 12, 11, 10, 18, 20, 20, 17, 14, 12, 11, 10, 18, 19, 19, 16, 14, 12, - 11, 10, 17, 18, 18, 16, 13, 12, 10, 9, 16, 17, 18, 15, 13, 11, 10, 9, - 15, 17, 17, 15, 13, 11, 10, 9, 14, 16, 16, 14, 12, 11, 9, 9, 14, 16, 16, - 14, 12, 10, 9, 9, 13, 14, 15, 13, 11, 10, 9, 8, 13, 14, 15, 13, 11, 10, - 9, 8, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 29, 28, 27, 26, 25, 23, - 23, 21, 21, 19, 19, 18, 18, 17, 16, 15, 14, 14, 13, 13, 33, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 26, 25, 24, 23, 23, 21, - 21, 20, 19, 18, 17, 17, 16, 16, 14, 14, 32, 32, 32, 32, 31, 31, 30, 30, - 29, 29, 28, 28, 27, 27, 26, 26, 25, 24, 24, 22, 22, 21, 20, 20, 19, 18, - 18, 17, 16, 16, 15, 15, 28, 29, 29, 29, 30, 29, 28, 28, 27, 26, 24, 24, - 22, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, - 13, 13, 23, 24, 24, 25, 25, 25, 24, 24, 24, 24, 21, 21, 20, 19, 18, 18, - 17, 16, 16, 15, 15, 
14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 19, 20, - 20, 20, 21, 21, 20, 21, 21, 20, 19, 19, 17, 17, 16, 16, 15, 14, 14, 13, - 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 17, 17, 17, 18, 18, 18, - 18, 18, 18, 18, 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, - 11, 10, 10, 10, 9, 9, 9, 9, 14, 15, 15, 15, 16, 16, 16, 16, 16, 16, 15, - 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, - 8, 8 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 24, 22, 19, 24, 21, 20, 19, 22, 20, 17, 15, 19, 19, 15, 13, - /* Size 8x8 */ - 33, 32, 27, 21, 22, 20, 19, 18, 32, 29, 24, 22, 23, 22, 20, 19, 27, 24, - 22, 21, 23, 22, 21, 20, 21, 22, 21, 19, 19, 19, 18, 18, 22, 23, 23, 19, - 18, 17, 16, 16, 20, 22, 22, 19, 17, 16, 15, 14, 19, 20, 21, 18, 16, 15, - 14, 13, 18, 19, 20, 18, 16, 14, 13, 12, - /* Size 16x16 */ - 32, 33, 34, 31, 28, 25, 22, 21, 21, 21, 20, 20, 19, 18, 17, 16, 33, 33, - 33, 30, 27, 24, 22, 22, 22, 22, 21, 20, 20, 19, 18, 17, 34, 33, 32, 29, - 26, 24, 23, 22, 23, 23, 22, 22, 21, 20, 19, 18, 31, 30, 29, 26, 24, 23, - 22, 22, 22, 23, 22, 22, 21, 20, 19, 18, 28, 27, 26, 24, 22, 22, 22, 22, - 22, 23, 22, 22, 21, 20, 20, 19, 25, 24, 24, 23, 22, 21, 20, 20, 21, 21, - 20, 20, 20, 19, 19, 18, 22, 22, 23, 22, 22, 20, 20, 20, 20, 20, 19, 19, - 19, 18, 18, 17, 21, 22, 22, 22, 22, 20, 20, 19, 19, 19, 19, 18, 18, 18, - 17, 17, 21, 22, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, - 21, 22, 23, 23, 23, 21, 20, 19, 18, 17, 17, 17, 16, 16, 16, 15, 20, 21, - 22, 22, 22, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 20, 20, 22, 22, - 22, 20, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 19, 20, 21, 21, 21, 20, - 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 18, 19, 20, 20, 20, 19, 18, 18, - 17, 16, 15, 15, 14, 13, 13, 12, 17, 18, 19, 19, 20, 19, 18, 17, 16, 16, - 15, 14, 14, 13, 12, 12, 16, 17, 18, 18, 19, 18, 17, 17, 16, 15, 14, 14, - 13, 12, 12, 12, - /* Size 32x32 */ - 32, 33, 33, 34, 34, 32, 31, 30, 28, 28, 25, 25, 22, 21, 21, 21, 21, 21, - 21, 20, 20, 20, 20, 
19, 19, 18, 18, 18, 17, 17, 16, 16, 33, 33, 33, 33, - 33, 32, 30, 29, 27, 27, 24, 24, 22, 21, 22, 22, 22, 22, 22, 21, 21, 20, - 20, 20, 20, 19, 19, 19, 18, 18, 17, 17, 33, 33, 33, 33, 33, 31, 30, 29, - 27, 26, 24, 24, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, - 19, 19, 18, 18, 17, 17, 34, 33, 33, 33, 33, 31, 29, 28, 26, 26, 24, 24, - 22, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 19, - 18, 18, 34, 33, 33, 33, 32, 31, 29, 28, 26, 26, 24, 24, 23, 22, 22, 23, - 23, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, 18, 32, 32, - 31, 31, 31, 29, 28, 27, 25, 24, 24, 24, 22, 22, 22, 22, 23, 23, 23, 22, - 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, 18, 31, 30, 30, 29, 29, 28, - 26, 26, 24, 24, 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 21, - 21, 20, 20, 20, 19, 19, 18, 18, 30, 29, 29, 28, 28, 27, 26, 25, 23, 23, - 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, - 19, 19, 19, 19, 28, 27, 27, 26, 26, 25, 24, 23, 22, 22, 22, 22, 22, 21, - 22, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, - 28, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 21, 21, 22, 22, 22, 23, - 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 25, 24, 24, 24, - 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 20, 21, 21, 21, 21, 20, 20, 20, - 20, 20, 20, 20, 19, 19, 19, 19, 18, 18, 25, 24, 24, 24, 24, 24, 23, 23, - 22, 22, 21, 21, 20, 20, 20, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, - 19, 19, 19, 19, 18, 18, 22, 22, 22, 22, 23, 22, 22, 22, 22, 21, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, - 17, 17, 21, 21, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 21, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, - 19, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 21, 22, 22, 22, 23, 22, - 22, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, - 18, 17, 17, 17, 17, 
17, 16, 16, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, - 21, 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, - 16, 16, 16, 16, 21, 22, 22, 23, 23, 23, 23, 23, 23, 23, 21, 21, 20, 19, - 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, - 21, 22, 22, 22, 23, 23, 23, 23, 23, 22, 21, 21, 20, 19, 19, 18, 18, 17, - 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 15, 15, 20, 21, 21, 22, - 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 16, - 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 20, 21, 21, 22, 22, 22, 22, 22, - 22, 22, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, - 15, 15, 15, 15, 14, 14, 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 20, 20, - 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, - 14, 14, 20, 20, 20, 21, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, - 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 19, 20, - 20, 20, 21, 21, 21, 21, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, - 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 19, 20, 20, 20, 21, 21, - 21, 21, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, - 14, 14, 14, 14, 14, 13, 13, 13, 18, 19, 19, 20, 20, 20, 20, 20, 21, 21, - 20, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 14, 14, 14, 14, 14, - 13, 13, 13, 13, 18, 19, 19, 20, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, - 18, 17, 17, 16, 16, 15, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12, - 18, 19, 19, 19, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, - 16, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 17, 18, 18, 19, - 19, 19, 19, 19, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14, - 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 17, 18, 18, 19, 19, 19, 19, 19, - 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14, 14, 14, 13, 13, - 13, 13, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 18, 19, 19, 19, 18, 18, - 17, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, - 12, 12, 16, 17, 17, 
18, 18, 18, 18, 19, 19, 19, 18, 18, 17, 17, 17, 16, - 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, - /* Size 4x8 */ - 33, 24, 22, 19, 31, 23, 23, 20, 26, 22, 22, 20, 22, 20, 19, 18, 23, 21, - 17, 16, 21, 20, 17, 15, 20, 20, 16, 14, 19, 19, 16, 13, - /* Size 8x4 */ - 33, 31, 26, 22, 23, 21, 20, 19, 24, 23, 22, 20, 21, 20, 20, 19, 22, 23, - 22, 19, 17, 17, 16, 16, 19, 20, 20, 18, 16, 15, 14, 13, - /* Size 8x16 */ - 32, 33, 28, 21, 21, 20, 18, 17, 33, 33, 27, 22, 22, 20, 19, 18, 34, 32, - 26, 22, 23, 21, 20, 19, 31, 28, 24, 22, 22, 22, 20, 19, 28, 26, 22, 22, - 23, 22, 21, 20, 24, 24, 22, 20, 21, 20, 19, 18, 22, 22, 21, 20, 19, 19, - 19, 18, 21, 22, 22, 19, 19, 18, 18, 17, 21, 23, 22, 19, 18, 17, 17, 16, - 21, 23, 22, 19, 18, 17, 16, 16, 20, 22, 22, 19, 17, 16, 16, 15, 20, 21, - 22, 19, 17, 16, 15, 14, 19, 20, 21, 19, 17, 15, 14, 13, 18, 20, 20, 18, - 16, 15, 14, 13, 17, 19, 20, 18, 16, 14, 13, 12, 16, 18, 19, 17, 15, 14, - 13, 12, - /* Size 16x8 */ - 32, 33, 34, 31, 28, 24, 22, 21, 21, 21, 20, 20, 19, 18, 17, 16, 33, 33, - 32, 28, 26, 24, 22, 22, 23, 23, 22, 21, 20, 20, 19, 18, 28, 27, 26, 24, - 22, 22, 21, 22, 22, 22, 22, 22, 21, 20, 20, 19, 21, 22, 22, 22, 22, 20, - 20, 19, 19, 19, 19, 19, 19, 18, 18, 17, 21, 22, 23, 22, 23, 21, 19, 19, - 18, 18, 17, 17, 17, 16, 16, 15, 20, 20, 21, 22, 22, 20, 19, 18, 17, 17, - 16, 16, 15, 15, 14, 14, 18, 19, 20, 20, 21, 19, 19, 18, 17, 16, 16, 15, - 14, 14, 13, 13, 17, 18, 19, 19, 20, 18, 18, 17, 16, 16, 15, 14, 13, 13, - 12, 12, - /* Size 16x32 */ - 32, 33, 33, 29, 28, 24, 21, 21, 21, 21, 20, 20, 18, 18, 17, 16, 33, 33, - 33, 28, 27, 24, 22, 22, 22, 22, 20, 20, 19, 19, 18, 17, 33, 33, 33, 28, - 27, 24, 22, 22, 22, 22, 20, 20, 19, 19, 18, 17, 34, 32, 32, 28, 26, 24, - 22, 22, 22, 22, 21, 21, 20, 20, 18, 18, 34, 32, 32, 28, 26, 24, 22, 22, - 23, 23, 21, 21, 20, 20, 19, 18, 32, 31, 30, 26, 25, 23, 22, 22, 23, 23, - 21, 21, 20, 20, 19, 18, 31, 29, 28, 26, 24, 23, 22, 22, 22, 22, 22, 22, - 20, 20, 19, 18, 30, 28, 
28, 24, 23, 23, 22, 22, 23, 22, 22, 22, 20, 20, - 19, 19, 28, 26, 26, 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 20, 20, 19, - 28, 26, 26, 23, 22, 22, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 24, 24, - 24, 22, 22, 21, 20, 20, 21, 21, 20, 20, 19, 19, 18, 18, 24, 24, 24, 22, - 22, 21, 20, 20, 21, 21, 20, 20, 19, 19, 18, 18, 22, 22, 22, 22, 21, 20, - 20, 20, 19, 19, 19, 19, 19, 18, 18, 17, 21, 22, 22, 22, 21, 20, 19, 19, - 19, 19, 19, 19, 18, 18, 17, 17, 21, 22, 22, 22, 22, 20, 19, 19, 19, 19, - 18, 18, 18, 18, 17, 17, 21, 22, 22, 22, 22, 20, 19, 19, 18, 18, 18, 18, - 17, 17, 17, 16, 21, 22, 23, 22, 22, 21, 19, 19, 18, 18, 17, 17, 17, 17, - 16, 16, 21, 23, 23, 23, 22, 21, 19, 19, 18, 17, 17, 17, 16, 16, 16, 15, - 21, 22, 23, 22, 22, 21, 19, 19, 18, 17, 17, 17, 16, 16, 16, 15, 20, 22, - 22, 22, 22, 20, 19, 19, 17, 17, 16, 16, 16, 15, 15, 14, 20, 22, 22, 22, - 22, 20, 19, 19, 17, 17, 16, 16, 16, 15, 15, 14, 20, 21, 21, 22, 22, 20, - 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 20, 21, 21, 22, 22, 20, 19, 18, - 17, 17, 16, 16, 15, 14, 14, 14, 19, 20, 21, 21, 21, 20, 19, 18, 17, 17, - 15, 15, 14, 14, 14, 13, 19, 20, 20, 21, 21, 20, 19, 18, 17, 16, 15, 15, - 14, 14, 13, 13, 19, 20, 20, 20, 21, 20, 18, 18, 16, 16, 15, 15, 14, 14, - 13, 13, 18, 20, 20, 20, 20, 19, 18, 18, 16, 16, 15, 15, 14, 13, 13, 12, - 18, 19, 19, 20, 20, 19, 18, 17, 16, 16, 14, 14, 13, 13, 13, 12, 17, 19, - 19, 19, 20, 19, 18, 17, 16, 16, 14, 14, 13, 13, 12, 12, 17, 19, 19, 19, - 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 16, 18, 18, 18, 19, 18, - 17, 17, 15, 15, 14, 14, 13, 12, 12, 12, 16, 18, 18, 18, 19, 18, 17, 17, - 15, 15, 14, 14, 13, 12, 12, 12, - /* Size 32x16 */ - 32, 33, 33, 34, 34, 32, 31, 30, 28, 28, 24, 24, 22, 21, 21, 21, 21, 21, - 21, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 17, 16, 16, 33, 33, 33, 32, - 32, 31, 29, 28, 26, 26, 24, 24, 22, 22, 22, 22, 22, 23, 22, 22, 22, 21, - 21, 20, 20, 20, 20, 19, 19, 19, 18, 18, 33, 33, 33, 32, 32, 30, 28, 28, - 26, 26, 24, 24, 22, 22, 22, 22, 23, 23, 23, 
22, 22, 21, 21, 21, 20, 20, - 20, 19, 19, 19, 18, 18, 29, 28, 28, 28, 28, 26, 26, 24, 23, 23, 22, 22, - 22, 22, 22, 22, 22, 23, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, - 18, 18, 28, 27, 27, 26, 26, 25, 24, 23, 22, 22, 22, 22, 21, 21, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 24, 24, - 24, 24, 24, 23, 23, 23, 22, 22, 21, 21, 20, 20, 20, 20, 21, 21, 21, 20, - 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 18, 18, 21, 22, 22, 22, 22, 22, - 22, 22, 22, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 18, 18, 18, 18, 17, 17, 17, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 17, - 17, 17, 17, 17, 21, 22, 22, 22, 23, 23, 22, 23, 23, 22, 21, 21, 19, 19, - 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, - 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 19, 19, 19, 18, 18, 17, - 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 15, 15, 20, 20, 20, 21, - 21, 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, - 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 20, 20, 20, 21, 21, 21, 22, 22, - 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, - 15, 14, 14, 14, 14, 14, 18, 19, 19, 20, 20, 20, 20, 20, 21, 21, 19, 19, - 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, - 13, 13, 18, 19, 19, 20, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 18, 17, - 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 17, 18, - 18, 18, 19, 19, 19, 19, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 15, - 15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, - 18, 19, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, - 13, 13, 12, 12, 12, 12, 12, 12, - /* Size 4x16 */ - 33, 24, 21, 18, 33, 24, 22, 19, 32, 24, 23, 20, 29, 23, 22, 20, 26, 22, - 22, 20, 24, 21, 21, 19, 22, 20, 19, 18, 22, 20, 19, 18, 22, 21, 18, 17, - 22, 21, 17, 16, 22, 20, 17, 15, 21, 20, 17, 14, 20, 20, 16, 14, 20, 
19, - 16, 13, 19, 19, 16, 13, 18, 18, 15, 12, - /* Size 16x4 */ - 33, 33, 32, 29, 26, 24, 22, 22, 22, 22, 22, 21, 20, 20, 19, 18, 24, 24, - 24, 23, 22, 21, 20, 20, 21, 21, 20, 20, 20, 19, 19, 18, 21, 22, 23, 22, - 22, 21, 19, 19, 18, 17, 17, 17, 16, 16, 16, 15, 18, 19, 20, 20, 20, 19, - 18, 18, 17, 16, 15, 14, 14, 13, 13, 12, - /* Size 8x32 */ - 32, 33, 28, 21, 21, 20, 18, 17, 33, 33, 27, 22, 22, 20, 19, 18, 33, 33, - 27, 22, 22, 20, 19, 18, 34, 32, 26, 22, 22, 21, 20, 18, 34, 32, 26, 22, - 23, 21, 20, 19, 32, 30, 25, 22, 23, 21, 20, 19, 31, 28, 24, 22, 22, 22, - 20, 19, 30, 28, 23, 22, 23, 22, 20, 19, 28, 26, 22, 22, 23, 22, 21, 20, - 28, 26, 22, 21, 22, 22, 21, 19, 24, 24, 22, 20, 21, 20, 19, 18, 24, 24, - 22, 20, 21, 20, 19, 18, 22, 22, 21, 20, 19, 19, 19, 18, 21, 22, 21, 19, - 19, 19, 18, 17, 21, 22, 22, 19, 19, 18, 18, 17, 21, 22, 22, 19, 18, 18, - 17, 17, 21, 23, 22, 19, 18, 17, 17, 16, 21, 23, 22, 19, 18, 17, 16, 16, - 21, 23, 22, 19, 18, 17, 16, 16, 20, 22, 22, 19, 17, 16, 16, 15, 20, 22, - 22, 19, 17, 16, 16, 15, 20, 21, 22, 19, 17, 16, 15, 14, 20, 21, 22, 19, - 17, 16, 15, 14, 19, 21, 21, 19, 17, 15, 14, 14, 19, 20, 21, 19, 17, 15, - 14, 13, 19, 20, 21, 18, 16, 15, 14, 13, 18, 20, 20, 18, 16, 15, 14, 13, - 18, 19, 20, 18, 16, 14, 13, 13, 17, 19, 20, 18, 16, 14, 13, 12, 17, 19, - 19, 17, 16, 14, 13, 12, 16, 18, 19, 17, 15, 14, 13, 12, 16, 18, 19, 17, - 15, 14, 13, 12, - /* Size 32x8 */ - 32, 33, 33, 34, 34, 32, 31, 30, 28, 28, 24, 24, 22, 21, 21, 21, 21, 21, - 21, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 17, 16, 16, 33, 33, 33, 32, - 32, 30, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 23, 23, 22, 22, 21, - 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 28, 27, 27, 26, 26, 25, 24, 23, - 22, 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, - 20, 20, 20, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, - 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 17, - 17, 17, 21, 22, 22, 22, 23, 23, 22, 23, 23, 22, 21, 21, 19, 
19, 19, 18, - 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 20, 20, - 20, 21, 21, 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, - 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 18, 19, 19, 20, 20, 20, - 20, 20, 21, 21, 19, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 14, - 14, 14, 14, 13, 13, 13, 13, 13, 17, 18, 18, 18, 19, 19, 19, 19, 20, 19, - 18, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, - 12, 12, 12, 12 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 31, 24, 19, 31, 27, 22, 18, 24, 22, 16, 14, 19, 18, 14, 11, - /* Size 8x8 */ - 33, 32, 32, 30, 27, 22, 20, 16, 32, 32, 32, 30, 28, 23, 21, 17, 32, 32, - 29, 28, 26, 23, 21, 18, 30, 30, 28, 24, 22, 20, 18, 16, 27, 28, 26, 22, - 19, 17, 16, 14, 22, 23, 23, 20, 17, 15, 14, 12, 20, 21, 21, 18, 16, 14, - 12, 11, 16, 17, 18, 16, 14, 12, 11, 10, - /* Size 16x16 */ - 32, 33, 33, 33, 32, 32, 30, 28, 27, 25, 23, 21, 19, 18, 17, 16, 33, 32, - 32, 32, 32, 32, 30, 29, 27, 26, 24, 22, 20, 19, 18, 17, 33, 32, 32, 32, - 32, 32, 31, 30, 28, 27, 25, 23, 21, 19, 18, 17, 33, 32, 32, 31, 31, 31, - 29, 28, 27, 26, 24, 23, 21, 19, 18, 17, 32, 32, 32, 31, 30, 30, 28, 28, - 26, 26, 24, 23, 21, 19, 19, 17, 32, 32, 32, 31, 30, 29, 28, 27, 26, 25, - 24, 22, 21, 20, 19, 18, 30, 30, 31, 29, 28, 28, 26, 24, 23, 22, 22, 20, - 19, 18, 17, 16, 28, 29, 30, 28, 28, 27, 24, 21, 20, 20, 19, 18, 17, 16, - 16, 15, 27, 27, 28, 27, 26, 26, 23, 20, 20, 19, 18, 17, 16, 15, 15, 14, - 25, 26, 27, 26, 26, 25, 22, 20, 19, 18, 17, 16, 15, 15, 14, 14, 23, 24, - 25, 24, 24, 24, 22, 19, 18, 17, 16, 15, 14, 14, 13, 13, 21, 22, 23, 23, - 23, 22, 20, 18, 17, 16, 15, 14, 13, 13, 12, 12, 19, 20, 21, 21, 21, 21, - 19, 17, 16, 15, 14, 13, 12, 12, 12, 11, 18, 19, 19, 19, 19, 20, 18, 16, - 15, 15, 14, 13, 12, 11, 11, 11, 17, 18, 18, 18, 19, 19, 17, 16, 15, 14, - 13, 12, 12, 11, 11, 10, 16, 17, 17, 17, 17, 18, 16, 15, 14, 14, 13, 12, - 11, 11, 10, 10, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 
32, 31, 30, 30, 28, 28, 27, 26, - 25, 23, 23, 22, 21, 20, 19, 19, 18, 17, 17, 16, 16, 15, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 27, 27, 26, 24, 24, 22, - 22, 21, 20, 20, 18, 18, 17, 16, 16, 15, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 30, 30, 29, 29, 27, 27, 26, 24, 24, 23, 22, 21, 20, 20, - 19, 18, 18, 17, 17, 15, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 30, 30, 29, 29, 28, 27, 26, 24, 24, 23, 23, 22, 20, 20, 19, 19, 18, 17, - 17, 16, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, - 28, 28, 27, 25, 25, 23, 23, 22, 21, 21, 19, 19, 18, 17, 17, 16, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 28, 28, 27, 25, - 25, 23, 23, 22, 21, 21, 19, 19, 18, 17, 17, 16, 33, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 31, 30, 29, 29, 28, 28, 27, 26, 26, 24, 24, 23, 23, 22, - 21, 21, 19, 19, 18, 17, 17, 16, 33, 32, 32, 32, 32, 32, 31, 31, 31, 30, - 30, 29, 29, 28, 28, 28, 27, 26, 26, 24, 24, 23, 23, 22, 20, 20, 19, 19, - 18, 17, 17, 16, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 29, 28, 28, - 28, 28, 26, 26, 26, 24, 24, 23, 23, 22, 21, 21, 19, 19, 19, 17, 17, 16, - 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, - 25, 24, 24, 23, 22, 22, 21, 21, 20, 19, 19, 18, 18, 17, 32, 32, 32, 32, - 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 25, 24, 24, 23, - 22, 22, 21, 21, 20, 19, 19, 18, 18, 17, 31, 31, 31, 31, 31, 31, 30, 29, - 29, 28, 28, 27, 26, 26, 24, 24, 24, 23, 23, 22, 22, 21, 20, 20, 19, 19, - 18, 18, 17, 17, 17, 16, 30, 30, 30, 30, 31, 31, 29, 29, 28, 28, 28, 26, - 26, 25, 24, 24, 23, 23, 22, 22, 22, 20, 20, 20, 19, 19, 18, 18, 17, 16, - 16, 15, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 26, 25, 24, 23, 23, - 22, 22, 21, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 28, 29, - 29, 29, 30, 30, 28, 28, 28, 27, 27, 24, 24, 23, 21, 21, 20, 20, 20, 19, - 19, 18, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14, 28, 29, 29, 29, 30, 30, - 28, 28, 28, 27, 27, 24, 24, 23, 21, 21, 
20, 20, 20, 19, 19, 18, 18, 18, - 17, 17, 16, 16, 16, 15, 15, 14, 27, 27, 27, 28, 28, 28, 27, 27, 26, 26, - 26, 24, 23, 22, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, - 15, 14, 14, 13, 26, 27, 27, 27, 28, 28, 26, 26, 26, 26, 26, 23, 23, 22, - 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 13, - 25, 26, 26, 26, 27, 27, 26, 26, 26, 25, 25, 23, 22, 21, 20, 20, 19, 19, - 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 23, 24, 24, 24, - 25, 25, 24, 24, 24, 24, 24, 22, 22, 20, 19, 19, 18, 18, 17, 16, 16, 16, - 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 23, 24, 24, 24, 25, 25, 24, 24, - 24, 24, 24, 22, 22, 20, 19, 19, 18, 18, 17, 16, 16, 16, 15, 15, 14, 14, - 14, 14, 13, 13, 13, 12, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 21, - 20, 20, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13, 12, - 12, 12, 21, 22, 22, 23, 23, 23, 23, 23, 23, 22, 22, 20, 20, 19, 18, 18, - 17, 17, 16, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 20, 21, - 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 19, 18, 18, 17, 17, 16, 15, - 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11, 19, 20, 20, 20, 21, 21, - 21, 20, 21, 21, 21, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, - 12, 12, 12, 12, 12, 11, 11, 11, 19, 20, 20, 20, 21, 21, 21, 20, 21, 21, - 21, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, - 12, 11, 11, 11, 18, 18, 19, 19, 19, 19, 19, 19, 19, 20, 20, 18, 18, 17, - 16, 16, 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10, - 17, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 17, 16, 16, 15, 15, - 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 17, 17, 18, 18, - 18, 18, 18, 18, 19, 19, 19, 17, 17, 17, 16, 16, 15, 15, 14, 13, 13, 13, - 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 16, 16, 17, 17, 17, 17, 17, 17, - 17, 18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, - 11, 10, 10, 10, 10, 9, 16, 16, 17, 17, 17, 17, 17, 17, 17, 18, 18, 17, - 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 
12, 12, 11, 11, 11, 10, 10, 10, - 10, 9, 15, 15, 15, 16, 16, 16, 16, 16, 16, 17, 17, 16, 15, 15, 14, 14, - 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, - /* Size 4x8 */ - 32, 32, 24, 18, 32, 31, 25, 19, 32, 29, 24, 20, 30, 28, 20, 17, 27, 26, - 18, 15, 23, 23, 16, 13, 20, 20, 14, 12, 17, 18, 13, 11, - /* Size 8x4 */ - 32, 32, 32, 30, 27, 23, 20, 17, 32, 31, 29, 28, 26, 23, 20, 18, 24, 25, - 24, 20, 18, 16, 14, 13, 18, 19, 20, 17, 15, 13, 12, 11, - /* Size 8x16 */ - 32, 33, 32, 29, 26, 23, 19, 16, 33, 32, 32, 29, 27, 24, 20, 17, 33, 32, - 31, 30, 28, 25, 21, 17, 33, 32, 30, 29, 27, 24, 21, 17, 32, 32, 30, 28, - 26, 24, 21, 18, 32, 31, 29, 28, 26, 24, 21, 18, 30, 30, 28, 25, 23, 21, - 19, 16, 28, 30, 27, 22, 20, 19, 17, 15, 27, 28, 26, 22, 20, 18, 16, 14, - 25, 26, 25, 21, 19, 17, 15, 13, 23, 25, 24, 20, 18, 16, 14, 13, 21, 23, - 22, 19, 17, 15, 13, 12, 19, 21, 20, 18, 16, 14, 12, 11, 18, 19, 19, 17, - 15, 14, 12, 11, 17, 18, 18, 16, 15, 13, 12, 10, 16, 17, 18, 16, 14, 13, - 11, 10, - /* Size 16x8 */ - 32, 33, 33, 33, 32, 32, 30, 28, 27, 25, 23, 21, 19, 18, 17, 16, 33, 32, - 32, 32, 32, 31, 30, 30, 28, 26, 25, 23, 21, 19, 18, 17, 32, 32, 31, 30, - 30, 29, 28, 27, 26, 25, 24, 22, 20, 19, 18, 18, 29, 29, 30, 29, 28, 28, - 25, 22, 22, 21, 20, 19, 18, 17, 16, 16, 26, 27, 28, 27, 26, 26, 23, 20, - 20, 19, 18, 17, 16, 15, 15, 14, 23, 24, 25, 24, 24, 24, 21, 19, 18, 17, - 16, 15, 14, 14, 13, 13, 19, 20, 21, 21, 21, 21, 19, 17, 16, 15, 14, 13, - 12, 12, 12, 11, 16, 17, 17, 17, 18, 18, 16, 15, 14, 13, 13, 12, 11, 11, - 10, 10, - /* Size 16x32 */ - 32, 33, 33, 33, 32, 32, 29, 28, 26, 23, 23, 20, 19, 18, 16, 16, 33, 32, - 32, 32, 32, 32, 29, 29, 27, 24, 24, 21, 20, 18, 16, 16, 33, 32, 32, 32, - 32, 32, 29, 29, 27, 24, 24, 21, 20, 19, 17, 17, 33, 32, 32, 32, 32, 32, - 30, 29, 28, 25, 25, 21, 20, 19, 17, 17, 33, 32, 32, 32, 31, 31, 30, 30, - 28, 25, 25, 22, 21, 19, 17, 17, 33, 32, 32, 32, 31, 31, 30, 30, 28, 25, - 25, 22, 21, 19, 17, 17, 33, 32, 32, 31, 30, 30, 
29, 28, 27, 24, 24, 21, - 21, 19, 17, 17, 32, 32, 32, 31, 30, 30, 28, 28, 27, 24, 24, 21, 20, 19, - 17, 17, 32, 32, 32, 31, 30, 30, 28, 28, 26, 24, 24, 21, 21, 19, 18, 18, - 32, 32, 31, 30, 29, 29, 28, 27, 26, 24, 24, 21, 21, 20, 18, 18, 32, 32, - 31, 30, 29, 29, 28, 27, 26, 24, 24, 21, 21, 20, 18, 18, 31, 31, 31, 29, - 28, 28, 26, 25, 24, 22, 22, 20, 19, 18, 17, 17, 30, 30, 30, 29, 28, 28, - 25, 24, 23, 21, 21, 19, 19, 18, 16, 16, 30, 30, 30, 29, 28, 28, 24, 23, - 22, 20, 20, 19, 18, 17, 16, 16, 28, 29, 30, 28, 27, 27, 22, 21, 20, 19, - 19, 18, 17, 16, 15, 15, 28, 29, 30, 28, 27, 27, 22, 21, 20, 19, 19, 18, - 17, 16, 15, 15, 27, 28, 28, 27, 26, 26, 22, 20, 20, 18, 18, 17, 16, 15, - 14, 14, 26, 27, 28, 26, 26, 26, 21, 20, 19, 18, 18, 16, 16, 15, 14, 14, - 25, 26, 26, 26, 25, 25, 21, 20, 19, 17, 17, 16, 15, 15, 13, 13, 23, 25, - 25, 24, 24, 24, 20, 19, 18, 16, 16, 15, 14, 14, 13, 13, 23, 25, 25, 24, - 24, 24, 20, 19, 18, 16, 16, 15, 14, 14, 13, 13, 22, 23, 23, 23, 23, 23, - 19, 18, 17, 16, 16, 14, 14, 13, 12, 12, 21, 23, 23, 23, 22, 22, 19, 18, - 17, 15, 15, 14, 13, 13, 12, 12, 20, 22, 22, 22, 22, 22, 19, 18, 17, 15, - 15, 13, 13, 12, 12, 12, 19, 20, 21, 20, 20, 20, 18, 17, 16, 14, 14, 13, - 12, 12, 11, 11, 19, 20, 21, 20, 20, 20, 18, 17, 16, 14, 14, 13, 12, 12, - 11, 11, 18, 19, 19, 19, 19, 19, 17, 16, 15, 14, 14, 12, 12, 11, 11, 11, - 18, 19, 19, 19, 19, 19, 17, 16, 15, 14, 14, 12, 12, 11, 10, 10, 17, 18, - 18, 18, 18, 18, 16, 16, 15, 13, 13, 12, 12, 11, 10, 10, 16, 17, 17, 17, - 18, 18, 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 16, 17, 17, 17, 18, 18, - 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 15, 16, 16, 16, 17, 17, 15, 14, - 13, 12, 12, 11, 11, 10, 9, 9, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 31, 30, 30, 28, 28, 27, 26, - 25, 23, 23, 22, 21, 20, 19, 19, 18, 18, 17, 16, 16, 15, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 27, 26, 25, 25, 23, - 23, 22, 20, 20, 19, 19, 18, 17, 17, 16, 33, 32, 32, 32, 32, 32, 32, 32, 
- 32, 31, 31, 31, 30, 30, 30, 30, 28, 28, 26, 25, 25, 23, 23, 22, 21, 21, - 19, 19, 18, 17, 17, 16, 33, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 29, - 29, 29, 28, 28, 27, 26, 26, 24, 24, 23, 23, 22, 20, 20, 19, 19, 18, 17, - 17, 16, 32, 32, 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 27, 27, - 26, 26, 25, 24, 24, 23, 22, 22, 20, 20, 19, 19, 18, 18, 18, 17, 32, 32, - 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 25, 24, - 24, 23, 22, 22, 20, 20, 19, 19, 18, 18, 18, 17, 29, 29, 29, 30, 30, 30, - 29, 28, 28, 28, 28, 26, 25, 24, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, - 18, 18, 17, 17, 16, 16, 16, 15, 28, 29, 29, 29, 30, 30, 28, 28, 28, 27, - 27, 25, 24, 23, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 16, 16, - 16, 15, 15, 14, 26, 27, 27, 28, 28, 28, 27, 27, 26, 26, 26, 24, 23, 22, - 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 13, - 23, 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 22, 21, 20, 19, 19, 18, 18, - 17, 16, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 23, 24, 24, 25, - 25, 25, 24, 24, 24, 24, 24, 22, 21, 20, 19, 19, 18, 18, 17, 16, 16, 16, - 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 20, 21, 21, 21, 22, 22, 21, 21, - 21, 21, 21, 20, 19, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, - 12, 12, 12, 12, 12, 11, 19, 20, 20, 20, 21, 21, 21, 20, 21, 21, 21, 19, - 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11, - 11, 11, 18, 18, 19, 19, 19, 19, 19, 19, 19, 20, 20, 18, 18, 17, 16, 16, - 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10, 16, 16, - 17, 17, 17, 17, 17, 17, 18, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, - 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 16, 16, 17, 17, 17, 17, - 17, 17, 18, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, - 11, 11, 11, 10, 10, 10, 10, 9, - /* Size 4x16 */ - 33, 32, 23, 18, 32, 32, 24, 19, 32, 31, 25, 19, 32, 30, 24, 19, 32, 30, - 24, 19, 32, 29, 24, 20, 30, 28, 21, 18, 29, 27, 19, 16, 28, 26, 18, 15, - 26, 25, 17, 15, 25, 24, 
16, 14, 23, 22, 15, 13, 20, 20, 14, 12, 19, 19, - 14, 11, 18, 18, 13, 11, 17, 18, 13, 11, - /* Size 16x4 */ - 33, 32, 32, 32, 32, 32, 30, 29, 28, 26, 25, 23, 20, 19, 18, 17, 32, 32, - 31, 30, 30, 29, 28, 27, 26, 25, 24, 22, 20, 19, 18, 18, 23, 24, 25, 24, - 24, 24, 21, 19, 18, 17, 16, 15, 14, 14, 13, 13, 18, 19, 19, 19, 19, 20, - 18, 16, 15, 15, 14, 13, 12, 11, 11, 11, - /* Size 8x32 */ - 32, 33, 32, 29, 26, 23, 19, 16, 33, 32, 32, 29, 27, 24, 20, 16, 33, 32, - 32, 29, 27, 24, 20, 17, 33, 32, 32, 30, 28, 25, 20, 17, 33, 32, 31, 30, - 28, 25, 21, 17, 33, 32, 31, 30, 28, 25, 21, 17, 33, 32, 30, 29, 27, 24, - 21, 17, 32, 32, 30, 28, 27, 24, 20, 17, 32, 32, 30, 28, 26, 24, 21, 18, - 32, 31, 29, 28, 26, 24, 21, 18, 32, 31, 29, 28, 26, 24, 21, 18, 31, 31, - 28, 26, 24, 22, 19, 17, 30, 30, 28, 25, 23, 21, 19, 16, 30, 30, 28, 24, - 22, 20, 18, 16, 28, 30, 27, 22, 20, 19, 17, 15, 28, 30, 27, 22, 20, 19, - 17, 15, 27, 28, 26, 22, 20, 18, 16, 14, 26, 28, 26, 21, 19, 18, 16, 14, - 25, 26, 25, 21, 19, 17, 15, 13, 23, 25, 24, 20, 18, 16, 14, 13, 23, 25, - 24, 20, 18, 16, 14, 13, 22, 23, 23, 19, 17, 16, 14, 12, 21, 23, 22, 19, - 17, 15, 13, 12, 20, 22, 22, 19, 17, 15, 13, 12, 19, 21, 20, 18, 16, 14, - 12, 11, 19, 21, 20, 18, 16, 14, 12, 11, 18, 19, 19, 17, 15, 14, 12, 11, - 18, 19, 19, 17, 15, 14, 12, 10, 17, 18, 18, 16, 15, 13, 12, 10, 16, 17, - 18, 16, 14, 13, 11, 10, 16, 17, 18, 16, 14, 13, 11, 10, 15, 16, 17, 15, - 13, 12, 11, 9, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 31, 30, 30, 28, 28, 27, 26, - 25, 23, 23, 22, 21, 20, 19, 19, 18, 18, 17, 16, 16, 15, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 28, 28, 26, 25, 25, 23, - 23, 22, 21, 21, 19, 19, 18, 17, 17, 16, 32, 32, 32, 32, 31, 31, 30, 30, - 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 25, 24, 24, 23, 22, 22, 20, 20, - 19, 19, 18, 18, 18, 17, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 26, - 25, 24, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 17, 17, 16, 16, - 16, 15, 26, 27, 
27, 28, 28, 28, 27, 27, 26, 26, 26, 24, 23, 22, 20, 20, - 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 13, 23, 24, - 24, 25, 25, 25, 24, 24, 24, 24, 24, 22, 21, 20, 19, 19, 18, 18, 17, 16, - 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 19, 20, 20, 20, 21, 21, - 21, 20, 21, 21, 21, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, - 12, 12, 12, 12, 12, 11, 11, 11, 16, 16, 17, 17, 17, 17, 17, 17, 18, 18, - 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, - 10, 10, 10, 9 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 25, 22, 20, 25, 21, 21, 20, 22, 21, 18, 17, 20, 20, 17, 14, - /* Size 8x8 */ - 33, 33, 27, 23, 22, 21, 20, 19, 33, 32, 26, 23, 23, 22, 22, 20, 27, 26, - 22, 22, 22, 22, 22, 20, 23, 23, 22, 20, 20, 20, 20, 19, 22, 23, 22, 20, - 19, 18, 18, 17, 21, 22, 22, 20, 18, 17, 16, 16, 20, 22, 22, 20, 18, 16, - 16, 15, 19, 20, 20, 19, 17, 16, 15, 13, - /* Size 16x16 */ - 32, 33, 34, 31, 30, 28, 25, 21, 21, 21, 21, 20, 20, 19, 19, 18, 33, 33, - 33, 30, 28, 27, 24, 22, 22, 22, 22, 21, 20, 20, 19, 19, 34, 33, 32, 30, - 28, 26, 24, 22, 23, 23, 23, 22, 22, 21, 20, 20, 31, 30, 30, 28, 26, 24, - 23, 22, 22, 22, 23, 22, 22, 21, 20, 20, 30, 28, 28, 26, 24, 23, 22, 22, - 22, 22, 23, 22, 22, 21, 21, 20, 28, 27, 26, 24, 23, 22, 22, 21, 22, 22, - 23, 22, 22, 21, 21, 20, 25, 24, 24, 23, 22, 22, 21, 20, 20, 21, 21, 20, - 20, 20, 20, 19, 21, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 19, - 18, 18, 21, 22, 23, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, - 21, 22, 23, 22, 22, 22, 21, 19, 19, 19, 18, 18, 17, 17, 17, 17, 21, 22, - 23, 23, 23, 23, 21, 19, 19, 18, 18, 17, 17, 17, 16, 16, 20, 21, 22, 22, - 22, 22, 20, 19, 18, 18, 17, 17, 16, 16, 16, 15, 20, 20, 22, 22, 22, 22, - 20, 19, 18, 17, 17, 16, 16, 15, 15, 15, 19, 20, 21, 21, 21, 21, 20, 19, - 18, 17, 17, 16, 15, 15, 14, 14, 19, 19, 20, 20, 21, 21, 20, 18, 18, 17, - 16, 16, 15, 14, 14, 14, 18, 19, 20, 20, 20, 20, 19, 18, 17, 17, 16, 15, - 15, 14, 14, 13, - /* Size 32x32 */ - 32, 
33, 33, 33, 34, 34, 31, 31, 30, 28, 28, 26, 25, 23, 21, 21, 21, 21, - 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 33, 33, 33, 33, - 33, 33, 31, 30, 28, 27, 27, 25, 24, 23, 21, 21, 22, 22, 22, 22, 22, 21, - 21, 21, 20, 20, 20, 20, 19, 19, 19, 18, 33, 33, 33, 33, 33, 33, 30, 30, - 28, 27, 27, 25, 24, 23, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, - 20, 20, 19, 19, 19, 18, 33, 33, 33, 33, 33, 33, 30, 29, 28, 26, 26, 25, - 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, - 19, 19, 34, 33, 33, 33, 32, 32, 30, 29, 28, 26, 26, 24, 24, 23, 22, 22, - 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 34, 33, - 33, 33, 32, 32, 30, 29, 28, 26, 26, 24, 24, 23, 22, 22, 23, 23, 23, 23, - 23, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 31, 31, 30, 30, 30, 30, - 28, 27, 26, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, - 22, 22, 21, 21, 20, 20, 20, 19, 31, 30, 30, 29, 29, 29, 27, 26, 26, 24, - 24, 23, 23, 22, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, - 20, 20, 20, 19, 30, 28, 28, 28, 28, 28, 26, 26, 24, 23, 23, 23, 22, 22, - 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, - 28, 27, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 22, 22, - 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 28, 27, 27, 26, - 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 23, 23, 22, - 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 26, 25, 25, 25, 24, 24, 23, 23, - 23, 22, 22, 21, 21, 21, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 20, 20, 20, 20, 20, 19, 25, 24, 24, 24, 24, 24, 23, 23, 22, 22, 22, 21, - 21, 21, 20, 20, 20, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 19, - 19, 19, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 18, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 21, 21, 22, 22, 22, 22, - 22, 
22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 18, 18, 18, 18, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, - 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, - 18, 17, 17, 17, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 20, - 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, - 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, - 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 21, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 21, 21, 20, 19, 19, 19, 19, 18, 18, 18, 17, - 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 21, 22, 22, 22, 23, 23, 23, 23, - 23, 23, 23, 21, 21, 20, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, - 17, 17, 16, 16, 16, 16, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, - 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, - 16, 15, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, - 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 20, 21, - 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 18, 17, - 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 15, 20, 20, 20, 21, 22, 22, - 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, - 16, 16, 15, 15, 15, 15, 15, 14, 20, 20, 20, 21, 22, 22, 22, 22, 22, 22, - 22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, - 15, 15, 15, 14, 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 20, 20, 19, - 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, - 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 20, 20, 19, 19, 19, 18, 18, - 17, 17, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 19, 19, 19, 20, - 20, 20, 20, 20, 21, 21, 21, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, - 16, 15, 15, 15, 14, 14, 14, 14, 14, 13, 18, 19, 19, 19, 20, 20, 20, 20, - 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, - 14, 14, 14, 13, 13, 13, 18, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, - 19, 
19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 13, - 13, 13, 17, 18, 18, 19, 19, 19, 19, 19, 20, 20, 20, 19, 19, 18, 18, 18, - 17, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, - /* Size 4x8 */ - 33, 27, 22, 20, 32, 26, 23, 21, 26, 22, 23, 21, 23, 22, 20, 19, 22, 22, - 18, 18, 22, 22, 17, 16, 21, 22, 17, 15, 19, 20, 16, 14, - /* Size 8x4 */ - 33, 32, 26, 23, 22, 22, 21, 19, 27, 26, 22, 22, 22, 22, 22, 20, 22, 23, - 23, 20, 18, 17, 17, 16, 20, 21, 21, 19, 18, 16, 15, 14, - /* Size 8x16 */ - 32, 33, 28, 23, 21, 21, 20, 18, 33, 33, 27, 23, 22, 22, 20, 19, 34, 32, - 26, 23, 23, 23, 21, 20, 31, 29, 24, 22, 22, 23, 22, 20, 29, 28, 23, 22, - 22, 23, 22, 20, 28, 26, 22, 22, 22, 23, 22, 20, 24, 24, 22, 21, 20, 21, - 20, 19, 21, 22, 21, 20, 19, 19, 19, 18, 21, 22, 22, 20, 19, 19, 18, 17, - 21, 23, 22, 20, 19, 18, 17, 17, 21, 23, 22, 20, 19, 18, 17, 16, 20, 22, - 22, 20, 18, 17, 16, 15, 20, 21, 22, 19, 18, 17, 16, 14, 19, 21, 21, 19, - 18, 17, 15, 14, 19, 20, 21, 19, 18, 16, 15, 14, 18, 20, 20, 19, 17, 16, - 15, 13, - /* Size 16x8 */ - 32, 33, 34, 31, 29, 28, 24, 21, 21, 21, 21, 20, 20, 19, 19, 18, 33, 33, - 32, 29, 28, 26, 24, 22, 22, 23, 23, 22, 21, 21, 20, 20, 28, 27, 26, 24, - 23, 22, 22, 21, 22, 22, 22, 22, 22, 21, 21, 20, 23, 23, 23, 22, 22, 22, - 21, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 22, 23, 22, 22, 22, 20, 19, - 19, 19, 19, 18, 18, 18, 18, 17, 21, 22, 23, 23, 23, 23, 21, 19, 19, 18, - 18, 17, 17, 17, 16, 16, 20, 20, 21, 22, 22, 22, 20, 19, 18, 17, 17, 16, - 16, 15, 15, 15, 18, 19, 20, 20, 20, 20, 19, 18, 17, 17, 16, 15, 14, 14, - 14, 13, - /* Size 16x32 */ - 32, 33, 33, 31, 28, 28, 23, 21, 21, 21, 21, 20, 20, 19, 18, 18, 33, 33, - 33, 30, 27, 27, 23, 22, 22, 22, 22, 20, 20, 20, 19, 19, 33, 33, 33, 30, - 27, 27, 23, 22, 22, 22, 22, 21, 20, 20, 19, 19, 33, 33, 32, 30, 26, 26, - 23, 22, 22, 22, 22, 21, 21, 20, 19, 19, 34, 32, 32, 29, 26, 26, 23, 22, - 23, 23, 23, 22, 21, 21, 20, 20, 34, 32, 32, 29, 26, 26, 23, 22, 23, 23, - 23, 22, 
21, 21, 20, 20, 31, 30, 29, 28, 24, 24, 22, 22, 22, 23, 23, 22, - 22, 21, 20, 20, 31, 29, 28, 27, 24, 24, 22, 22, 22, 22, 22, 22, 22, 21, - 20, 20, 29, 28, 28, 26, 23, 23, 22, 22, 22, 23, 23, 22, 22, 21, 20, 20, - 28, 26, 26, 24, 22, 22, 22, 22, 22, 23, 23, 22, 22, 21, 20, 20, 28, 26, - 26, 24, 22, 22, 22, 22, 22, 23, 23, 22, 22, 21, 20, 20, 25, 24, 24, 23, - 22, 22, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 24, 24, 24, 23, 22, 22, - 21, 20, 20, 21, 21, 20, 20, 20, 19, 19, 23, 23, 23, 23, 22, 22, 20, 20, - 20, 20, 20, 20, 20, 19, 19, 19, 21, 22, 22, 22, 21, 21, 20, 19, 19, 19, - 19, 19, 19, 19, 18, 18, 21, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 19, - 19, 19, 18, 18, 21, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18, 18, - 17, 17, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 18, 18, 18, 18, 17, 17, - 21, 22, 23, 22, 22, 22, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 21, 22, - 23, 23, 22, 22, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 21, 22, 23, 23, - 22, 22, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 20, 22, 22, 22, 22, 22, - 20, 19, 18, 17, 17, 17, 16, 16, 16, 16, 20, 22, 22, 22, 22, 22, 20, 19, - 18, 17, 17, 16, 16, 16, 15, 15, 20, 21, 22, 22, 22, 22, 20, 19, 18, 17, - 17, 16, 16, 16, 15, 15, 20, 21, 21, 22, 22, 22, 19, 19, 18, 17, 17, 16, - 16, 15, 14, 14, 20, 21, 21, 22, 22, 22, 19, 19, 18, 17, 17, 16, 16, 15, - 14, 14, 19, 20, 21, 21, 21, 21, 19, 19, 18, 17, 17, 15, 15, 15, 14, 14, - 19, 20, 20, 21, 21, 21, 19, 19, 18, 17, 17, 15, 15, 15, 14, 14, 19, 20, - 20, 20, 21, 21, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14, 18, 19, 20, 20, - 20, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 18, 19, 20, 20, 20, 20, - 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 17, 19, 19, 19, 20, 20, 18, 18, - 17, 16, 16, 15, 14, 14, 13, 13, - /* Size 32x16 */ - 32, 33, 33, 33, 34, 34, 31, 31, 29, 28, 28, 25, 24, 23, 21, 21, 21, 21, - 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 33, 33, 33, 33, - 32, 32, 30, 29, 28, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 21, 21, 21, 20, 20, 20, 
19, 19, 19, 33, 33, 33, 32, 32, 32, 29, 28, - 28, 26, 26, 24, 24, 23, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 21, 21, - 21, 20, 20, 20, 20, 19, 31, 30, 30, 30, 29, 29, 28, 27, 26, 24, 24, 23, - 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 20, 20, - 20, 19, 28, 27, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 28, 27, - 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 23, 23, 23, 23, 23, 23, - 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 19, 19, 19, 19, 19, 19, 19, 18, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 18, 18, 18, 18, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 20, 20, - 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, - 21, 22, 22, 22, 23, 23, 23, 22, 23, 23, 23, 21, 21, 20, 19, 19, 19, 18, - 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 21, 22, 22, 22, - 23, 23, 23, 22, 23, 23, 23, 21, 21, 20, 19, 19, 19, 18, 18, 18, 18, 17, - 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 20, 20, 21, 21, 22, 22, 22, 22, - 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, - 15, 15, 15, 15, 15, 15, 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 20, - 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 15, - 15, 14, 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 20, 20, 19, 19, 19, - 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 18, 19, - 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, - 16, 16, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 18, 19, 19, 19, 20, 20, - 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, - 14, 14, 14, 14, 14, 13, 13, 13, - /* Size 4x16 */ - 33, 28, 21, 19, 33, 27, 22, 20, 32, 26, 23, 21, 30, 24, 23, 21, 28, 23, - 23, 21, 26, 22, 23, 21, 24, 22, 21, 20, 22, 21, 19, 
19, 22, 22, 19, 18, - 22, 22, 18, 17, 22, 22, 18, 17, 22, 22, 17, 16, 21, 22, 17, 15, 20, 21, - 17, 15, 20, 21, 16, 14, 19, 20, 16, 14, - /* Size 16x4 */ - 33, 33, 32, 30, 28, 26, 24, 22, 22, 22, 22, 22, 21, 20, 20, 19, 28, 27, - 26, 24, 23, 22, 22, 21, 22, 22, 22, 22, 22, 21, 21, 20, 21, 22, 23, 23, - 23, 23, 21, 19, 19, 18, 18, 17, 17, 17, 16, 16, 19, 20, 21, 21, 21, 21, - 20, 19, 18, 17, 17, 16, 15, 15, 14, 14, - /* Size 8x32 */ - 32, 33, 28, 23, 21, 21, 20, 18, 33, 33, 27, 23, 22, 22, 20, 19, 33, 33, - 27, 23, 22, 22, 20, 19, 33, 32, 26, 23, 22, 22, 21, 19, 34, 32, 26, 23, - 23, 23, 21, 20, 34, 32, 26, 23, 23, 23, 21, 20, 31, 29, 24, 22, 22, 23, - 22, 20, 31, 28, 24, 22, 22, 22, 22, 20, 29, 28, 23, 22, 22, 23, 22, 20, - 28, 26, 22, 22, 22, 23, 22, 20, 28, 26, 22, 22, 22, 23, 22, 20, 25, 24, - 22, 21, 21, 21, 20, 20, 24, 24, 22, 21, 20, 21, 20, 19, 23, 23, 22, 20, - 20, 20, 20, 19, 21, 22, 21, 20, 19, 19, 19, 18, 21, 22, 21, 20, 19, 19, - 19, 18, 21, 22, 22, 20, 19, 19, 18, 17, 21, 22, 22, 20, 19, 18, 18, 17, - 21, 23, 22, 20, 19, 18, 17, 17, 21, 23, 22, 20, 19, 18, 17, 16, 21, 23, - 22, 20, 19, 18, 17, 16, 20, 22, 22, 20, 18, 17, 16, 16, 20, 22, 22, 20, - 18, 17, 16, 15, 20, 22, 22, 20, 18, 17, 16, 15, 20, 21, 22, 19, 18, 17, - 16, 14, 20, 21, 22, 19, 18, 17, 16, 14, 19, 21, 21, 19, 18, 17, 15, 14, - 19, 20, 21, 19, 18, 17, 15, 14, 19, 20, 21, 19, 18, 16, 15, 14, 18, 20, - 20, 19, 17, 16, 15, 13, 18, 20, 20, 19, 17, 16, 15, 13, 17, 19, 20, 18, - 17, 16, 14, 13, - /* Size 32x8 */ - 32, 33, 33, 33, 34, 34, 31, 31, 29, 28, 28, 25, 24, 23, 21, 21, 21, 21, - 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 33, 33, 33, 32, - 32, 32, 29, 28, 28, 26, 26, 24, 24, 23, 22, 22, 22, 22, 23, 23, 23, 22, - 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 28, 27, 27, 26, 26, 26, 24, 24, - 23, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 21, 21, 21, 20, 20, 20, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, - 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 19, 19, 19, 19, 19, 19, - 19, 18, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, - 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 21, 22, - 22, 22, 23, 23, 23, 22, 23, 23, 23, 21, 21, 20, 19, 19, 19, 18, 18, 18, - 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 20, 20, 20, 21, 21, 21, - 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, - 16, 16, 15, 15, 15, 15, 15, 14, 18, 19, 19, 19, 20, 20, 20, 20, 20, 20, - 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 14, 14, - 14, 13, 13, 13 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 32, 27, 20, 32, 29, 26, 21, 27, 26, 19, 16, 20, 21, 16, 13, - /* Size 8x8 */ - 33, 32, 32, 30, 29, 25, 22, 19, 32, 32, 32, 31, 30, 26, 23, 20, 32, 32, - 30, 29, 28, 25, 23, 20, 30, 31, 29, 26, 24, 22, 20, 19, 29, 30, 28, 24, - 21, 19, 18, 17, 25, 26, 25, 22, 19, 17, 16, 15, 22, 23, 23, 20, 18, 16, - 14, 13, 19, 20, 20, 19, 17, 15, 13, 12, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 17, 33, 32, - 32, 32, 32, 32, 31, 30, 29, 28, 27, 24, 23, 22, 20, 18, 33, 32, 32, 32, - 32, 32, 31, 31, 30, 28, 28, 25, 23, 22, 20, 19, 33, 32, 32, 32, 32, 31, - 31, 30, 29, 28, 27, 25, 23, 23, 21, 19, 33, 32, 32, 32, 31, 30, 30, 29, - 28, 27, 26, 24, 23, 22, 20, 19, 32, 32, 32, 31, 30, 29, 28, 28, 27, 26, - 26, 24, 23, 22, 21, 19, 32, 31, 31, 31, 30, 28, 28, 27, 26, 25, 24, 23, - 22, 21, 20, 19, 30, 30, 31, 30, 29, 28, 27, 26, 24, 23, 23, 22, 20, 20, - 19, 18, 28, 29, 30, 29, 28, 27, 26, 24, 21, 20, 20, 19, 18, 18, 17, 16, - 27, 28, 28, 28, 27, 26, 25, 23, 20, 20, 20, 18, 18, 17, 16, 15, 26, 27, - 28, 27, 26, 26, 24, 23, 20, 20, 19, 18, 17, 17, 16, 15, 23, 24, 25, 25, - 24, 24, 23, 22, 19, 18, 18, 16, 16, 15, 14, 14, 22, 23, 23, 23, 23, 23, - 22, 20, 18, 18, 17, 16, 15, 14, 14, 13, 21, 22, 22, 23, 22, 22, 21, 20, - 18, 17, 17, 15, 14, 14, 13, 13, 19, 20, 20, 21, 20, 21, 20, 19, 17, 16, - 16, 14, 14, 13, 12, 12, 17, 18, 19, 19, 19, 19, 19, 18, 16, 
15, 15, 14, - 13, 13, 12, 11, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 29, 28, 28, - 27, 26, 26, 24, 23, 23, 22, 21, 21, 19, 19, 19, 17, 17, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 28, 26, 26, 25, - 24, 24, 22, 22, 21, 20, 20, 19, 18, 18, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 28, 27, 27, 25, 24, 24, 23, 22, - 22, 20, 20, 19, 18, 18, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 30, 30, 30, 29, 29, 28, 27, 27, 25, 24, 24, 23, 22, 22, 20, 20, 20, - 18, 18, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, - 30, 30, 28, 28, 28, 26, 25, 25, 23, 23, 22, 21, 20, 20, 19, 19, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 28, 28, - 28, 26, 25, 25, 23, 23, 23, 21, 21, 20, 19, 19, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 29, 28, 27, 27, 26, 25, 25, - 23, 23, 23, 21, 21, 20, 19, 19, 33, 32, 32, 32, 32, 32, 32, 31, 31, 31, - 30, 30, 30, 29, 29, 29, 28, 28, 27, 26, 26, 25, 24, 24, 23, 23, 22, 21, - 20, 20, 19, 19, 33, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29, - 29, 28, 28, 28, 27, 26, 26, 25, 24, 24, 23, 23, 22, 21, 20, 20, 19, 19, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, - 27, 26, 26, 25, 24, 24, 23, 23, 22, 21, 21, 20, 19, 19, 32, 32, 32, 32, - 32, 32, 31, 30, 30, 30, 29, 29, 28, 28, 28, 28, 27, 27, 26, 26, 26, 24, - 24, 24, 23, 22, 22, 21, 21, 20, 19, 19, 32, 32, 32, 32, 32, 32, 31, 30, - 30, 30, 29, 29, 28, 28, 28, 28, 27, 27, 26, 26, 26, 24, 24, 24, 23, 22, - 22, 21, 21, 20, 19, 19, 32, 31, 31, 31, 31, 31, 31, 30, 30, 29, 28, 28, - 28, 27, 27, 26, 26, 26, 25, 24, 24, 23, 23, 23, 22, 22, 21, 20, 20, 20, - 19, 19, 30, 30, 30, 30, 31, 31, 30, 29, 29, 29, 28, 28, 27, 26, 26, 25, - 24, 24, 23, 23, 23, 22, 22, 21, 20, 20, 20, 19, 19, 19, 18, 18, 30, 30, - 30, 30, 31, 31, 30, 29, 29, 29, 28, 28, 27, 26, 26, 25, 24, 24, 23, 23, - 23, 22, 22, 21, 20, 20, 
20, 19, 19, 19, 18, 18, 29, 30, 30, 30, 30, 30, - 30, 29, 28, 28, 28, 28, 26, 25, 25, 24, 23, 23, 22, 22, 22, 21, 20, 20, - 19, 19, 19, 18, 18, 18, 17, 17, 28, 29, 29, 29, 30, 30, 29, 28, 28, 28, - 27, 27, 26, 24, 24, 23, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, - 17, 17, 16, 16, 28, 29, 29, 29, 30, 30, 29, 28, 28, 28, 27, 27, 26, 24, - 24, 23, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, - 27, 28, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 25, 23, 23, 22, 20, 20, - 20, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 15, 15, 26, 26, 27, 27, - 28, 28, 27, 26, 26, 26, 26, 26, 24, 23, 23, 22, 20, 20, 20, 19, 19, 18, - 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 26, 26, 27, 27, 28, 28, 27, 26, - 26, 26, 26, 26, 24, 23, 23, 22, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, - 17, 16, 16, 16, 15, 15, 24, 25, 25, 25, 26, 26, 26, 25, 25, 25, 24, 24, - 23, 22, 22, 21, 19, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, - 14, 14, 23, 24, 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 23, 22, 22, 20, - 19, 19, 18, 18, 18, 17, 16, 16, 16, 15, 15, 14, 14, 14, 14, 14, 23, 24, - 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 23, 21, 21, 20, 19, 19, 18, 18, - 18, 17, 16, 16, 16, 15, 15, 14, 14, 14, 13, 13, 22, 22, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 22, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, - 15, 15, 14, 14, 14, 13, 13, 13, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, - 22, 22, 22, 20, 20, 19, 18, 18, 17, 17, 17, 16, 15, 15, 15, 14, 14, 14, - 13, 13, 13, 13, 21, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 20, - 20, 19, 18, 18, 17, 17, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, - 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 20, 19, 19, 18, 17, 17, - 17, 16, 16, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 19, 20, 20, 20, - 20, 21, 21, 20, 20, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, - 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 19, 19, 19, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 14, 14, 13, 13, - 13, 12, 12, 12, 12, 12, 
17, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 18, 18, 17, 16, 16, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, - 11, 11, 17, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 17, - 16, 16, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, - /* Size 4x8 */ - 32, 32, 28, 20, 32, 31, 28, 21, 32, 30, 27, 21, 30, 28, 23, 19, 29, 27, - 21, 17, 26, 24, 19, 15, 22, 22, 17, 13, 20, 20, 16, 12, - /* Size 8x4 */ - 32, 32, 32, 30, 29, 26, 22, 20, 32, 31, 30, 28, 27, 24, 22, 20, 28, 28, - 27, 23, 21, 19, 17, 16, 20, 21, 21, 19, 17, 15, 13, 12, - /* Size 8x16 */ - 32, 33, 32, 32, 28, 23, 22, 19, 33, 32, 32, 31, 29, 24, 23, 20, 33, 32, - 32, 31, 29, 25, 23, 21, 33, 32, 31, 31, 29, 25, 23, 21, 32, 32, 30, 30, - 28, 24, 23, 20, 32, 31, 29, 28, 27, 24, 23, 21, 32, 31, 29, 28, 26, 23, - 22, 20, 30, 30, 28, 27, 24, 21, 20, 19, 28, 30, 28, 26, 21, 19, 18, 17, - 27, 28, 26, 25, 21, 18, 18, 16, 26, 28, 26, 24, 20, 18, 17, 16, 23, 25, - 24, 23, 19, 16, 16, 14, 22, 23, 23, 22, 18, 16, 15, 14, 21, 22, 22, 21, - 18, 15, 14, 13, 19, 21, 20, 20, 17, 14, 14, 12, 18, 19, 19, 19, 16, 14, - 13, 12, - /* Size 16x8 */ - 32, 33, 33, 33, 32, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 18, 33, 32, - 32, 32, 32, 31, 31, 30, 30, 28, 28, 25, 23, 22, 21, 19, 32, 32, 32, 31, - 30, 29, 29, 28, 28, 26, 26, 24, 23, 22, 20, 19, 32, 31, 31, 31, 30, 28, - 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 28, 29, 29, 29, 28, 27, 26, 24, - 21, 21, 20, 19, 18, 18, 17, 16, 23, 24, 25, 25, 24, 24, 23, 21, 19, 18, - 18, 16, 16, 15, 14, 14, 22, 23, 23, 23, 23, 23, 22, 20, 18, 18, 17, 16, - 15, 14, 14, 13, 19, 20, 21, 21, 20, 21, 20, 19, 17, 16, 16, 14, 14, 13, - 12, 12, - /* Size 16x32 */ - 32, 33, 33, 33, 32, 32, 32, 29, 28, 27, 23, 23, 22, 19, 19, 17, 33, 32, - 32, 32, 32, 32, 31, 29, 29, 28, 24, 24, 22, 20, 20, 18, 33, 32, 32, 32, - 32, 32, 31, 29, 29, 28, 24, 24, 23, 20, 20, 18, 33, 32, 32, 32, 32, 32, - 31, 29, 29, 28, 24, 24, 23, 20, 20, 18, 33, 32, 32, 32, 32, 32, 31, 30, - 29, 28, 25, 25, 23, 21, 21, 
19, 33, 32, 32, 32, 32, 31, 31, 30, 30, 28, - 25, 25, 23, 21, 21, 19, 33, 32, 32, 32, 31, 31, 31, 29, 29, 28, 25, 25, - 23, 21, 21, 19, 32, 32, 32, 32, 31, 30, 30, 28, 28, 27, 24, 24, 23, 21, - 21, 19, 32, 32, 32, 31, 30, 30, 30, 28, 28, 27, 24, 24, 23, 20, 20, 19, - 32, 32, 32, 31, 30, 30, 29, 28, 28, 27, 24, 24, 23, 21, 21, 19, 32, 32, - 31, 31, 29, 29, 28, 27, 27, 26, 24, 24, 23, 21, 21, 19, 32, 32, 31, 31, - 29, 29, 28, 27, 27, 26, 24, 24, 23, 21, 21, 19, 32, 31, 31, 31, 29, 28, - 28, 26, 26, 25, 23, 23, 22, 20, 20, 19, 30, 30, 30, 30, 28, 28, 27, 24, - 24, 23, 21, 21, 20, 19, 19, 18, 30, 30, 30, 30, 28, 28, 27, 24, 24, 23, - 21, 21, 20, 19, 19, 18, 29, 30, 30, 30, 28, 28, 26, 23, 23, 22, 20, 20, - 19, 18, 18, 17, 28, 29, 30, 29, 28, 27, 26, 22, 21, 21, 19, 19, 18, 17, - 17, 16, 28, 29, 30, 29, 28, 27, 26, 22, 21, 21, 19, 19, 18, 17, 17, 16, - 27, 28, 28, 28, 26, 26, 25, 21, 21, 20, 18, 18, 18, 16, 16, 15, 26, 27, - 28, 27, 26, 26, 24, 21, 20, 20, 18, 18, 17, 16, 16, 15, 26, 27, 28, 27, - 26, 26, 24, 21, 20, 20, 18, 18, 17, 16, 16, 15, 24, 26, 26, 26, 24, 24, - 23, 20, 20, 19, 17, 17, 16, 15, 15, 14, 23, 24, 25, 25, 24, 24, 23, 20, - 19, 18, 16, 16, 16, 14, 14, 14, 23, 24, 25, 25, 24, 24, 23, 20, 19, 18, - 16, 16, 16, 14, 14, 13, 22, 23, 23, 23, 23, 23, 22, 19, 18, 18, 16, 16, - 15, 14, 14, 13, 21, 22, 23, 23, 22, 22, 21, 19, 18, 17, 15, 15, 15, 13, - 13, 13, 21, 22, 22, 22, 22, 22, 21, 18, 18, 17, 15, 15, 14, 13, 13, 13, - 19, 20, 21, 21, 21, 21, 20, 18, 17, 17, 14, 14, 14, 13, 13, 12, 19, 20, - 21, 21, 20, 20, 20, 17, 17, 16, 14, 14, 14, 12, 12, 12, 19, 20, 20, 20, - 20, 20, 19, 17, 17, 16, 14, 14, 13, 12, 12, 12, 18, 19, 19, 19, 19, 19, - 19, 17, 16, 15, 14, 14, 13, 12, 12, 11, 18, 19, 19, 19, 19, 19, 19, 17, - 16, 15, 14, 14, 13, 12, 12, 11, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 30, 29, 28, 28, - 27, 26, 26, 24, 23, 23, 22, 21, 21, 19, 19, 19, 18, 18, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 
29, 29, 28, 27, 27, 26, - 24, 24, 23, 22, 22, 20, 20, 20, 19, 19, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 28, 28, 28, 26, 25, 25, 23, 23, - 22, 21, 21, 20, 19, 19, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 30, 30, 30, 29, 29, 28, 27, 27, 26, 25, 25, 23, 23, 22, 21, 21, 20, - 19, 19, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 29, 28, 28, 28, - 28, 28, 26, 26, 26, 24, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 32, 32, - 32, 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 28, 27, 27, 26, 26, - 26, 24, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 32, 31, 31, 31, 31, 31, - 31, 30, 30, 29, 28, 28, 28, 27, 27, 26, 26, 26, 25, 24, 24, 23, 23, 23, - 22, 21, 21, 20, 20, 19, 19, 19, 29, 29, 29, 29, 30, 30, 29, 28, 28, 28, - 27, 27, 26, 24, 24, 23, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 18, 18, - 17, 17, 17, 17, 28, 29, 29, 29, 29, 30, 29, 28, 28, 28, 27, 27, 26, 24, - 24, 23, 21, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, - 27, 28, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 25, 23, 23, 22, 21, 21, - 20, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 15, 15, 23, 24, 24, 24, - 25, 25, 25, 24, 24, 24, 24, 24, 23, 21, 21, 20, 19, 19, 18, 18, 18, 17, - 16, 16, 16, 15, 15, 14, 14, 14, 14, 14, 23, 24, 24, 24, 25, 25, 25, 24, - 24, 24, 24, 24, 23, 21, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16, 15, - 15, 14, 14, 14, 14, 14, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 22, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14, 14, 14, 13, - 13, 13, 19, 20, 20, 20, 21, 21, 21, 21, 20, 21, 21, 21, 20, 19, 19, 18, - 17, 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 19, 20, - 20, 20, 21, 21, 21, 21, 20, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, - 16, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 17, 18, 18, 18, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 18, 18, 17, 16, 16, 15, 15, 15, 14, 14, 13, - 13, 13, 13, 12, 12, 12, 11, 11, - /* Size 4x16 */ - 33, 32, 27, 19, 32, 32, 28, 20, 32, 32, 28, 21, 32, 31, 28, 21, 32, 30, 
- 27, 20, 32, 29, 26, 21, 31, 28, 25, 20, 30, 28, 23, 19, 29, 27, 21, 17, - 28, 26, 20, 16, 27, 26, 20, 16, 24, 24, 18, 14, 23, 23, 18, 14, 22, 22, - 17, 13, 20, 20, 16, 12, 19, 19, 15, 12, - /* Size 16x4 */ - 33, 32, 32, 32, 32, 32, 31, 30, 29, 28, 27, 24, 23, 22, 20, 19, 32, 32, - 32, 31, 30, 29, 28, 28, 27, 26, 26, 24, 23, 22, 20, 19, 27, 28, 28, 28, - 27, 26, 25, 23, 21, 20, 20, 18, 18, 17, 16, 15, 19, 20, 21, 21, 20, 21, - 20, 19, 17, 16, 16, 14, 14, 13, 12, 12, - /* Size 8x32 */ - 32, 33, 32, 32, 28, 23, 22, 19, 33, 32, 32, 31, 29, 24, 22, 20, 33, 32, - 32, 31, 29, 24, 23, 20, 33, 32, 32, 31, 29, 24, 23, 20, 33, 32, 32, 31, - 29, 25, 23, 21, 33, 32, 32, 31, 30, 25, 23, 21, 33, 32, 31, 31, 29, 25, - 23, 21, 32, 32, 31, 30, 28, 24, 23, 21, 32, 32, 30, 30, 28, 24, 23, 20, - 32, 32, 30, 29, 28, 24, 23, 21, 32, 31, 29, 28, 27, 24, 23, 21, 32, 31, - 29, 28, 27, 24, 23, 21, 32, 31, 29, 28, 26, 23, 22, 20, 30, 30, 28, 27, - 24, 21, 20, 19, 30, 30, 28, 27, 24, 21, 20, 19, 29, 30, 28, 26, 23, 20, - 19, 18, 28, 30, 28, 26, 21, 19, 18, 17, 28, 30, 28, 26, 21, 19, 18, 17, - 27, 28, 26, 25, 21, 18, 18, 16, 26, 28, 26, 24, 20, 18, 17, 16, 26, 28, - 26, 24, 20, 18, 17, 16, 24, 26, 24, 23, 20, 17, 16, 15, 23, 25, 24, 23, - 19, 16, 16, 14, 23, 25, 24, 23, 19, 16, 16, 14, 22, 23, 23, 22, 18, 16, - 15, 14, 21, 23, 22, 21, 18, 15, 15, 13, 21, 22, 22, 21, 18, 15, 14, 13, - 19, 21, 21, 20, 17, 14, 14, 13, 19, 21, 20, 20, 17, 14, 14, 12, 19, 20, - 20, 19, 17, 14, 13, 12, 18, 19, 19, 19, 16, 14, 13, 12, 18, 19, 19, 19, - 16, 14, 13, 12, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 30, 29, 28, 28, - 27, 26, 26, 24, 23, 23, 22, 21, 21, 19, 19, 19, 18, 18, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 28, 28, 28, 26, - 25, 25, 23, 23, 22, 21, 21, 20, 19, 19, 32, 32, 32, 32, 32, 32, 31, 31, - 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 26, 26, 26, 24, 24, 24, 23, 22, - 22, 21, 20, 20, 19, 19, 32, 31, 31, 31, 31, 31, 31, 30, 30, 29, 
28, 28, - 28, 27, 27, 26, 26, 26, 25, 24, 24, 23, 23, 23, 22, 21, 21, 20, 20, 19, - 19, 19, 28, 29, 29, 29, 29, 30, 29, 28, 28, 28, 27, 27, 26, 24, 24, 23, - 21, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 23, 24, - 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 23, 21, 21, 20, 19, 19, 18, 18, - 18, 17, 16, 16, 16, 15, 15, 14, 14, 14, 14, 14, 22, 22, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 22, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, - 15, 15, 14, 14, 14, 13, 13, 13, 19, 20, 20, 20, 21, 21, 21, 21, 20, 21, - 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, 13, - 12, 12, 12, 12 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 27, 22, 21, 27, 22, 22, 22, 22, 22, 19, 18, 21, 22, 18, 16, - /* Size 8x8 */ - 33, 33, 29, 24, 21, 22, 21, 20, 33, 32, 28, 24, 22, 23, 22, 21, 29, 28, - 25, 23, 22, 23, 22, 21, 24, 24, 23, 21, 20, 21, 20, 20, 21, 22, 22, 20, - 19, 19, 19, 19, 22, 23, 23, 21, 19, 18, 17, 17, 21, 22, 22, 20, 19, 17, - 17, 16, 20, 21, 21, 20, 19, 17, 16, 15, - /* Size 16x16 */ - 32, 33, 34, 33, 31, 28, 27, 25, 21, 21, 21, 21, 20, 20, 20, 19, 33, 33, - 33, 32, 30, 27, 26, 24, 22, 22, 22, 22, 21, 21, 20, 20, 34, 33, 33, 32, - 29, 26, 25, 24, 22, 22, 22, 23, 22, 22, 21, 20, 33, 32, 32, 31, 28, 26, - 25, 24, 22, 22, 23, 23, 22, 22, 22, 21, 31, 30, 29, 28, 26, 24, 23, 23, - 22, 22, 22, 23, 22, 22, 22, 21, 28, 27, 26, 26, 24, 22, 22, 22, 21, 22, - 22, 23, 22, 22, 22, 21, 27, 26, 25, 25, 23, 22, 22, 21, 21, 21, 21, 22, - 22, 22, 21, 21, 25, 24, 24, 24, 23, 22, 21, 21, 20, 20, 21, 21, 20, 20, - 20, 20, 21, 22, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, - 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 18, 18, 21, 22, - 22, 23, 22, 22, 21, 21, 19, 19, 19, 19, 18, 18, 18, 18, 21, 22, 23, 23, - 23, 23, 22, 21, 19, 19, 19, 18, 17, 17, 17, 17, 20, 21, 22, 22, 22, 22, - 22, 20, 19, 19, 18, 17, 17, 17, 16, 16, 20, 21, 22, 22, 22, 22, 22, 20, - 19, 19, 18, 17, 17, 17, 16, 16, 20, 20, 21, 22, 22, 22, 21, 20, 19, 18, - 18, 17, 16, 16, 
16, 15, 19, 20, 20, 21, 21, 21, 21, 20, 19, 18, 18, 17, - 16, 16, 15, 14, - /* Size 32x32 */ - 32, 33, 33, 33, 34, 34, 33, 31, 31, 30, 28, 28, 27, 25, 25, 23, 21, 21, - 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 33, 33, 33, 33, - 33, 33, 33, 30, 30, 29, 27, 27, 26, 24, 24, 23, 21, 21, 22, 22, 22, 22, - 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 33, 33, 33, 33, 33, 33, 32, 30, - 30, 29, 27, 27, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, - 21, 21, 20, 20, 20, 20, 33, 33, 33, 33, 33, 33, 32, 30, 30, 28, 27, 27, - 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 20, - 20, 20, 34, 33, 33, 33, 33, 33, 32, 29, 29, 28, 26, 26, 25, 24, 24, 23, - 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 34, 33, - 33, 33, 33, 32, 32, 29, 29, 28, 26, 26, 25, 24, 24, 23, 22, 22, 22, 23, - 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 33, 33, 32, 32, 32, 32, - 31, 29, 28, 28, 26, 26, 25, 24, 24, 23, 22, 22, 22, 23, 23, 23, 23, 23, - 22, 22, 22, 22, 22, 21, 21, 21, 31, 30, 30, 30, 29, 29, 29, 27, 27, 26, - 24, 24, 24, 23, 23, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, - 22, 21, 21, 21, 31, 30, 30, 30, 29, 29, 28, 27, 26, 26, 24, 24, 23, 23, - 23, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, - 30, 29, 29, 28, 28, 28, 28, 26, 26, 25, 23, 23, 23, 23, 23, 22, 22, 22, - 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 28, 27, 27, 27, - 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 23, - 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 28, 27, 27, 27, 26, 26, 26, 24, - 24, 23, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 23, 23, 23, 22, 22, - 22, 22, 22, 22, 21, 21, 27, 26, 26, 26, 25, 25, 25, 24, 23, 23, 22, 22, - 22, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 21, 21, 21, - 21, 21, 25, 24, 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 21, 21, 21, 21, - 20, 20, 20, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 25, 24, - 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 21, 21, 21, 
21, 20, 20, 20, 21, - 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 23, 23, 23, 23, 23, 23, - 23, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, - 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, - 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 21, 22, 22, 22, - 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, - 19, 18, 18, 18, 18, 18, 18, 18, 18, 18, 21, 22, 22, 22, 22, 23, 23, 22, - 22, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, - 22, 21, 21, 20, 19, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, - 17, 17, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 22, 21, 21, 20, - 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 21, 22, - 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 18, - 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 20, 21, 21, 21, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, - 17, 17, 17, 16, 16, 16, 16, 16, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 20, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 16, - 16, 16, 16, 16, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 20, - 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, - 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19, - 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 20, 20, 20, 21, - 21, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, - 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 19, 20, 20, 20, 21, 21, 21, 21, - 21, 21, 22, 22, 21, 20, 20, 20, 19, 19, 18, 18, 18, 
17, 17, 17, 16, 16, - 16, 15, 15, 15, 15, 15, 19, 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, - 21, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, - 14, 14, 19, 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 19, - 19, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14, - /* Size 4x8 */ - 33, 27, 22, 20, 33, 26, 22, 21, 28, 23, 22, 22, 24, 22, 20, 20, 22, 21, - 19, 19, 22, 22, 19, 17, 21, 22, 19, 16, 20, 21, 18, 15, - /* Size 8x4 */ - 33, 33, 28, 24, 22, 22, 21, 20, 27, 26, 23, 22, 21, 22, 22, 21, 22, 22, - 22, 20, 19, 19, 19, 18, 20, 21, 22, 20, 19, 17, 16, 15, - /* Size 8x16 */ - 32, 33, 29, 27, 21, 21, 20, 20, 33, 33, 28, 26, 22, 22, 21, 20, 34, 32, - 27, 26, 22, 23, 22, 21, 33, 31, 27, 25, 22, 23, 22, 21, 31, 28, 25, 23, - 22, 22, 22, 22, 28, 26, 23, 22, 22, 23, 22, 22, 26, 25, 22, 22, 21, 22, - 22, 21, 24, 24, 22, 21, 20, 21, 20, 20, 21, 22, 21, 21, 19, 19, 19, 19, - 21, 22, 22, 21, 19, 19, 19, 18, 21, 22, 22, 21, 19, 18, 18, 18, 21, 23, - 23, 22, 19, 18, 17, 17, 20, 22, 22, 21, 19, 17, 17, 16, 20, 22, 22, 21, - 19, 17, 17, 16, 20, 21, 22, 21, 19, 17, 16, 16, 19, 20, 21, 20, 19, 17, - 16, 15, - /* Size 16x8 */ - 32, 33, 34, 33, 31, 28, 26, 24, 21, 21, 21, 21, 20, 20, 20, 19, 33, 33, - 32, 31, 28, 26, 25, 24, 22, 22, 22, 23, 22, 22, 21, 20, 29, 28, 27, 27, - 25, 23, 22, 22, 21, 22, 22, 23, 22, 22, 22, 21, 27, 26, 26, 25, 23, 22, - 22, 21, 21, 21, 21, 22, 21, 21, 21, 20, 21, 22, 22, 22, 22, 22, 21, 20, - 19, 19, 19, 19, 19, 19, 19, 19, 21, 22, 23, 23, 22, 23, 22, 21, 19, 19, - 18, 18, 17, 17, 17, 17, 20, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 17, - 17, 17, 16, 16, 20, 20, 21, 21, 22, 22, 21, 20, 19, 18, 18, 17, 16, 16, - 16, 15, - /* Size 16x32 */ - 32, 33, 33, 33, 29, 28, 27, 22, 21, 21, 21, 21, 20, 20, 20, 19, 33, 33, - 33, 32, 28, 27, 26, 22, 22, 22, 21, 21, 21, 20, 20, 19, 33, 33, 33, 32, - 28, 27, 26, 22, 22, 22, 22, 22, 21, 20, 20, 20, 33, 33, 33, 32, 28, 27, - 26, 22, 22, 22, 22, 22, 21, 20, 20, 20, 34, 33, 32, 32, 
27, 26, 26, 23, - 22, 22, 23, 23, 22, 21, 21, 20, 34, 33, 32, 31, 27, 26, 25, 23, 22, 22, - 23, 23, 22, 21, 21, 20, 33, 32, 31, 31, 27, 26, 25, 23, 22, 22, 23, 23, - 22, 21, 21, 20, 31, 29, 29, 28, 25, 24, 24, 22, 22, 22, 23, 23, 22, 22, - 22, 21, 31, 29, 28, 28, 25, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, - 30, 28, 28, 28, 24, 23, 23, 22, 22, 22, 23, 23, 22, 22, 22, 21, 28, 26, - 26, 25, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 21, 28, 26, 26, 25, - 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 21, 26, 26, 25, 24, 22, 22, - 22, 21, 21, 21, 22, 22, 22, 21, 21, 20, 24, 24, 24, 24, 22, 22, 21, 20, - 20, 20, 21, 21, 20, 20, 20, 20, 24, 24, 24, 24, 22, 22, 21, 20, 20, 20, - 21, 21, 20, 20, 20, 20, 23, 23, 23, 23, 22, 22, 21, 20, 20, 20, 20, 20, - 20, 20, 20, 19, 21, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, - 19, 19, 21, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, - 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 18, 18, 18, 21, 22, - 22, 22, 22, 22, 21, 20, 19, 19, 18, 18, 18, 18, 18, 17, 21, 22, 22, 22, - 22, 22, 21, 20, 19, 19, 18, 18, 18, 18, 18, 17, 21, 22, 23, 23, 22, 22, - 22, 20, 19, 19, 18, 18, 18, 17, 17, 17, 21, 22, 23, 23, 23, 22, 22, 20, - 19, 19, 18, 18, 17, 17, 17, 17, 21, 22, 23, 23, 22, 22, 22, 20, 19, 19, - 18, 18, 17, 17, 17, 16, 20, 22, 22, 22, 22, 22, 21, 19, 19, 19, 17, 17, - 17, 16, 16, 16, 20, 21, 22, 22, 22, 22, 21, 19, 19, 19, 17, 17, 17, 16, - 16, 16, 20, 21, 22, 22, 22, 22, 21, 19, 19, 19, 17, 17, 17, 16, 16, 16, - 20, 21, 21, 21, 22, 22, 21, 19, 19, 18, 17, 17, 16, 16, 16, 15, 20, 21, - 21, 21, 22, 22, 21, 19, 19, 18, 17, 17, 16, 16, 16, 15, 19, 20, 21, 21, - 21, 21, 21, 19, 19, 18, 17, 17, 16, 15, 15, 15, 19, 20, 20, 20, 21, 21, - 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 19, 20, 20, 20, 21, 21, 20, 19, - 19, 18, 17, 17, 16, 15, 15, 14, - /* Size 32x16 */ - 32, 33, 33, 33, 34, 34, 33, 31, 31, 30, 28, 28, 26, 24, 24, 23, 21, 21, - 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 33, 33, 33, 33, - 33, 
33, 32, 29, 29, 28, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 33, 33, 33, 33, 32, 32, 31, 29, - 28, 28, 26, 26, 25, 24, 24, 23, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, - 22, 21, 21, 21, 20, 20, 33, 32, 32, 32, 32, 31, 31, 28, 28, 28, 25, 25, - 24, 24, 24, 23, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 21, 21, 21, - 20, 20, 29, 28, 28, 28, 27, 27, 27, 25, 25, 24, 23, 23, 22, 22, 22, 22, - 21, 21, 22, 22, 22, 22, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 28, 27, - 27, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 27, 26, 26, 26, 26, 25, - 25, 24, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, - 21, 21, 21, 21, 21, 21, 20, 20, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, - 22, 22, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, - 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, - 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 21, 21, 22, 22, - 23, 23, 23, 23, 22, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 18, 18, 18, - 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 21, 21, 22, 22, 23, 23, 23, 23, - 22, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, - 17, 17, 17, 17, 17, 17, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, - 16, 16, 20, 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 21, 20, 20, 20, - 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 20, 20, - 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19, 18, 18, - 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 19, 19, 20, 20, 20, 20, - 20, 21, 21, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18, 17, 17, 17, 17, 16, - 16, 16, 16, 15, 15, 15, 14, 14, - /* Size 4x16 */ - 33, 28, 21, 20, 33, 27, 22, 
20, 33, 26, 22, 21, 32, 26, 22, 21, 29, 24, - 22, 22, 26, 22, 22, 22, 26, 22, 21, 21, 24, 22, 20, 20, 22, 21, 19, 19, - 22, 22, 19, 18, 22, 22, 19, 18, 22, 22, 19, 17, 22, 22, 19, 16, 21, 22, - 19, 16, 21, 22, 18, 16, 20, 21, 18, 15, - /* Size 16x4 */ - 33, 33, 33, 32, 29, 26, 26, 24, 22, 22, 22, 22, 22, 21, 21, 20, 28, 27, - 26, 26, 24, 22, 22, 22, 21, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, - 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 18, 18, 20, 20, 21, 21, 22, 22, - 21, 20, 19, 18, 18, 17, 16, 16, 16, 15, - /* Size 8x32 */ - 32, 33, 29, 27, 21, 21, 20, 20, 33, 33, 28, 26, 22, 21, 21, 20, 33, 33, - 28, 26, 22, 22, 21, 20, 33, 33, 28, 26, 22, 22, 21, 20, 34, 32, 27, 26, - 22, 23, 22, 21, 34, 32, 27, 25, 22, 23, 22, 21, 33, 31, 27, 25, 22, 23, - 22, 21, 31, 29, 25, 24, 22, 23, 22, 22, 31, 28, 25, 23, 22, 22, 22, 22, - 30, 28, 24, 23, 22, 23, 22, 22, 28, 26, 23, 22, 22, 23, 22, 22, 28, 26, - 23, 22, 22, 23, 22, 22, 26, 25, 22, 22, 21, 22, 22, 21, 24, 24, 22, 21, - 20, 21, 20, 20, 24, 24, 22, 21, 20, 21, 20, 20, 23, 23, 22, 21, 20, 20, - 20, 20, 21, 22, 21, 21, 19, 19, 19, 19, 21, 22, 21, 21, 19, 19, 19, 19, - 21, 22, 22, 21, 19, 19, 19, 18, 21, 22, 22, 21, 19, 18, 18, 18, 21, 22, - 22, 21, 19, 18, 18, 18, 21, 23, 22, 22, 19, 18, 18, 17, 21, 23, 23, 22, - 19, 18, 17, 17, 21, 23, 22, 22, 19, 18, 17, 17, 20, 22, 22, 21, 19, 17, - 17, 16, 20, 22, 22, 21, 19, 17, 17, 16, 20, 22, 22, 21, 19, 17, 17, 16, - 20, 21, 22, 21, 19, 17, 16, 16, 20, 21, 22, 21, 19, 17, 16, 16, 19, 21, - 21, 21, 19, 17, 16, 15, 19, 20, 21, 20, 19, 17, 16, 15, 19, 20, 21, 20, - 19, 17, 16, 15, - /* Size 32x8 */ - 32, 33, 33, 33, 34, 34, 33, 31, 31, 30, 28, 28, 26, 24, 24, 23, 21, 21, - 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 33, 33, 33, 33, - 32, 32, 31, 29, 28, 28, 26, 26, 25, 24, 24, 23, 22, 22, 22, 22, 22, 23, - 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 29, 28, 28, 28, 27, 27, 27, 25, - 25, 24, 23, 23, 22, 22, 22, 22, 21, 21, 22, 22, 22, 22, 23, 22, 22, 22, - 22, 22, 22, 21, 21, 
21, 27, 26, 26, 26, 26, 25, 25, 24, 23, 23, 22, 22, - 22, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 21, 21, 21, 21, 21, 21, - 20, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, - 22, 22, 23, 23, 23, 23, 22, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 18, - 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 20, 21, 21, 21, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, - 17, 17, 17, 16, 16, 16, 16, 16, 20, 20, 20, 20, 21, 21, 21, 22, 22, 22, - 22, 22, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, - 16, 15, 15, 15 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 32, 29, 24, 32, 30, 28, 24, 29, 28, 21, 19, 24, 24, 19, 16, - /* Size 8x8 */ - 33, 33, 32, 32, 30, 28, 24, 22, 33, 32, 32, 32, 30, 28, 25, 23, 32, 32, - 31, 30, 29, 27, 24, 23, 32, 32, 30, 29, 28, 26, 24, 22, 30, 30, 29, 28, - 25, 23, 21, 20, 28, 28, 27, 26, 23, 20, 18, 17, 24, 25, 24, 24, 21, 18, - 16, 15, 22, 23, 23, 22, 20, 17, 15, 14, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 33, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 33, 32, - 32, 32, 32, 32, 32, 31, 30, 29, 29, 27, 26, 24, 23, 22, 33, 32, 32, 32, - 32, 32, 32, 31, 30, 30, 29, 27, 26, 24, 23, 23, 33, 32, 32, 32, 32, 32, - 32, 31, 31, 30, 30, 28, 27, 25, 23, 23, 33, 32, 32, 32, 31, 31, 31, 30, - 29, 28, 28, 26, 26, 24, 23, 23, 33, 32, 32, 32, 31, 31, 30, 30, 29, 28, - 28, 26, 26, 24, 23, 23, 32, 32, 32, 32, 31, 30, 29, 28, 28, 27, 27, 26, - 25, 24, 23, 22, 32, 31, 31, 31, 30, 30, 28, 28, 27, 26, 26, 24, 24, 23, - 22, 22, 30, 30, 30, 31, 29, 29, 28, 27, 26, 24, 24, 23, 22, 22, 20, 20, - 29, 29, 30, 30, 28, 28, 27, 26, 24, 22, 22, 21, 20, 20, 19, 19, 28, 29, - 29, 30, 28, 28, 27, 26, 24, 22, 21, 20, 20, 19, 18, 18, 26, 27, 27, 28, - 26, 26, 26, 24, 23, 21, 20, 19, 19, 18, 17, 17, 25, 26, 26, 27, 26, 26, - 25, 24, 22, 20, 20, 19, 18, 17, 17, 16, 23, 24, 24, 25, 24, 24, 24, 23, - 22, 20, 19, 18, 17, 16, 16, 15, 22, 
23, 23, 23, 23, 23, 23, 22, 20, 19, - 18, 17, 17, 16, 15, 15, 21, 22, 23, 23, 23, 23, 22, 22, 20, 19, 18, 17, - 16, 15, 15, 14, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 30, - 29, 28, 28, 27, 26, 26, 25, 23, 23, 23, 22, 21, 21, 20, 33, 33, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 29, 28, - 26, 26, 26, 24, 24, 23, 22, 22, 22, 20, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 29, 28, 27, 27, 26, 24, - 24, 24, 23, 22, 22, 21, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 30, 30, 30, 29, 29, 29, 28, 27, 27, 26, 24, 24, 24, 23, 22, - 22, 21, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, - 30, 30, 30, 29, 29, 28, 27, 27, 26, 24, 24, 24, 23, 23, 23, 21, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, - 30, 28, 28, 28, 27, 25, 25, 25, 23, 23, 23, 22, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 28, 28, 28, - 27, 25, 25, 25, 23, 23, 23, 22, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 28, 27, 27, 26, 25, 25, 24, - 23, 23, 23, 22, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 30, 29, 29, 29, 28, 28, 28, 28, 26, 26, 26, 24, 24, 24, 23, 23, 23, 21, - 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 29, 29, - 28, 28, 28, 27, 26, 26, 26, 24, 24, 24, 23, 23, 23, 21, 33, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 27, - 26, 26, 26, 24, 24, 24, 23, 23, 23, 21, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 30, 29, 29, 29, 28, 28, 28, 28, 28, 28, 26, 26, 26, 25, 24, - 24, 24, 23, 23, 23, 21, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 29, - 29, 29, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, - 22, 21, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 29, 28, 28, - 28, 28, 27, 27, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 21, 32, 31, - 
31, 31, 31, 31, 31, 31, 30, 30, 30, 29, 28, 28, 28, 27, 27, 27, 26, 26, - 26, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 20, 30, 30, 30, 30, 30, 31, - 31, 30, 29, 29, 29, 28, 28, 28, 27, 26, 26, 26, 24, 24, 24, 23, 23, 23, - 22, 22, 22, 21, 20, 20, 20, 19, 30, 30, 30, 30, 30, 31, 31, 30, 29, 29, - 29, 28, 28, 28, 27, 26, 26, 26, 24, 24, 24, 23, 23, 23, 22, 22, 22, 21, - 20, 20, 20, 19, 30, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, - 27, 26, 26, 25, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, - 29, 29, 29, 29, 30, 30, 30, 30, 28, 28, 28, 28, 27, 27, 26, 24, 24, 24, - 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 18, 28, 29, 29, 29, - 29, 30, 30, 29, 28, 28, 28, 28, 27, 27, 26, 24, 24, 23, 22, 21, 21, 20, - 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 28, 29, 29, 29, 29, 30, 30, 29, - 28, 28, 28, 28, 27, 27, 26, 24, 24, 23, 22, 21, 21, 20, 20, 20, 20, 19, - 19, 19, 18, 18, 18, 18, 27, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 26, - 26, 26, 25, 23, 23, 23, 21, 20, 20, 20, 20, 20, 19, 18, 18, 18, 18, 17, - 17, 17, 26, 26, 27, 27, 27, 28, 28, 27, 26, 26, 26, 26, 26, 26, 24, 23, - 23, 22, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 26, 26, - 27, 27, 27, 28, 28, 27, 26, 26, 26, 26, 26, 26, 24, 23, 23, 22, 21, 20, - 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 25, 26, 26, 26, 26, 27, - 27, 26, 26, 26, 26, 25, 25, 25, 24, 22, 22, 22, 20, 20, 20, 19, 19, 19, - 18, 17, 17, 17, 17, 16, 16, 16, 23, 24, 24, 24, 24, 25, 25, 25, 24, 24, - 24, 24, 24, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16, - 16, 15, 15, 15, 23, 24, 24, 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 24, - 23, 22, 22, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16, 16, 15, 15, 15, - 23, 23, 24, 24, 24, 25, 25, 24, 24, 24, 24, 24, 24, 24, 23, 21, 21, 21, - 19, 19, 19, 18, 18, 18, 17, 16, 16, 16, 15, 15, 15, 15, 22, 22, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 20, 20, 20, 19, 18, 18, 18, - 17, 17, 17, 16, 16, 15, 15, 15, 15, 14, 21, 22, 22, 22, 23, 23, 23, 23, - 
23, 23, 23, 23, 22, 22, 22, 20, 20, 20, 19, 18, 18, 17, 17, 17, 16, 15, - 15, 15, 15, 14, 14, 14, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, - 22, 22, 22, 20, 20, 20, 19, 18, 18, 17, 17, 17, 16, 15, 15, 15, 15, 14, - 14, 14, 20, 20, 21, 21, 21, 22, 22, 22, 21, 21, 21, 21, 21, 21, 20, 19, - 19, 19, 18, 18, 18, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 13, - /* Size 4x8 */ - 33, 32, 29, 24, 32, 31, 30, 25, 32, 30, 28, 24, 32, 29, 27, 24, 30, 28, - 24, 21, 28, 26, 21, 18, 24, 24, 19, 16, 22, 22, 18, 15, - /* Size 8x4 */ - 33, 32, 32, 32, 30, 28, 24, 22, 32, 31, 30, 29, 28, 26, 24, 22, 29, 30, - 28, 27, 24, 21, 19, 18, 24, 25, 24, 24, 21, 18, 16, 15, - /* Size 8x16 */ - 32, 33, 33, 32, 29, 28, 23, 22, 33, 32, 32, 32, 29, 29, 24, 23, 33, 32, - 32, 32, 30, 29, 25, 23, 33, 32, 32, 31, 30, 30, 25, 23, 33, 32, 31, 30, - 29, 28, 24, 23, 32, 32, 31, 30, 28, 28, 24, 23, 32, 31, 30, 29, 28, 27, - 24, 23, 32, 31, 30, 28, 26, 26, 23, 22, 30, 30, 29, 28, 25, 24, 21, 20, - 29, 30, 28, 27, 23, 22, 20, 19, 28, 30, 28, 27, 22, 21, 19, 18, 26, 28, - 26, 26, 21, 20, 18, 17, 25, 26, 26, 25, 21, 20, 17, 17, 23, 25, 24, 24, - 20, 19, 16, 16, 22, 23, 23, 23, 19, 18, 16, 15, 21, 23, 23, 22, 19, 18, - 15, 15, - /* Size 16x8 */ - 32, 33, 33, 33, 33, 32, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 33, 32, - 32, 32, 32, 32, 31, 31, 30, 30, 30, 28, 26, 25, 23, 23, 33, 32, 32, 32, - 31, 31, 30, 30, 29, 28, 28, 26, 26, 24, 23, 23, 32, 32, 32, 31, 30, 30, - 29, 28, 28, 27, 27, 26, 25, 24, 23, 22, 29, 29, 30, 30, 29, 28, 28, 26, - 25, 23, 22, 21, 21, 20, 19, 19, 28, 29, 29, 30, 28, 28, 27, 26, 24, 22, - 21, 20, 20, 19, 18, 18, 23, 24, 25, 25, 24, 24, 24, 23, 21, 20, 19, 18, - 17, 16, 16, 15, 22, 23, 23, 23, 23, 23, 23, 22, 20, 19, 18, 17, 17, 16, - 15, 15, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 32, 32, 32, 29, 28, 28, 26, 23, 23, 22, 19, 33, 33, - 32, 32, 32, 32, 32, 31, 29, 29, 29, 26, 24, 24, 22, 20, 33, 32, 32, 32, - 32, 32, 32, 31, 29, 29, 29, 26, 24, 24, 23, 20, 33, 32, 32, 32, 32, 32, - 32, 
31, 29, 29, 29, 26, 24, 24, 23, 20, 33, 32, 32, 32, 32, 32, 32, 31, - 30, 29, 29, 26, 25, 25, 23, 20, 33, 32, 32, 32, 32, 31, 31, 31, 30, 30, - 30, 27, 25, 25, 23, 21, 33, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 27, - 25, 25, 23, 21, 33, 32, 32, 32, 32, 31, 31, 31, 30, 29, 29, 27, 25, 25, - 23, 21, 33, 32, 32, 32, 31, 30, 30, 30, 29, 28, 28, 26, 24, 24, 23, 21, - 32, 32, 32, 32, 31, 30, 30, 30, 28, 28, 28, 26, 24, 24, 23, 20, 32, 32, - 32, 32, 31, 30, 30, 30, 28, 28, 28, 26, 24, 24, 23, 20, 32, 32, 32, 32, - 31, 29, 29, 29, 28, 28, 28, 26, 24, 24, 23, 21, 32, 32, 31, 31, 30, 29, - 29, 28, 28, 27, 27, 25, 24, 24, 23, 21, 32, 32, 31, 31, 30, 29, 29, 28, - 28, 27, 27, 25, 24, 24, 23, 21, 32, 31, 31, 31, 30, 28, 28, 28, 26, 26, - 26, 24, 23, 23, 22, 20, 30, 30, 30, 30, 29, 28, 28, 27, 25, 24, 24, 23, - 21, 21, 20, 19, 30, 30, 30, 30, 29, 28, 28, 27, 25, 24, 24, 23, 21, 21, - 20, 19, 30, 30, 30, 30, 29, 28, 28, 27, 24, 24, 24, 22, 21, 21, 20, 19, - 29, 29, 30, 30, 28, 27, 27, 26, 23, 22, 22, 20, 20, 20, 19, 17, 28, 29, - 30, 30, 28, 27, 27, 26, 22, 21, 21, 20, 19, 19, 18, 17, 28, 29, 30, 30, - 28, 27, 27, 26, 22, 21, 21, 20, 19, 19, 18, 17, 27, 28, 28, 28, 28, 26, - 26, 25, 22, 21, 21, 19, 18, 18, 18, 16, 26, 27, 28, 28, 26, 26, 26, 24, - 21, 20, 20, 19, 18, 18, 17, 16, 26, 27, 28, 28, 26, 26, 26, 24, 21, 20, - 20, 19, 18, 18, 17, 16, 25, 26, 26, 26, 26, 25, 25, 24, 21, 20, 20, 18, - 17, 17, 17, 15, 23, 24, 25, 25, 24, 24, 24, 23, 20, 19, 19, 17, 16, 16, - 16, 14, 23, 24, 25, 25, 24, 24, 24, 23, 20, 19, 19, 17, 16, 16, 16, 14, - 23, 24, 24, 24, 24, 24, 24, 23, 20, 19, 19, 17, 16, 16, 15, 14, 22, 23, - 23, 23, 23, 23, 23, 22, 19, 18, 18, 17, 16, 16, 15, 14, 21, 22, 23, 23, - 23, 22, 22, 21, 19, 18, 18, 17, 15, 15, 15, 13, 21, 22, 23, 23, 23, 22, - 22, 21, 19, 18, 18, 17, 15, 15, 15, 13, 20, 21, 22, 22, 21, 21, 21, 20, - 18, 18, 18, 16, 15, 15, 14, 13, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 30, 30, - 29, 28, 28, 27, 26, 26, 
25, 23, 23, 23, 22, 21, 21, 20, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 29, 28, - 27, 27, 26, 24, 24, 24, 23, 22, 22, 21, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 30, 28, 28, 28, 26, 25, - 25, 24, 23, 23, 23, 22, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 30, 30, 30, 30, 30, 30, 28, 28, 28, 26, 25, 25, 24, 23, 23, - 23, 22, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, - 29, 29, 28, 28, 28, 28, 26, 26, 26, 24, 24, 24, 23, 23, 23, 21, 32, 32, - 32, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27, 27, - 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 21, 32, 32, 32, 32, 32, 31, - 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, - 25, 24, 24, 24, 23, 22, 22, 21, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, - 30, 29, 28, 28, 28, 27, 27, 27, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, - 22, 21, 21, 20, 29, 29, 29, 29, 30, 30, 30, 30, 29, 28, 28, 28, 28, 28, - 26, 25, 25, 24, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18, - 28, 29, 29, 29, 29, 30, 30, 29, 28, 28, 28, 28, 27, 27, 26, 24, 24, 24, - 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 28, 29, 29, 29, - 29, 30, 30, 29, 28, 28, 28, 28, 27, 27, 26, 24, 24, 24, 22, 21, 21, 21, - 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 26, 26, 26, 26, 26, 27, 27, 27, - 26, 26, 26, 26, 25, 25, 24, 23, 23, 22, 20, 20, 20, 19, 19, 19, 18, 17, - 17, 17, 17, 17, 17, 16, 23, 24, 24, 24, 25, 25, 25, 25, 24, 24, 24, 24, - 24, 24, 23, 21, 21, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16, 16, 15, - 15, 15, 23, 24, 24, 24, 25, 25, 25, 25, 24, 24, 24, 24, 24, 24, 23, 21, - 21, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16, 16, 15, 15, 15, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 20, 20, 20, 19, 18, - 18, 18, 17, 17, 17, 16, 16, 15, 15, 15, 15, 14, 19, 20, 20, 20, 20, 21, - 21, 21, 21, 20, 20, 21, 21, 21, 20, 19, 19, 19, 17, 17, 17, 16, 16, 16, - 15, 14, 14, 14, 14, 13, 
13, 13, - /* Size 4x16 */ - 33, 32, 28, 23, 32, 32, 29, 24, 32, 32, 29, 25, 32, 31, 30, 25, 32, 30, - 28, 24, 32, 30, 28, 24, 32, 29, 27, 24, 31, 28, 26, 23, 30, 28, 24, 21, - 29, 27, 22, 20, 29, 27, 21, 19, 27, 26, 20, 18, 26, 25, 20, 17, 24, 24, - 19, 16, 23, 23, 18, 16, 22, 22, 18, 15, - /* Size 16x4 */ - 33, 32, 32, 32, 32, 32, 32, 31, 30, 29, 29, 27, 26, 24, 23, 22, 32, 32, - 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 25, 24, 23, 22, 28, 29, 29, 30, - 28, 28, 27, 26, 24, 22, 21, 20, 20, 19, 18, 18, 23, 24, 25, 25, 24, 24, - 24, 23, 21, 20, 19, 18, 17, 16, 16, 15, - /* Size 8x32 */ - 32, 33, 33, 32, 29, 28, 23, 22, 33, 32, 32, 32, 29, 29, 24, 22, 33, 32, - 32, 32, 29, 29, 24, 23, 33, 32, 32, 32, 29, 29, 24, 23, 33, 32, 32, 32, - 30, 29, 25, 23, 33, 32, 32, 31, 30, 30, 25, 23, 33, 32, 32, 31, 30, 30, - 25, 23, 33, 32, 32, 31, 30, 29, 25, 23, 33, 32, 31, 30, 29, 28, 24, 23, - 32, 32, 31, 30, 28, 28, 24, 23, 32, 32, 31, 30, 28, 28, 24, 23, 32, 32, - 31, 29, 28, 28, 24, 23, 32, 31, 30, 29, 28, 27, 24, 23, 32, 31, 30, 29, - 28, 27, 24, 23, 32, 31, 30, 28, 26, 26, 23, 22, 30, 30, 29, 28, 25, 24, - 21, 20, 30, 30, 29, 28, 25, 24, 21, 20, 30, 30, 29, 28, 24, 24, 21, 20, - 29, 30, 28, 27, 23, 22, 20, 19, 28, 30, 28, 27, 22, 21, 19, 18, 28, 30, - 28, 27, 22, 21, 19, 18, 27, 28, 28, 26, 22, 21, 18, 18, 26, 28, 26, 26, - 21, 20, 18, 17, 26, 28, 26, 26, 21, 20, 18, 17, 25, 26, 26, 25, 21, 20, - 17, 17, 23, 25, 24, 24, 20, 19, 16, 16, 23, 25, 24, 24, 20, 19, 16, 16, - 23, 24, 24, 24, 20, 19, 16, 15, 22, 23, 23, 23, 19, 18, 16, 15, 21, 23, - 23, 22, 19, 18, 15, 15, 21, 23, 23, 22, 19, 18, 15, 15, 20, 22, 21, 21, - 18, 18, 15, 14, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 30, 30, - 29, 28, 28, 27, 26, 26, 25, 23, 23, 23, 22, 21, 21, 20, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 30, 28, - 28, 28, 26, 25, 25, 24, 23, 23, 23, 22, 33, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 30, 30, 30, 29, 29, 29, 
28, 28, 28, 28, 26, 26, 26, 24, - 24, 24, 23, 23, 23, 21, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29, - 29, 29, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, - 22, 21, 29, 29, 29, 29, 30, 30, 30, 30, 29, 28, 28, 28, 28, 28, 26, 25, - 25, 24, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18, 28, 29, - 29, 29, 29, 30, 30, 29, 28, 28, 28, 28, 27, 27, 26, 24, 24, 24, 22, 21, - 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 23, 24, 24, 24, 25, 25, - 25, 25, 24, 24, 24, 24, 24, 24, 23, 21, 21, 21, 20, 19, 19, 18, 18, 18, - 17, 16, 16, 16, 16, 15, 15, 15, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 22, 20, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 15, - 15, 15, 15, 14 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 28, 22, 22, 28, 23, 22, 23, 22, 22, 19, 19, 22, 23, 19, 17, - /* Size 8x8 */ - 33, 33, 30, 28, 24, 21, 22, 21, 33, 32, 29, 26, 24, 22, 23, 22, 30, 29, - 26, 24, 23, 22, 23, 22, 28, 26, 24, 22, 22, 22, 23, 22, 24, 24, 23, 22, - 21, 20, 20, 20, 21, 22, 22, 22, 20, 19, 19, 19, 22, 23, 23, 23, 20, 19, - 18, 17, 21, 22, 22, 22, 20, 19, 17, 17, - /* Size 16x16 */ - 32, 33, 33, 34, 31, 31, 28, 27, 25, 22, 21, 21, 21, 21, 20, 20, 33, 33, - 33, 33, 30, 30, 27, 26, 24, 22, 22, 22, 22, 22, 21, 21, 33, 33, 33, 33, - 30, 29, 26, 26, 24, 22, 22, 22, 22, 22, 22, 22, 34, 33, 33, 32, 30, 29, - 26, 25, 24, 23, 22, 23, 23, 23, 22, 22, 31, 30, 30, 30, 28, 27, 24, 24, - 23, 22, 22, 22, 22, 23, 22, 22, 31, 30, 29, 29, 27, 26, 24, 23, 23, 22, - 22, 22, 22, 23, 22, 22, 28, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 22, - 22, 23, 22, 22, 27, 26, 26, 25, 24, 23, 22, 22, 21, 21, 21, 21, 22, 22, - 22, 22, 25, 24, 24, 24, 23, 23, 22, 21, 21, 20, 20, 21, 21, 21, 20, 20, - 22, 22, 22, 23, 22, 22, 22, 21, 20, 20, 20, 20, 20, 20, 19, 19, 21, 22, - 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 21, 22, 22, 23, - 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 18, 18, 21, 22, 22, 23, 22, 22, - 22, 22, 21, 20, 19, 19, 19, 18, 18, 18, 21, 22, 22, 23, 23, 23, 
23, 22, - 21, 20, 19, 19, 18, 18, 17, 17, 20, 21, 22, 22, 22, 22, 22, 22, 20, 19, - 19, 18, 18, 17, 17, 17, 20, 21, 22, 22, 22, 22, 22, 22, 20, 19, 19, 18, - 18, 17, 17, 17, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 28, 27, 25, 25, 24, - 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 33, 33, 33, 33, - 33, 33, 33, 33, 31, 30, 30, 28, 28, 28, 26, 24, 24, 24, 22, 21, 21, 21, - 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 33, 33, 33, 33, 33, 33, 33, 32, - 30, 30, 30, 28, 27, 27, 26, 24, 24, 24, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 21, 21, 21, 21, 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 28, - 27, 27, 26, 24, 24, 24, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, - 21, 21, 33, 33, 33, 33, 33, 33, 33, 32, 30, 29, 29, 28, 26, 26, 26, 24, - 24, 24, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 34, 33, - 33, 33, 33, 32, 32, 32, 30, 29, 29, 27, 26, 26, 25, 24, 24, 24, 23, 22, - 22, 22, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 34, 33, 33, 33, 33, 32, - 32, 32, 30, 29, 29, 27, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 23, 23, - 23, 23, 23, 23, 22, 22, 22, 22, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, - 28, 27, 26, 26, 25, 24, 24, 24, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, - 22, 22, 22, 22, 31, 31, 30, 30, 30, 30, 30, 29, 28, 27, 27, 25, 24, 24, - 24, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, - 31, 30, 30, 30, 29, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23, 23, 23, 23, - 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 31, 30, 30, 30, - 29, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, - 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 29, 28, 28, 28, 28, 27, 27, 27, - 25, 25, 25, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, - 23, 23, 22, 22, 22, 22, 28, 28, 27, 27, 26, 26, 26, 26, 24, 24, 24, 22, - 22, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, - 22, 22, 28, 28, 27, 27, 26, 26, 26, 26, 24, 24, 24, 22, 22, 22, 22, 22, - 22, 22, 22, 21, 21, 22, 22, 
22, 22, 23, 23, 23, 22, 22, 22, 22, 27, 26, - 26, 26, 26, 25, 25, 25, 24, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 25, 24, 24, 24, 24, 24, - 24, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 21, 21, - 21, 21, 21, 21, 20, 20, 20, 20, 25, 24, 24, 24, 24, 24, 24, 24, 23, 23, - 23, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, - 20, 20, 20, 20, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 22, - 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 21, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 18, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, - 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 21, 22, - 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 19, - 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 21, 22, 22, 22, 22, 23, - 23, 23, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, - 19, 18, 18, 18, 18, 18, 18, 18, 21, 22, 22, 22, 22, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 18, 18, 18, 17, - 17, 17, 17, 17, 21, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, - 21, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 21, 21, 20, - 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 20, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 19, - 18, 18, 18, 17, 17, 17, 17, 
17, 17, 17, 20, 21, 21, 21, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 19, 18, 18, 18, 17, - 17, 17, 17, 17, 17, 16, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 20, 20, 20, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, - 17, 16, 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, - 20, 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, - /* Size 4x8 */ - 33, 27, 22, 21, 33, 26, 22, 23, 29, 24, 22, 22, 26, 22, 22, 23, 24, 22, - 20, 20, 22, 22, 19, 19, 22, 22, 19, 18, 21, 22, 19, 17, - /* Size 8x4 */ - 33, 33, 29, 26, 24, 22, 22, 21, 27, 26, 24, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 20, 19, 19, 19, 21, 23, 22, 23, 20, 19, 18, 17, - /* Size 8x16 */ - 32, 33, 31, 28, 23, 21, 21, 20, 33, 33, 30, 27, 23, 22, 22, 21, 33, 32, - 30, 26, 23, 22, 22, 22, 34, 32, 29, 26, 23, 22, 23, 22, 31, 29, 28, 24, - 22, 22, 23, 22, 31, 28, 27, 24, 22, 22, 22, 22, 28, 26, 24, 22, 22, 22, - 23, 22, 26, 25, 24, 22, 21, 21, 22, 22, 24, 24, 23, 22, 21, 20, 21, 20, - 22, 22, 22, 21, 20, 20, 19, 19, 21, 22, 22, 21, 20, 19, 19, 19, 21, 22, - 22, 22, 20, 19, 18, 18, 21, 23, 22, 22, 20, 19, 18, 18, 21, 23, 23, 22, - 20, 19, 18, 17, 20, 22, 22, 22, 20, 19, 17, 17, 20, 22, 22, 22, 20, 19, - 17, 17, - /* Size 16x8 */ - 32, 33, 33, 34, 31, 31, 28, 26, 24, 22, 21, 21, 21, 21, 20, 20, 33, 33, - 32, 32, 29, 28, 26, 25, 24, 22, 22, 22, 23, 23, 22, 22, 31, 30, 30, 29, - 28, 27, 24, 24, 23, 22, 22, 22, 22, 23, 22, 22, 28, 27, 26, 26, 24, 24, - 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 23, 23, 23, 23, 22, 22, 22, 21, - 21, 20, 20, 20, 20, 20, 20, 20, 21, 22, 22, 22, 22, 22, 22, 21, 20, 20, - 19, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 22, 23, 22, 21, 19, 19, 18, - 18, 18, 17, 17, 20, 21, 22, 22, 22, 22, 22, 22, 20, 19, 19, 18, 18, 17, - 17, 17, - /* Size 16x32 */ - 32, 33, 33, 33, 31, 28, 28, 27, 23, 21, 21, 21, 21, 21, 20, 20, 33, 33, - 33, 33, 31, 27, 27, 26, 23, 22, 22, 21, 21, 21, 21, 20, 33, 33, 33, 33, - 30, 27, 27, 26, 23, 22, 22, 22, 
22, 22, 21, 20, 33, 33, 33, 33, 30, 27, - 27, 26, 23, 22, 22, 22, 22, 22, 21, 20, 33, 33, 32, 32, 30, 26, 26, 26, - 23, 22, 22, 22, 22, 22, 22, 21, 34, 33, 32, 32, 29, 26, 26, 25, 23, 22, - 22, 23, 23, 23, 22, 21, 34, 33, 32, 32, 29, 26, 26, 25, 23, 22, 22, 23, - 23, 23, 22, 21, 33, 32, 31, 31, 29, 26, 26, 25, 23, 22, 22, 23, 23, 23, - 22, 21, 31, 30, 29, 29, 28, 24, 24, 24, 22, 22, 22, 22, 23, 23, 22, 22, - 31, 29, 28, 28, 27, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 31, 29, - 28, 28, 27, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 29, 28, 27, 27, - 25, 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 28, 26, 26, 26, 24, 22, - 22, 22, 22, 22, 22, 22, 23, 23, 22, 22, 28, 26, 26, 26, 24, 22, 22, 22, - 22, 22, 22, 22, 23, 23, 22, 22, 26, 26, 25, 25, 24, 22, 22, 22, 21, 21, - 21, 22, 22, 22, 22, 21, 24, 24, 24, 24, 23, 22, 22, 21, 21, 20, 20, 21, - 21, 21, 20, 20, 24, 24, 24, 24, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, - 20, 20, 24, 24, 24, 24, 23, 22, 22, 21, 20, 20, 20, 20, 20, 20, 20, 20, - 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 21, 22, - 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, - 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, - 22, 21, 20, 19, 19, 19, 19, 19, 19, 18, 21, 22, 22, 22, 22, 22, 22, 21, - 20, 19, 19, 19, 18, 18, 18, 18, 21, 22, 22, 22, 22, 22, 22, 21, 20, 19, - 19, 19, 18, 18, 18, 18, 21, 22, 23, 23, 22, 22, 22, 22, 20, 19, 19, 19, - 18, 18, 18, 17, 21, 22, 23, 23, 23, 22, 22, 22, 20, 19, 19, 18, 18, 18, - 17, 17, 21, 22, 23, 23, 23, 22, 22, 22, 20, 19, 19, 18, 18, 18, 17, 17, - 21, 22, 23, 23, 23, 22, 22, 22, 20, 19, 19, 18, 18, 18, 17, 17, 20, 21, - 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 17, 17, 17, 16, 20, 21, 22, 22, - 22, 22, 22, 21, 20, 19, 19, 18, 17, 17, 17, 16, 20, 21, 22, 22, 22, 22, - 22, 21, 20, 19, 19, 18, 17, 17, 17, 16, 20, 21, 22, 22, 22, 22, 22, 21, - 20, 19, 19, 18, 17, 17, 17, 16, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 
28, 26, 24, 24, 24, - 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 33, 33, 33, 33, - 33, 33, 33, 32, 30, 29, 29, 28, 26, 26, 26, 24, 24, 24, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 33, 33, 33, 33, 32, 32, 32, 31, - 29, 28, 28, 27, 26, 26, 25, 24, 24, 24, 22, 22, 22, 22, 22, 22, 23, 23, - 23, 23, 22, 22, 22, 22, 33, 33, 33, 33, 32, 32, 32, 31, 29, 28, 28, 27, - 26, 26, 25, 24, 24, 24, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 22, 22, - 22, 22, 31, 31, 30, 30, 30, 29, 29, 29, 28, 27, 27, 25, 24, 24, 24, 23, - 23, 23, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 28, 27, - 27, 27, 26, 26, 26, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 21, 21, - 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 28, 27, 27, 27, 26, 26, - 26, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 27, 26, 26, 26, 26, 25, 25, 25, 24, 23, - 23, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, - 21, 21, 21, 21, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, - 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, - 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 23, 23, 23, - 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 18, - 18, 18, 18, 18, 18, 18, 21, 21, 22, 22, 22, 23, 23, 23, 23, 22, 22, 23, - 23, 23, 22, 21, 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 17, 17, - 17, 17, 21, 21, 22, 22, 22, 23, 23, 23, 23, 22, 22, 23, 23, 23, 22, 21, - 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 20, 21, - 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, - 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 20, 20, 20, 20, 21, 21, - 21, 21, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 
19, 19, 18, 18, 18, - 17, 17, 17, 17, 16, 16, 16, 16, - /* Size 4x16 */ - 33, 28, 21, 21, 33, 27, 22, 22, 33, 26, 22, 22, 33, 26, 22, 23, 30, 24, - 22, 23, 29, 24, 22, 22, 26, 22, 22, 23, 26, 22, 21, 22, 24, 22, 20, 21, - 22, 21, 20, 19, 22, 21, 19, 19, 22, 22, 19, 18, 22, 22, 19, 18, 22, 22, - 19, 18, 21, 22, 19, 17, 21, 22, 19, 17, - /* Size 16x4 */ - 33, 33, 33, 33, 30, 29, 26, 26, 24, 22, 22, 22, 22, 22, 21, 21, 28, 27, - 26, 26, 24, 24, 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 21, 22, 22, 22, - 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 22, - 23, 22, 21, 19, 19, 18, 18, 18, 17, 17, - /* Size 8x32 */ - 32, 33, 31, 28, 23, 21, 21, 20, 33, 33, 31, 27, 23, 22, 21, 21, 33, 33, - 30, 27, 23, 22, 22, 21, 33, 33, 30, 27, 23, 22, 22, 21, 33, 32, 30, 26, - 23, 22, 22, 22, 34, 32, 29, 26, 23, 22, 23, 22, 34, 32, 29, 26, 23, 22, - 23, 22, 33, 31, 29, 26, 23, 22, 23, 22, 31, 29, 28, 24, 22, 22, 23, 22, - 31, 28, 27, 24, 22, 22, 22, 22, 31, 28, 27, 24, 22, 22, 22, 22, 29, 27, - 25, 23, 22, 22, 23, 22, 28, 26, 24, 22, 22, 22, 23, 22, 28, 26, 24, 22, - 22, 22, 23, 22, 26, 25, 24, 22, 21, 21, 22, 22, 24, 24, 23, 22, 21, 20, - 21, 20, 24, 24, 23, 22, 21, 20, 21, 20, 24, 24, 23, 22, 20, 20, 20, 20, - 22, 22, 22, 21, 20, 20, 19, 19, 21, 22, 22, 21, 20, 19, 19, 19, 21, 22, - 22, 21, 20, 19, 19, 19, 21, 22, 22, 22, 20, 19, 19, 19, 21, 22, 22, 22, - 20, 19, 18, 18, 21, 22, 22, 22, 20, 19, 18, 18, 21, 23, 22, 22, 20, 19, - 18, 18, 21, 23, 23, 22, 20, 19, 18, 17, 21, 23, 23, 22, 20, 19, 18, 17, - 21, 23, 23, 22, 20, 19, 18, 17, 20, 22, 22, 22, 20, 19, 17, 17, 20, 22, - 22, 22, 20, 19, 17, 17, 20, 22, 22, 22, 20, 19, 17, 17, 20, 22, 22, 22, - 20, 19, 17, 17, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 28, 26, 24, 24, 24, - 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 33, 33, 33, 33, - 32, 32, 32, 31, 29, 28, 28, 27, 26, 26, 25, 24, 24, 24, 22, 22, 22, 22, - 22, 22, 23, 23, 23, 23, 22, 22, 22, 22, 31, 31, 30, 30, 30, 29, 29, 
29, - 28, 27, 27, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 23, - 23, 23, 22, 22, 22, 22, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24, 24, 23, - 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, - 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 23, - 23, 23, 23, 22, 22, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 19, 18, 18, - 18, 18, 18, 18, 17, 17, 17, 17, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, - 17, 17, 17, 17 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 32, 30, 27, 32, 31, 29, 26, 30, 29, 26, 23, 27, 26, 23, 19, - /* Size 8x8 */ - 33, 33, 32, 32, 31, 30, 28, 25, 33, 32, 32, 32, 31, 30, 28, 26, 32, 32, - 32, 31, 30, 29, 28, 26, 32, 32, 31, 30, 29, 28, 27, 25, 31, 31, 30, 29, - 28, 26, 25, 23, 30, 30, 29, 28, 26, 24, 22, 21, 28, 28, 28, 27, 25, 22, - 20, 19, 25, 26, 26, 25, 23, 21, 19, 18, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 33, 33, 32, 32, 30, 30, 28, 28, 26, 26, 23, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 30, 30, 29, 29, 27, 27, 24, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 30, 30, 29, 29, 27, 27, 24, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 30, 30, 28, 28, 25, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 30, 30, 28, 28, 25, 33, 32, 32, 32, 32, 31, 31, 30, 30, 29, - 29, 28, 28, 26, 26, 24, 33, 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, - 28, 26, 26, 24, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28, 28, 27, 27, 26, - 26, 24, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 24, - 30, 30, 30, 31, 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 30, 30, - 30, 31, 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 28, 29, 29, 30, - 30, 28, 28, 27, 27, 24, 24, 21, 21, 20, 20, 19, 28, 29, 29, 30, 30, 28, - 28, 27, 27, 
24, 24, 21, 21, 20, 20, 19, 26, 27, 27, 28, 28, 26, 26, 26, - 26, 23, 23, 20, 20, 19, 19, 18, 26, 27, 27, 28, 28, 26, 26, 26, 26, 23, - 23, 20, 20, 19, 19, 18, 23, 24, 24, 25, 25, 24, 24, 24, 24, 22, 22, 19, - 19, 18, 18, 16, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 31, - 30, 30, 30, 29, 28, 28, 28, 28, 26, 26, 26, 25, 23, 23, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, - 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 29, 29, 28, - 27, 27, 27, 26, 24, 24, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 29, 29, 28, 27, 27, 27, 26, - 24, 24, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 30, 30, 30, 30, 29, 29, 29, 28, 27, 27, 27, 26, 24, 24, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, - 30, 30, 29, 29, 29, 28, 27, 27, 27, 26, 25, 25, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, - 30, 28, 28, 28, 28, 26, 25, 25, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 28, 28, 28, - 28, 26, 25, 25, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 28, 28, 28, 28, 26, 25, 25, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 30, - 30, 30, 30, 29, 29, 29, 29, 28, 27, 27, 27, 26, 25, 25, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 28, - 28, 28, 28, 27, 26, 26, 26, 26, 24, 24, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27, - 26, 26, 26, 26, 24, 24, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, - 31, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27, 26, 26, 26, 26, - 24, 24, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 
31, 31, 31, 30, 30, 30, - 30, 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, 26, 26, 25, 24, 24, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, - 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, - 27, 26, 26, 26, 26, 25, 24, 24, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, - 26, 25, 24, 24, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, - 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, - 30, 30, 30, 30, 30, 30, 31, 31, 31, 30, 29, 29, 29, 28, 28, 28, 28, 27, - 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 30, 30, 30, 30, - 30, 30, 31, 31, 31, 30, 29, 29, 29, 28, 28, 28, 28, 27, 26, 26, 26, 25, - 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 30, 30, 30, 30, 30, 30, 31, 31, - 31, 30, 29, 29, 29, 28, 28, 28, 28, 27, 26, 26, 26, 25, 24, 24, 24, 23, - 23, 23, 23, 22, 22, 22, 29, 30, 30, 30, 30, 30, 30, 30, 30, 29, 28, 28, - 28, 28, 28, 28, 28, 26, 25, 25, 25, 24, 23, 23, 23, 22, 22, 22, 22, 21, - 20, 20, 28, 29, 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, - 27, 26, 24, 24, 24, 23, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 28, 29, - 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, - 24, 23, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 28, 29, 29, 29, 29, 29, - 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, 24, 23, 21, 21, - 21, 21, 20, 20, 20, 20, 19, 19, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 27, 27, 27, 27, 26, 26, 26, 25, 23, 23, 23, 22, 21, 21, 21, 20, 20, 20, - 20, 19, 18, 18, 26, 26, 27, 27, 27, 27, 28, 28, 28, 27, 26, 26, 26, 26, - 26, 26, 26, 24, 23, 23, 23, 22, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, - 26, 26, 27, 27, 27, 27, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 24, - 23, 23, 23, 22, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 26, 26, 27, 27, - 27, 27, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 
26, 24, 23, 23, 23, 22, - 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 25, 25, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 25, 25, 25, 25, 23, 22, 22, 22, 21, 20, 20, 20, 19, - 18, 18, 18, 18, 17, 17, 23, 24, 24, 24, 24, 25, 25, 25, 25, 25, 24, 24, - 24, 24, 24, 24, 24, 23, 22, 22, 22, 20, 19, 19, 19, 18, 18, 18, 18, 17, - 16, 16, 23, 24, 24, 24, 24, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 24, - 24, 23, 22, 22, 22, 20, 19, 19, 19, 18, 18, 18, 18, 17, 16, 16, - /* Size 4x8 */ - 33, 32, 30, 26, 32, 32, 30, 27, 32, 31, 30, 27, 32, 31, 28, 26, 31, 30, - 27, 24, 30, 28, 25, 22, 28, 27, 23, 20, 26, 26, 22, 18, - /* Size 8x4 */ - 33, 32, 32, 32, 31, 30, 28, 26, 32, 32, 31, 31, 30, 28, 27, 26, 30, 30, - 30, 28, 27, 25, 23, 22, 26, 27, 27, 26, 24, 22, 20, 18, - /* Size 8x16 */ - 32, 33, 33, 32, 32, 28, 28, 23, 33, 32, 32, 32, 32, 29, 29, 24, 33, 32, - 32, 32, 32, 29, 29, 24, 33, 32, 32, 31, 31, 30, 30, 25, 33, 32, 32, 31, - 31, 30, 30, 25, 32, 32, 32, 30, 30, 28, 28, 24, 32, 32, 32, 30, 30, 28, - 28, 24, 32, 31, 31, 29, 29, 27, 27, 24, 32, 31, 31, 29, 29, 27, 27, 24, - 30, 30, 30, 28, 28, 24, 24, 21, 30, 30, 30, 28, 28, 24, 24, 21, 28, 30, - 30, 27, 27, 21, 21, 19, 28, 30, 30, 27, 27, 21, 21, 19, 26, 28, 28, 26, - 26, 20, 20, 18, 26, 28, 28, 26, 26, 20, 20, 18, 23, 25, 25, 24, 24, 19, - 19, 16, - /* Size 16x8 */ - 32, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 28, 28, 26, 26, 23, 33, 32, - 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 25, 33, 32, 32, 32, - 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 25, 32, 32, 32, 31, 31, 30, - 30, 29, 29, 28, 28, 27, 27, 26, 26, 24, 32, 32, 32, 31, 31, 30, 30, 29, - 29, 28, 28, 27, 27, 26, 26, 24, 28, 29, 29, 30, 30, 28, 28, 27, 27, 24, - 24, 21, 21, 20, 20, 19, 28, 29, 29, 30, 30, 28, 28, 27, 27, 24, 24, 21, - 21, 20, 20, 19, 23, 24, 24, 25, 25, 24, 24, 24, 24, 21, 21, 19, 19, 18, - 18, 16, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 32, 32, 32, 32, 30, 28, 28, 28, 26, 23, 23, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 30, 29, 29, 29, 26, 24, 
24, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 30, 29, 29, 29, 27, 24, 24, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 30, 29, 29, 29, 27, 24, 24, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 30, 29, 29, 29, 27, 24, 24, 33, 32, 32, 32, 32, 32, 32, 32, 32, 30, - 29, 29, 29, 27, 25, 25, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, - 30, 28, 25, 25, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 28, - 25, 25, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 28, 25, 25, - 33, 32, 32, 32, 32, 31, 31, 31, 31, 30, 29, 29, 29, 27, 25, 25, 32, 32, - 32, 32, 32, 31, 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 32, 32, 32, 32, - 32, 31, 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 32, 32, 32, 32, 32, 31, - 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 32, 32, 32, 32, 32, 31, 30, 30, - 30, 28, 28, 28, 28, 26, 24, 24, 32, 32, 31, 31, 31, 30, 29, 29, 29, 28, - 27, 27, 27, 26, 24, 24, 32, 32, 31, 31, 31, 30, 29, 29, 29, 28, 27, 27, - 27, 26, 24, 24, 32, 32, 31, 31, 31, 30, 29, 29, 29, 28, 27, 27, 27, 26, - 24, 24, 31, 31, 31, 31, 31, 30, 28, 28, 28, 27, 26, 26, 26, 24, 23, 23, - 30, 30, 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 24, 23, 21, 21, 30, 30, - 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 24, 23, 21, 21, 30, 30, 30, 30, - 30, 29, 28, 28, 28, 26, 24, 24, 24, 23, 21, 21, 29, 30, 30, 30, 30, 28, - 28, 28, 28, 25, 23, 23, 23, 22, 20, 20, 28, 29, 30, 30, 30, 28, 27, 27, - 27, 24, 21, 21, 21, 20, 19, 19, 28, 29, 30, 30, 30, 28, 27, 27, 27, 24, - 21, 21, 21, 20, 19, 19, 28, 29, 30, 30, 30, 28, 27, 27, 27, 24, 21, 21, - 21, 20, 19, 19, 28, 28, 28, 28, 28, 27, 26, 26, 26, 23, 21, 21, 21, 20, - 18, 18, 26, 27, 28, 28, 28, 26, 26, 26, 26, 23, 20, 20, 20, 19, 18, 18, - 26, 27, 28, 28, 28, 26, 26, 26, 26, 23, 20, 20, 20, 19, 18, 18, 26, 27, - 28, 28, 28, 26, 26, 26, 26, 23, 20, 20, 20, 19, 18, 18, 25, 26, 26, 26, - 26, 26, 24, 24, 24, 22, 20, 20, 20, 18, 17, 17, 23, 24, 25, 25, 25, 24, - 24, 24, 24, 21, 19, 19, 19, 18, 16, 16, 23, 24, 25, 25, 25, 24, 24, 24, - 24, 21, 19, 19, 19, 18, 16, 16, - /* Size 32x16 */ - 
32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, - 30, 30, 30, 29, 28, 28, 28, 28, 26, 26, 26, 25, 23, 23, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, - 29, 29, 29, 28, 27, 27, 27, 26, 24, 24, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, - 28, 28, 28, 26, 25, 25, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, 28, 28, 28, 26, - 25, 25, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, 28, 28, 28, 26, 25, 25, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, - 29, 28, 28, 28, 28, 27, 26, 26, 26, 26, 24, 24, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, - 27, 26, 26, 26, 26, 24, 24, 24, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, - 26, 24, 24, 24, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, - 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24, 24, - 30, 30, 30, 30, 30, 30, 31, 31, 31, 30, 29, 29, 29, 28, 28, 28, 28, 27, - 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, 23, 22, 21, 21, 28, 29, 29, 29, - 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, 24, 23, - 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 28, 29, 29, 29, 29, 29, 30, 30, - 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, 24, 23, 21, 21, 21, 21, - 20, 20, 20, 20, 19, 19, 28, 29, 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, - 28, 28, 27, 27, 27, 26, 24, 24, 24, 23, 21, 21, 21, 21, 20, 20, 20, 20, - 19, 19, 26, 26, 27, 27, 27, 27, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, - 26, 24, 23, 23, 23, 22, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 23, 24, - 24, 24, 24, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 24, 24, 23, 21, 21, - 21, 20, 19, 19, 19, 18, 18, 18, 18, 17, 16, 16, 23, 24, 24, 24, 24, 25, - 
25, 25, 25, 25, 24, 24, 24, 24, 24, 24, 24, 23, 21, 21, 21, 20, 19, 19, - 19, 18, 18, 18, 18, 17, 16, 16, - /* Size 4x16 */ - 33, 32, 30, 26, 32, 32, 30, 27, 32, 32, 30, 27, 32, 32, 31, 28, 32, 32, - 31, 28, 32, 31, 29, 26, 32, 31, 29, 26, 32, 30, 28, 26, 32, 30, 28, 26, - 30, 29, 26, 23, 30, 29, 26, 23, 29, 28, 24, 20, 29, 28, 24, 20, 27, 26, - 23, 19, 27, 26, 23, 19, 24, 24, 21, 18, - /* Size 16x4 */ - 33, 32, 32, 32, 32, 32, 32, 32, 32, 30, 30, 29, 29, 27, 27, 24, 32, 32, - 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 26, 26, 24, 30, 30, 30, 31, - 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 21, 26, 27, 27, 28, 28, 26, - 26, 26, 26, 23, 23, 20, 20, 19, 19, 18, - /* Size 8x32 */ - 32, 33, 33, 32, 32, 28, 28, 23, 33, 33, 33, 32, 32, 29, 29, 24, 33, 32, - 32, 32, 32, 29, 29, 24, 33, 32, 32, 32, 32, 29, 29, 24, 33, 32, 32, 32, - 32, 29, 29, 24, 33, 32, 32, 32, 32, 29, 29, 25, 33, 32, 32, 31, 31, 30, - 30, 25, 33, 32, 32, 31, 31, 30, 30, 25, 33, 32, 32, 31, 31, 30, 30, 25, - 33, 32, 32, 31, 31, 29, 29, 25, 32, 32, 32, 30, 30, 28, 28, 24, 32, 32, - 32, 30, 30, 28, 28, 24, 32, 32, 32, 30, 30, 28, 28, 24, 32, 32, 32, 30, - 30, 28, 28, 24, 32, 31, 31, 29, 29, 27, 27, 24, 32, 31, 31, 29, 29, 27, - 27, 24, 32, 31, 31, 29, 29, 27, 27, 24, 31, 31, 31, 28, 28, 26, 26, 23, - 30, 30, 30, 28, 28, 24, 24, 21, 30, 30, 30, 28, 28, 24, 24, 21, 30, 30, - 30, 28, 28, 24, 24, 21, 29, 30, 30, 28, 28, 23, 23, 20, 28, 30, 30, 27, - 27, 21, 21, 19, 28, 30, 30, 27, 27, 21, 21, 19, 28, 30, 30, 27, 27, 21, - 21, 19, 28, 28, 28, 26, 26, 21, 21, 18, 26, 28, 28, 26, 26, 20, 20, 18, - 26, 28, 28, 26, 26, 20, 20, 18, 26, 28, 28, 26, 26, 20, 20, 18, 25, 26, - 26, 24, 24, 20, 20, 17, 23, 25, 25, 24, 24, 19, 19, 16, 23, 25, 25, 24, - 24, 19, 19, 16, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, - 30, 30, 30, 29, 28, 28, 28, 28, 26, 26, 26, 25, 23, 23, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, - 30, 30, 30, 28, 
28, 28, 28, 26, 25, 25, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, - 28, 28, 28, 26, 25, 25, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, - 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 24, - 24, 24, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, - 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24, 24, 28, 29, - 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, - 24, 23, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 28, 29, 29, 29, 29, 29, - 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, 24, 23, 21, 21, - 21, 21, 20, 20, 20, 20, 19, 19, 23, 24, 24, 24, 24, 25, 25, 25, 25, 25, - 24, 24, 24, 24, 24, 24, 24, 23, 21, 21, 21, 20, 19, 19, 19, 18, 18, 18, - 18, 17, 16, 16 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 30, 24, 22, 30, 26, 23, 22, 24, 23, 21, 21, 22, 22, 21, 19, - /* Size 8x8 */ - 33, 33, 32, 29, 26, 23, 21, 21, 33, 33, 31, 28, 25, 23, 22, 22, 32, 31, - 29, 26, 24, 23, 22, 23, 29, 28, 26, 24, 23, 22, 22, 22, 26, 25, 24, 23, - 22, 21, 21, 22, 23, 23, 23, 22, 21, 20, 20, 20, 21, 22, 22, 22, 21, 20, - 19, 19, 21, 22, 23, 22, 22, 20, 19, 18, - /* Size 16x16 */ - 32, 33, 33, 34, 34, 31, 31, 28, 28, 25, 25, 21, 21, 21, 21, 21, 33, 33, - 33, 33, 33, 30, 30, 27, 27, 24, 24, 22, 22, 22, 22, 22, 33, 33, 33, 33, - 33, 30, 30, 27, 27, 24, 24, 22, 22, 22, 22, 22, 34, 33, 33, 32, 32, 29, - 29, 26, 26, 24, 24, 22, 22, 23, 23, 23, 34, 33, 33, 32, 32, 29, 29, 26, - 26, 24, 24, 22, 22, 23, 23, 23, 31, 30, 30, 29, 29, 26, 26, 24, 24, 23, - 23, 22, 22, 22, 22, 23, 31, 30, 30, 29, 29, 26, 26, 24, 24, 23, 23, 22, - 22, 22, 22, 23, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 21, 22, - 22, 23, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 21, 22, 22, 23, - 25, 24, 24, 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 25, 24, - 24, 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21, 22, 22, 22, - 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 
19, 19, 21, 22, 22, 22, 22, 22, - 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 22, 22, 22, - 22, 21, 21, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 22, 22, 22, 22, 21, - 21, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 23, 23, 23, 23, 21, 21, 19, - 19, 19, 19, 18, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 34, 34, 34, 32, 31, 31, 31, 29, 28, 28, 28, 26, - 25, 25, 25, 23, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 32, 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 24, 23, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 31, 30, 30, 30, 28, 27, 27, 27, 26, 24, 24, 24, 23, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 30, - 30, 28, 27, 27, 27, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 30, 30, 28, 27, 27, - 27, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 31, 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, - 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 34, 33, 33, 33, 33, 33, - 32, 32, 32, 31, 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, - 22, 22, 23, 23, 23, 23, 23, 23, 34, 33, 33, 33, 33, 33, 32, 32, 32, 31, - 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 23, 23, - 23, 23, 23, 23, 34, 33, 33, 33, 33, 33, 32, 32, 32, 31, 29, 29, 29, 28, - 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, - 32, 32, 31, 31, 31, 31, 31, 31, 31, 29, 28, 28, 28, 26, 25, 25, 25, 24, - 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 31, 30, 30, 30, - 30, 29, 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, 23, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 31, 30, 30, 30, 30, 29, 29, 29, - 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 23, 23, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 26, 26, - 26, 25, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 23, 
23, 29, 29, 28, 28, 28, 28, 28, 28, 28, 26, 25, 25, 25, 24, 23, 23, - 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 28, 28, - 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, - 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, 23, 23, 28, 28, 27, 27, 27, 26, - 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, - 21, 22, 22, 22, 22, 22, 23, 23, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, - 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, - 22, 22, 23, 23, 26, 26, 26, 26, 26, 25, 25, 25, 25, 24, 23, 23, 23, 23, - 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, - 25, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 21, - 21, 21, 21, 21, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 25, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21, 21, - 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 25, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 23, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21, 21, 20, 20, 20, 20, - 21, 21, 21, 21, 21, 21, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, - 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, - 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, - 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, - 22, 22, 22, 21, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 21, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, - 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, - 22, 
22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 23, 23, - 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, - 19, 19, 19, 18, 18, 18, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 18, - 18, 18, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, - /* Size 4x8 */ - 33, 30, 24, 21, 33, 29, 24, 22, 31, 28, 23, 22, 28, 25, 22, 22, 26, 23, - 21, 21, 23, 22, 21, 20, 22, 22, 20, 19, 22, 22, 21, 19, - /* Size 8x4 */ - 33, 33, 31, 28, 26, 23, 22, 22, 30, 29, 28, 25, 23, 22, 22, 22, 24, 24, - 23, 22, 21, 21, 20, 21, 21, 22, 22, 22, 21, 20, 19, 19, - /* Size 8x16 */ - 32, 33, 33, 28, 28, 21, 21, 21, 33, 33, 33, 27, 27, 22, 22, 22, 33, 33, - 33, 27, 27, 22, 22, 22, 34, 32, 32, 26, 26, 22, 22, 23, 34, 32, 32, 26, - 26, 22, 22, 23, 31, 28, 28, 24, 24, 22, 22, 22, 31, 28, 28, 24, 24, 22, - 22, 22, 28, 26, 26, 22, 22, 22, 22, 23, 28, 26, 26, 22, 22, 22, 22, 23, - 24, 24, 24, 22, 22, 20, 20, 21, 24, 24, 24, 22, 22, 20, 20, 21, 21, 22, - 22, 21, 21, 19, 19, 19, 21, 22, 22, 21, 21, 19, 19, 19, 21, 22, 22, 22, - 22, 19, 19, 18, 21, 22, 22, 22, 22, 19, 19, 18, 21, 23, 23, 22, 22, 19, - 19, 18, - /* Size 16x8 */ - 32, 33, 33, 34, 34, 31, 31, 28, 28, 24, 24, 21, 21, 21, 21, 21, 33, 33, - 33, 32, 32, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 33, 33, 33, 32, - 32, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 28, 27, 27, 26, 26, 24, - 24, 22, 22, 22, 22, 21, 21, 22, 22, 22, 28, 27, 27, 26, 26, 24, 24, 22, - 22, 22, 22, 21, 21, 22, 22, 22, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, - 20, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 19, - 19, 19, 19, 19, 21, 22, 22, 23, 23, 22, 22, 23, 23, 21, 21, 19, 19, 18, - 18, 18, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 31, 28, 28, 28, 24, 21, 21, 21, 21, 21, 21, 33, 33, - 33, 33, 
33, 30, 28, 28, 28, 24, 22, 22, 22, 21, 21, 21, 33, 33, 33, 33, - 33, 30, 27, 27, 27, 24, 22, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 30, - 27, 27, 27, 24, 22, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 30, 27, 27, - 27, 24, 22, 22, 22, 22, 22, 22, 33, 33, 32, 32, 32, 29, 26, 26, 26, 24, - 22, 22, 22, 22, 22, 22, 34, 33, 32, 32, 32, 29, 26, 26, 26, 24, 22, 22, - 22, 23, 23, 23, 34, 33, 32, 32, 32, 29, 26, 26, 26, 24, 22, 22, 22, 23, - 23, 23, 34, 33, 32, 32, 32, 29, 26, 26, 26, 24, 22, 22, 22, 23, 23, 23, - 32, 31, 30, 30, 30, 28, 25, 25, 25, 23, 22, 22, 22, 22, 23, 23, 31, 30, - 28, 28, 28, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 31, 30, 28, 28, - 28, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 31, 30, 28, 28, 28, 26, - 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 29, 28, 27, 27, 27, 25, 23, 23, - 23, 22, 22, 22, 22, 22, 23, 23, 28, 27, 26, 26, 26, 24, 22, 22, 22, 22, - 22, 22, 22, 22, 23, 23, 28, 27, 26, 26, 26, 24, 22, 22, 22, 22, 22, 22, - 22, 22, 23, 23, 28, 27, 26, 26, 26, 24, 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 26, 26, 25, 25, 25, 23, 22, 22, 22, 21, 21, 21, 21, 21, 22, 22, - 24, 24, 24, 24, 24, 23, 22, 22, 22, 21, 20, 20, 20, 20, 21, 21, 24, 24, - 24, 24, 24, 23, 22, 22, 22, 21, 20, 20, 20, 20, 21, 21, 24, 24, 24, 24, - 24, 23, 22, 22, 22, 21, 20, 20, 20, 20, 21, 21, 23, 23, 23, 23, 23, 22, - 22, 22, 22, 21, 20, 20, 20, 20, 20, 20, 21, 21, 22, 22, 22, 22, 21, 21, - 21, 20, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 21, 21, 21, 20, - 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 21, 21, 21, 20, 19, 19, - 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19, - 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18, - 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18, 21, 22, - 22, 22, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18, 21, 22, 23, 23, - 23, 22, 22, 22, 22, 21, 19, 19, 19, 19, 18, 18, 21, 22, 23, 23, 23, 23, - 22, 22, 22, 21, 19, 19, 19, 18, 18, 18, 21, 22, 23, 23, 23, 23, 22, 22, - 22, 21, 
19, 19, 19, 18, 18, 18, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 34, 34, 34, 32, 31, 31, 31, 29, 28, 28, 28, 26, - 24, 24, 24, 23, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 31, 30, 30, 30, 28, 27, 27, 27, 26, 24, 24, 24, 23, - 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 30, 28, 28, 28, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, - 22, 22, 22, 23, 23, 23, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 28, 28, - 28, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 23, - 23, 23, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 28, 28, 28, 27, 26, 26, - 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 31, 30, - 30, 30, 30, 29, 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, - 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 28, 28, 27, 27, 27, 26, - 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, - 21, 22, 22, 22, 22, 22, 22, 22, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, - 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, - 22, 22, 22, 22, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 24, 23, - 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 21, - 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 21, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, - 22, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 21, 21, - 22, 22, 22, 22, 23, 23, 23, 23, 22, 22, 22, 23, 23, 23, 23, 22, 21, 21, - 21, 20, 19, 19, 19, 19, 18, 
18, 18, 18, 18, 18, 21, 21, 22, 22, 22, 22, - 23, 23, 23, 23, 22, 22, 22, 23, 23, 23, 23, 22, 21, 21, 21, 20, 19, 19, - 19, 19, 18, 18, 18, 18, 18, 18, - /* Size 4x16 */ - 33, 31, 24, 21, 33, 30, 24, 22, 33, 30, 24, 22, 33, 29, 24, 23, 33, 29, - 24, 23, 30, 26, 23, 22, 30, 26, 23, 22, 27, 24, 22, 22, 27, 24, 22, 22, - 24, 23, 21, 20, 24, 23, 21, 20, 21, 22, 20, 19, 21, 22, 20, 19, 22, 22, - 20, 19, 22, 22, 20, 19, 22, 23, 21, 18, - /* Size 16x4 */ - 33, 33, 33, 33, 33, 30, 30, 27, 27, 24, 24, 21, 21, 22, 22, 22, 31, 30, - 30, 29, 29, 26, 26, 24, 24, 23, 23, 22, 22, 22, 22, 23, 24, 24, 24, 24, - 24, 23, 23, 22, 22, 21, 21, 20, 20, 20, 20, 21, 21, 22, 22, 23, 23, 22, - 22, 22, 22, 20, 20, 19, 19, 19, 19, 18, - /* Size 8x32 */ - 32, 33, 33, 28, 28, 21, 21, 21, 33, 33, 33, 28, 28, 22, 22, 21, 33, 33, - 33, 27, 27, 22, 22, 22, 33, 33, 33, 27, 27, 22, 22, 22, 33, 33, 33, 27, - 27, 22, 22, 22, 33, 32, 32, 26, 26, 22, 22, 22, 34, 32, 32, 26, 26, 22, - 22, 23, 34, 32, 32, 26, 26, 22, 22, 23, 34, 32, 32, 26, 26, 22, 22, 23, - 32, 30, 30, 25, 25, 22, 22, 23, 31, 28, 28, 24, 24, 22, 22, 22, 31, 28, - 28, 24, 24, 22, 22, 22, 31, 28, 28, 24, 24, 22, 22, 22, 29, 27, 27, 23, - 23, 22, 22, 23, 28, 26, 26, 22, 22, 22, 22, 23, 28, 26, 26, 22, 22, 22, - 22, 23, 28, 26, 26, 22, 22, 22, 22, 23, 26, 25, 25, 22, 22, 21, 21, 22, - 24, 24, 24, 22, 22, 20, 20, 21, 24, 24, 24, 22, 22, 20, 20, 21, 24, 24, - 24, 22, 22, 20, 20, 21, 23, 23, 23, 22, 22, 20, 20, 20, 21, 22, 22, 21, - 21, 19, 19, 19, 21, 22, 22, 21, 21, 19, 19, 19, 21, 22, 22, 21, 21, 19, - 19, 19, 21, 22, 22, 22, 22, 19, 19, 19, 21, 22, 22, 22, 22, 19, 19, 18, - 21, 22, 22, 22, 22, 19, 19, 18, 21, 22, 22, 22, 22, 19, 19, 18, 21, 23, - 23, 22, 22, 19, 19, 18, 21, 23, 23, 22, 22, 19, 19, 18, 21, 23, 23, 22, - 22, 19, 19, 18, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 34, 34, 34, 32, 31, 31, 31, 29, 28, 28, 28, 26, - 24, 24, 24, 23, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 30, 28, 28, 28, 27, 26, 
26, 26, 25, 24, 24, 24, 23, - 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 30, 28, 28, 28, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, - 22, 22, 22, 23, 23, 23, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, - 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, - 22, 22, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, - 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, - 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, - 22, 22, 22, 23, 23, 23, 23, 22, 21, 21, 21, 20, 19, 19, 19, 19, 18, 18, - 18, 18, 18, 18 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 32, 32, 29, 32, 32, 31, 29, 32, 31, 29, 27, 29, 29, 27, 22, - /* Size 8x8 */ - 33, 33, 33, 32, 32, 32, 30, 29, 33, 32, 32, 32, 32, 31, 30, 29, 33, 32, - 32, 32, 32, 31, 31, 30, 32, 32, 32, 31, 30, 30, 29, 28, 32, 32, 32, 30, - 29, 29, 28, 27, 32, 31, 31, 30, 29, 28, 27, 26, 30, 30, 31, 29, 28, 27, - 26, 24, 29, 29, 30, 28, 27, 26, 24, 21, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 30, 28, 28, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 30, 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 30, 30, 30, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, - 29, 29, 28, 28, 33, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 29, 29, 28, - 28, 28, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 29, 28, 28, 28, 28, - 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 32, 32, - 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 31, 
31, 31, 31, - 31, 31, 30, 29, 29, 28, 28, 27, 26, 26, 24, 24, 30, 30, 30, 30, 31, 31, - 29, 29, 28, 28, 28, 26, 26, 25, 24, 24, 30, 30, 30, 30, 30, 30, 29, 28, - 28, 28, 28, 26, 25, 24, 23, 23, 28, 29, 29, 29, 30, 30, 28, 28, 28, 27, - 27, 24, 24, 23, 21, 21, 28, 29, 29, 29, 30, 30, 28, 28, 28, 27, 27, 24, - 24, 23, 21, 21, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 28, 28, 28, 28, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, - 30, 29, 29, 29, 29, 28, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, - 29, 28, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, - 30, 30, 30, 30, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, - 30, 30, 30, 29, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, - 30, 29, 29, 29, 29, 28, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 31, 31, 31, 
31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28, 28, - 28, 28, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, - 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, - 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, - 29, 29, 28, 28, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28, - 28, 28, 28, 27, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, - 30, 30, 30, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 26, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 29, - 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, - 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, - 28, 27, 27, 27, 27, 26, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 30, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 26, 26, - 26, 25, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 29, 29, 29, - 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 25, 24, 24, 24, 24, 30, 30, - 30, 30, 30, 30, 30, 31, 31, 31, 31, 30, 29, 29, 29, 29, 28, 28, 28, 28, - 28, 27, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 30, 30, 30, 30, 30, 30, - 30, 31, 31, 31, 31, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 27, 26, 26, - 26, 26, 25, 24, 24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, - 31, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 25, 24, - 24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 28, - 28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 25, 25, 24, 23, 23, 23, 23, 23, - 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 28, 28, - 27, 27, 27, 26, 25, 24, 
24, 24, 23, 22, 22, 22, 22, 21, 28, 29, 29, 29, - 29, 29, 29, 30, 30, 30, 30, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, - 24, 24, 24, 24, 23, 22, 21, 21, 21, 21, 28, 29, 29, 29, 29, 29, 29, 30, - 30, 30, 30, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 24, 24, 24, 24, - 23, 22, 21, 21, 21, 21, 28, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 29, - 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 24, 24, 24, 24, 23, 22, 21, 21, - 21, 21, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 28, 28, 28, 28, 28, - 27, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 21, 21, 21, 21, 20, - /* Size 4x8 */ - 33, 33, 32, 29, 32, 32, 32, 29, 32, 32, 31, 30, 32, 32, 30, 28, 32, 31, - 29, 27, 31, 31, 28, 26, 30, 30, 28, 24, 29, 30, 27, 21, - /* Size 8x4 */ - 33, 32, 32, 32, 32, 31, 30, 29, 33, 32, 32, 32, 31, 31, 30, 30, 32, 32, - 31, 30, 29, 28, 28, 27, 29, 29, 30, 28, 27, 26, 24, 21, - /* Size 8x16 */ - 32, 33, 33, 33, 32, 32, 29, 28, 33, 32, 32, 32, 32, 32, 29, 29, 33, 32, - 32, 32, 32, 32, 29, 29, 33, 32, 32, 32, 32, 32, 30, 29, 33, 32, 32, 32, - 31, 31, 30, 30, 33, 32, 32, 32, 31, 31, 30, 30, 33, 32, 32, 31, 30, 30, - 29, 28, 32, 32, 32, 31, 30, 30, 28, 28, 32, 32, 32, 31, 30, 30, 28, 28, - 32, 32, 31, 30, 29, 29, 28, 27, 32, 32, 31, 30, 29, 29, 28, 27, 31, 31, - 31, 29, 28, 28, 26, 25, 30, 30, 30, 29, 28, 28, 25, 24, 30, 30, 30, 29, - 28, 28, 24, 23, 28, 29, 30, 28, 27, 27, 22, 21, 28, 29, 30, 28, 27, 27, - 22, 21, - /* Size 16x8 */ - 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 31, 30, 30, 28, 28, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 33, 32, 32, 32, 32, 32, - 31, 31, 31, 30, 30, 29, 29, 29, 28, 28, 32, 32, 32, 32, 31, 31, 30, 30, - 30, 29, 29, 28, 28, 28, 27, 27, 32, 32, 32, 32, 31, 31, 30, 30, 30, 29, - 29, 28, 28, 28, 27, 27, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 26, - 25, 24, 22, 22, 28, 29, 29, 29, 30, 30, 28, 28, 28, 27, 27, 25, 24, 23, - 21, 21, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 33, 33, 
32, 32, 32, 32, 31, 29, 28, 28, 28, 33, 33, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 29, 29, 29, 29, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 30, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 29, - 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, - 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 33, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 33, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 30, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32, - 31, 31, 30, 30, 30, 30, 29, 28, 28, 28, 32, 32, 32, 32, 32, 32, 31, 30, - 30, 30, 30, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, - 30, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, - 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 28, 28, - 28, 28, 32, 32, 32, 31, 31, 31, 31, 30, 29, 29, 29, 28, 28, 27, 27, 27, - 32, 32, 32, 31, 31, 31, 30, 29, 29, 29, 29, 28, 28, 27, 27, 27, 32, 32, - 32, 31, 31, 31, 30, 29, 29, 29, 29, 28, 28, 27, 27, 27, 32, 32, 32, 31, - 31, 31, 30, 29, 29, 29, 29, 28, 28, 27, 27, 27, 32, 31, 31, 31, 31, 31, - 30, 29, 28, 28, 28, 28, 26, 26, 26, 26, 31, 31, 31, 31, 31, 31, 29, 28, - 28, 28, 28, 27, 26, 25, 25, 25, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28, - 28, 26, 25, 24, 24, 24, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 26, - 25, 24, 24, 24, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 26, 25, 24, - 24, 24, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 26, 24, 23, 23, 23, - 29, 29, 30, 30, 30, 30, 28, 28, 27, 27, 27, 25, 23, 22, 22, 22, 28, 29, - 29, 30, 30, 30, 28, 28, 27, 27, 27, 24, 22, 21, 21, 21, 28, 29, 29, 30, - 30, 30, 28, 28, 27, 27, 27, 24, 22, 21, 21, 21, 28, 29, 29, 30, 30, 30, - 28, 28, 27, 27, 27, 24, 22, 
21, 21, 21, 28, 28, 28, 28, 28, 28, 28, 27, - 26, 26, 26, 24, 22, 21, 21, 21, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 28, 28, 28, 28, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, - 30, 30, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, - 30, 28, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 28, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, - 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 28, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, - 29, 29, 29, 28, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 27, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, - 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, - 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 28, - 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, - 26, 25, 24, 24, 24, 24, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 29, - 29, 28, 28, 28, 28, 28, 28, 28, 28, 26, 26, 25, 25, 25, 24, 23, 22, 22, - 22, 22, 28, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, - 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 28, 29, - 29, 29, 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 
28, 28, 28, 27, 27, 27, - 27, 26, 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 28, 29, 29, 29, 29, 29, - 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 25, 24, - 24, 24, 23, 22, 21, 21, 21, 21, - /* Size 4x16 */ - 33, 33, 32, 28, 33, 32, 32, 29, 32, 32, 32, 29, 32, 32, 32, 29, 32, 32, - 31, 30, 32, 32, 31, 30, 32, 32, 30, 28, 32, 32, 30, 28, 32, 32, 30, 28, - 32, 31, 29, 27, 32, 31, 29, 27, 31, 31, 28, 25, 30, 30, 28, 24, 30, 30, - 28, 23, 29, 30, 27, 21, 29, 30, 27, 21, - /* Size 16x4 */ - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 32, 32, 32, 32, - 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 27, 27, 28, 29, 29, 29, 30, 30, - 28, 28, 28, 27, 27, 25, 24, 23, 21, 21, - /* Size 8x32 */ - 32, 33, 33, 33, 32, 32, 29, 28, 33, 33, 33, 32, 32, 32, 29, 29, 33, 32, - 32, 32, 32, 32, 29, 29, 33, 32, 32, 32, 32, 32, 29, 29, 33, 32, 32, 32, - 32, 32, 29, 29, 33, 32, 32, 32, 32, 32, 29, 29, 33, 32, 32, 32, 32, 32, - 30, 29, 33, 32, 32, 32, 32, 32, 30, 29, 33, 32, 32, 32, 31, 31, 30, 30, - 33, 32, 32, 32, 31, 31, 30, 30, 33, 32, 32, 32, 31, 31, 30, 30, 33, 32, - 32, 31, 31, 31, 29, 29, 33, 32, 32, 31, 30, 30, 29, 28, 32, 32, 32, 31, - 30, 30, 28, 28, 32, 32, 32, 31, 30, 30, 28, 28, 32, 32, 32, 31, 30, 30, - 28, 28, 32, 32, 32, 31, 30, 30, 28, 28, 32, 32, 31, 31, 29, 29, 28, 27, - 32, 32, 31, 30, 29, 29, 28, 27, 32, 32, 31, 30, 29, 29, 28, 27, 32, 32, - 31, 30, 29, 29, 28, 27, 32, 31, 31, 30, 28, 28, 26, 26, 31, 31, 31, 29, - 28, 28, 26, 25, 30, 30, 30, 29, 28, 28, 25, 24, 30, 30, 30, 29, 28, 28, - 25, 24, 30, 30, 30, 29, 28, 28, 25, 24, 30, 30, 30, 29, 28, 28, 24, 23, - 29, 30, 30, 28, 27, 27, 23, 22, 28, 29, 30, 28, 27, 27, 22, 21, 28, 29, - 30, 28, 27, 27, 22, 21, 28, 29, 30, 28, 27, 27, 22, 21, 28, 28, 28, 28, - 26, 26, 22, 21, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 28, 28, 28, 28, 33, 33, 
32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 30, 30, 30, 30, 30, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, - 30, 30, 30, 30, 30, 28, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 29, 28, 28, 28, - 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, - 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29, - 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 29, 29, 29, 29, 29, 29, - 30, 30, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28, 26, 26, 25, - 25, 25, 24, 23, 22, 22, 22, 22, 28, 29, 29, 29, 29, 29, 29, 29, 30, 30, - 30, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 23, 22, - 21, 21, 21, 21 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 32, 27, 22, 32, 30, 25, 22, 27, 25, 22, 22, 22, 22, 22, 20, - /* Size 8x8 */ - 33, 33, 34, 30, 28, 26, 24, 21, 33, 33, 33, 30, 28, 26, 24, 22, 34, 33, - 32, 29, 26, 25, 24, 22, 30, 30, 29, 26, 24, 23, 23, 22, 28, 28, 26, 24, - 22, 22, 22, 22, 26, 26, 25, 23, 22, 22, 21, 21, 24, 24, 24, 23, 22, 21, - 21, 20, 21, 22, 22, 22, 22, 21, 20, 19, - /* Size 16x16 */ - 32, 33, 33, 33, 34, 34, 31, 31, 30, 28, 28, 26, 25, 23, 21, 21, 33, 33, - 33, 33, 33, 33, 31, 30, 28, 27, 27, 25, 24, 23, 21, 21, 33, 33, 33, 33, - 33, 33, 30, 30, 28, 27, 27, 25, 24, 23, 22, 22, 33, 33, 33, 33, 33, 33, - 30, 29, 28, 26, 26, 25, 24, 23, 22, 22, 34, 33, 33, 33, 32, 32, 30, 29, - 28, 26, 26, 24, 24, 23, 22, 22, 34, 33, 33, 33, 32, 32, 30, 29, 28, 26, - 26, 24, 24, 23, 22, 22, 31, 31, 30, 30, 30, 30, 28, 27, 26, 24, 24, 23, - 23, 23, 22, 22, 31, 30, 30, 29, 29, 29, 27, 26, 26, 24, 24, 23, 23, 22, - 22, 22, 30, 28, 28, 28, 28, 28, 26, 26, 24, 23, 23, 23, 22, 22, 22, 22, - 28, 27, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 28, 27, - 27, 26, 26, 26, 
24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 26, 25, 25, 25, - 24, 24, 23, 23, 23, 22, 22, 21, 21, 21, 20, 20, 25, 24, 24, 24, 24, 24, - 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 23, 23, 23, 23, 23, 23, 23, 22, - 22, 22, 22, 21, 21, 20, 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, - 21, 20, 20, 20, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, - 20, 20, 19, 19, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 30, 28, - 28, 28, 28, 27, 26, 25, 25, 25, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33, - 33, 33, 33, 33, 34, 34, 34, 32, 31, 30, 30, 30, 29, 28, 28, 28, 28, 26, - 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, 31, 30, 30, 30, 28, 28, 27, 27, 27, 26, 25, 24, 24, 24, - 23, 22, 21, 21, 21, 22, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, - 30, 30, 30, 30, 28, 28, 27, 27, 27, 26, 25, 24, 24, 24, 23, 22, 22, 22, - 22, 22, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 30, - 28, 28, 27, 27, 27, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 30, 28, 28, 27, 27, - 27, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 31, 30, 29, 29, 29, 28, 27, 26, 26, 26, 26, 25, 24, - 24, 24, 23, 22, 22, 22, 22, 22, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 31, 30, 29, 29, 29, 28, 27, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, - 22, 22, 22, 22, 34, 34, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, - 29, 29, 28, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, - 34, 34, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 28, 26, - 26, 26, 26, 25, 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 34, 34, 33, 33, - 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 28, 26, 26, 26, 26, 25, - 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 33, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 30, 28, 28, 28, 28, 27, 26, 25, 25, 25, 24, 24, 24, 24, 24, - 23, 22, 22, 22, 22, 22, 31, 31, 31, 30, 30, 30, 30, 
30, 30, 30, 30, 28, - 28, 27, 27, 27, 26, 25, 24, 24, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22, - 22, 22, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 28, 27, 26, 26, 26, - 26, 24, 24, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 31, 30, - 30, 30, 30, 30, 29, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 24, 24, 24, - 24, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 31, 30, 30, 30, 30, 30, - 29, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 24, 24, 24, 24, 23, 23, 23, - 23, 23, 22, 22, 22, 22, 22, 22, 30, 29, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 27, 26, 26, 26, 26, 24, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 28, 28, 28, 28, 28, 28, 27, 27, 26, 26, 26, 26, 25, 24, - 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 28, 28, 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 28, 28, 27, 27, - 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 28, 28, 27, 27, 27, 27, 26, 26, - 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 21, 21, 21, 22, 27, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 24, - 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 26, 25, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 23, 23, 23, 23, - 23, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 21, 25, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, - 22, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 25, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, - 21, 21, 21, 20, 20, 20, 20, 20, 25, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 20, - 20, 20, 20, 20, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, - 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, - 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 
22, 22, 22, 22, 22, - 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, - 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 21, 21, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, - 20, 20, 19, 19, 19, 19, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 19, 19, - 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 19, - /* Size 4x8 */ - 33, 33, 28, 21, 33, 33, 27, 22, 33, 32, 26, 22, 30, 28, 24, 22, 28, 26, - 22, 22, 26, 25, 22, 21, 24, 24, 22, 20, 21, 22, 21, 19, - /* Size 8x4 */ - 33, 33, 33, 30, 28, 26, 24, 21, 33, 33, 32, 28, 26, 25, 24, 22, 28, 27, - 26, 24, 22, 22, 22, 21, 21, 22, 22, 22, 22, 21, 20, 19, - /* Size 8x16 */ - 32, 33, 33, 31, 28, 28, 23, 21, 33, 33, 33, 30, 27, 27, 23, 22, 33, 33, - 33, 30, 27, 27, 23, 22, 33, 33, 32, 30, 26, 26, 23, 22, 34, 32, 32, 29, - 26, 26, 23, 22, 34, 32, 32, 29, 26, 26, 23, 22, 31, 30, 29, 28, 24, 24, - 22, 22, 31, 29, 28, 27, 24, 24, 22, 22, 29, 28, 28, 26, 23, 23, 22, 22, - 28, 26, 26, 24, 22, 22, 22, 22, 28, 26, 26, 24, 22, 22, 22, 22, 25, 24, - 24, 23, 22, 22, 21, 21, 24, 24, 24, 23, 22, 22, 21, 20, 23, 23, 23, 23, - 22, 22, 20, 20, 21, 22, 22, 22, 21, 21, 20, 19, 21, 22, 22, 22, 21, 21, - 20, 19, - /* Size 16x8 */ - 32, 33, 33, 33, 34, 34, 31, 31, 29, 28, 28, 25, 24, 23, 21, 21, 33, 33, - 33, 33, 32, 32, 30, 29, 28, 26, 26, 24, 24, 23, 22, 22, 33, 33, 33, 32, - 32, 32, 29, 28, 28, 26, 26, 24, 24, 23, 22, 22, 31, 30, 30, 30, 29, 29, - 28, 27, 26, 24, 24, 23, 23, 23, 22, 22, 28, 27, 27, 26, 26, 26, 24, 24, - 23, 22, 22, 22, 22, 22, 21, 21, 28, 27, 27, 26, 26, 26, 24, 24, 23, 22, - 22, 22, 22, 22, 21, 21, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, - 21, 20, 20, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, - 19, 19, - /* 
Size 16x32 */ - 32, 33, 33, 33, 33, 33, 31, 29, 28, 28, 28, 26, 23, 21, 21, 21, 33, 33, - 33, 33, 33, 33, 31, 28, 28, 28, 28, 25, 23, 21, 21, 21, 33, 33, 33, 33, - 33, 33, 30, 28, 27, 27, 27, 25, 23, 22, 22, 22, 33, 33, 33, 33, 33, 33, - 30, 28, 27, 27, 27, 25, 23, 22, 22, 22, 33, 33, 33, 33, 33, 33, 30, 28, - 27, 27, 27, 25, 23, 22, 22, 22, 33, 33, 33, 33, 33, 33, 30, 28, 27, 27, - 27, 25, 23, 22, 22, 22, 33, 33, 33, 32, 32, 32, 30, 28, 26, 26, 26, 25, - 23, 22, 22, 22, 34, 33, 33, 32, 32, 32, 30, 27, 26, 26, 26, 24, 23, 22, - 22, 22, 34, 33, 32, 32, 32, 32, 29, 27, 26, 26, 26, 24, 23, 22, 22, 22, - 34, 33, 32, 32, 32, 32, 29, 27, 26, 26, 26, 24, 23, 22, 22, 22, 34, 33, - 32, 32, 32, 32, 29, 27, 26, 26, 26, 24, 23, 22, 22, 22, 33, 32, 31, 31, - 31, 31, 28, 26, 25, 25, 25, 24, 23, 22, 22, 22, 31, 30, 30, 29, 29, 29, - 28, 26, 24, 24, 24, 23, 22, 22, 22, 22, 31, 30, 29, 28, 28, 28, 27, 25, - 24, 24, 24, 23, 22, 22, 22, 22, 31, 30, 29, 28, 28, 28, 27, 25, 24, 24, - 24, 23, 22, 22, 22, 22, 31, 30, 29, 28, 28, 28, 27, 25, 24, 24, 24, 23, - 22, 22, 22, 22, 29, 28, 28, 28, 28, 28, 26, 24, 23, 23, 23, 23, 22, 22, - 22, 22, 28, 28, 27, 26, 26, 26, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, - 28, 27, 26, 26, 26, 26, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 28, 27, - 26, 26, 26, 26, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 28, 27, 26, 26, - 26, 26, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 26, 26, 26, 25, 25, 25, - 24, 22, 22, 22, 22, 21, 21, 21, 21, 21, 25, 25, 24, 24, 24, 24, 23, 22, - 22, 22, 22, 21, 21, 21, 21, 21, 24, 24, 24, 24, 24, 24, 23, 22, 22, 22, - 22, 21, 21, 20, 20, 20, 24, 24, 24, 24, 24, 24, 23, 22, 22, 22, 22, 21, - 21, 20, 20, 20, 24, 24, 24, 24, 24, 24, 23, 22, 22, 22, 22, 21, 21, 20, - 20, 20, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 21, 20, 20, 20, 20, - 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 21, 21, - 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 19, 19, 19, 21, 21, 22, 22, - 22, 22, 22, 21, 21, 21, 21, 20, 20, 19, 19, 19, 21, 21, 22, 
22, 22, 22, - 22, 21, 21, 21, 21, 20, 20, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 21, 20, 19, 19, 19, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 29, 28, - 28, 28, 28, 26, 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 30, 28, 28, 27, 27, 27, 26, - 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 31, 30, 29, 29, 29, 28, 27, 26, 26, 26, 26, 24, 24, 24, 24, - 23, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, - 29, 28, 28, 28, 28, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, - 22, 22, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 28, - 28, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 33, 33, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 28, 28, 26, 26, 26, - 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 31, 31, 30, 30, 30, 30, - 30, 30, 29, 29, 29, 28, 28, 27, 27, 27, 26, 24, 24, 24, 24, 24, 23, 23, - 23, 23, 23, 22, 22, 22, 22, 22, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, - 27, 26, 26, 25, 25, 25, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, - 21, 21, 21, 22, 28, 28, 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, - 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 22, - 28, 28, 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 22, 28, 28, 27, 27, - 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 21, 21, 21, 21, 22, 26, 25, 25, 25, 25, 25, 25, 24, - 24, 24, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21, 21, - 21, 21, 20, 20, 20, 21, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 20, 20, 20, 20, - 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 21, - 22, 22, 
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, - 20, 20, 20, 20, 19, 19, 19, 19, - /* Size 4x16 */ - 33, 33, 28, 21, 33, 33, 27, 22, 33, 33, 27, 22, 33, 32, 26, 22, 33, 32, - 26, 22, 33, 32, 26, 22, 30, 29, 24, 22, 30, 28, 24, 22, 28, 28, 23, 22, - 27, 26, 22, 22, 27, 26, 22, 22, 25, 24, 22, 21, 24, 24, 22, 20, 23, 23, - 22, 20, 21, 22, 21, 19, 21, 22, 21, 19, - /* Size 16x4 */ - 33, 33, 33, 33, 33, 33, 30, 30, 28, 27, 27, 25, 24, 23, 21, 21, 33, 33, - 33, 32, 32, 32, 29, 28, 28, 26, 26, 24, 24, 23, 22, 22, 28, 27, 27, 26, - 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, - /* Size 8x32 */ - 32, 33, 33, 31, 28, 28, 23, 21, 33, 33, 33, 31, 28, 28, 23, 21, 33, 33, - 33, 30, 27, 27, 23, 22, 33, 33, 33, 30, 27, 27, 23, 22, 33, 33, 33, 30, - 27, 27, 23, 22, 33, 33, 33, 30, 27, 27, 23, 22, 33, 33, 32, 30, 26, 26, - 23, 22, 34, 33, 32, 30, 26, 26, 23, 22, 34, 32, 32, 29, 26, 26, 23, 22, - 34, 32, 32, 29, 26, 26, 23, 22, 34, 32, 32, 29, 26, 26, 23, 22, 33, 31, - 31, 28, 25, 25, 23, 22, 31, 30, 29, 28, 24, 24, 22, 22, 31, 29, 28, 27, - 24, 24, 22, 22, 31, 29, 28, 27, 24, 24, 22, 22, 31, 29, 28, 27, 24, 24, - 22, 22, 29, 28, 28, 26, 23, 23, 22, 22, 28, 27, 26, 24, 22, 22, 22, 22, - 28, 26, 26, 24, 22, 22, 22, 22, 28, 26, 26, 24, 22, 22, 22, 22, 28, 26, - 26, 24, 22, 22, 22, 22, 26, 26, 25, 24, 22, 22, 21, 21, 25, 24, 24, 23, - 22, 22, 21, 21, 24, 24, 24, 23, 22, 22, 21, 20, 24, 24, 24, 23, 22, 22, - 21, 20, 24, 24, 24, 23, 22, 22, 21, 20, 23, 23, 23, 23, 22, 22, 20, 20, - 22, 22, 22, 22, 21, 21, 20, 20, 21, 22, 22, 22, 21, 21, 20, 19, 21, 22, - 22, 22, 21, 21, 20, 19, 21, 22, 22, 22, 21, 21, 20, 19, 21, 22, 22, 22, - 22, 22, 20, 19, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 29, 28, - 28, 28, 28, 26, 25, 24, 
24, 24, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33, - 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 28, 27, 26, 26, 26, 26, - 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 31, 29, 28, 28, 28, 28, 26, 26, 26, 26, 25, 24, 24, 24, 24, - 23, 22, 22, 22, 22, 22, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 28, - 28, 27, 27, 27, 26, 24, 24, 24, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22, - 22, 22, 28, 28, 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, - 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 22, 28, 28, - 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 22, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, - 21, 21, 20, 20, 20, 20, 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, - 19, 19, 19, 19 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 33, 32, 32, 32, 32, 32, 32, 31, 32, 32, 31, 30, 32, 31, 30, 29, - /* Size 8x8 */ - 33, 33, 33, 33, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, - 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, - 31, 31, 30, 29, 32, 32, 32, 32, 31, 30, 30, 29, 32, 32, 32, 32, 30, 30, - 29, 28, 31, 31, 31, 31, 29, 29, 28, 27, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 33, 33, - 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, - 31, 30, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 29, - 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 
31, 31, 30, 30, 30, 29, 33, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 31, 30, 29, 29, 29, 28, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 30, 30, 29, 29, 29, 28, 28, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 30, 30, 29, 29, 29, 28, 28, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, - 30, 29, 28, 28, 28, 27, 30, 30, 30, 30, 30, 31, 31, 30, 29, 29, 29, 28, - 28, 28, 27, 26, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 30, 30, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 30, 30, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 30, 30, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 30, 30, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 
31, 31, 31, 31, 31, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, - 31, 31, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 30, 30, 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, - 30, 30, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, - 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, - 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, - 30, 30, 30, 29, 29, 29, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, - 29, 29, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, - 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, - 29, 29, 29, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, - 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, - 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, - 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, - 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, - 28, 28, 28, 27, 27, 27, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, - 31, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, - 26, 26, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 30, 30, - 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 26, 26, - /* Size 4x8 */ - 33, 33, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, - 31, 30, 32, 32, 30, 30, 32, 31, 30, 29, 31, 31, 29, 28, - /* Size 8x4 */ - 33, 33, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 31, 31, 32, 32, - 32, 32, 31, 30, 30, 29, 32, 32, 32, 31, 30, 30, 29, 28, - /* Size 8x16 */ - 32, 33, 33, 33, 33, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 31, 33, 32, - 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, - 32, 32, 32, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 32, 31, - 31, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 31, 30, 30, 30, - 32, 32, 32, 32, 31, 30, 30, 30, 32, 32, 32, 32, 31, 30, 30, 30, 32, 32, - 32, 32, 31, 29, 29, 29, 32, 32, 31, 31, 30, 29, 29, 28, 32, 32, 31, 31, - 30, 29, 29, 28, 32, 31, 31, 31, 30, 28, 28, 28, 30, 30, 30, 30, 29, 28, - 28, 27, - /* Size 16x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 33, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 30, 30, 30, 29, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, - 30, 29, 29, 29, 28, 28, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29, - 29, 29, 28, 28, 32, 31, 31, 31, 31, 31, 
31, 31, 30, 30, 30, 29, 28, 28, - 28, 27, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 31, 31, 30, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, - 30, 29, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 30, 30, 30, 30, 30, 29, 29, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 30, 29, 29, 29, 29, 29, 28, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, - 29, 29, 29, 29, 28, 28, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 29, 29, - 29, 29, 28, 28, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, - 28, 28, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28, - 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28, 32, 31, - 31, 31, 31, 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 27, 31, 31, 31, 31, - 31, 31, 31, 
30, 30, 29, 28, 28, 28, 28, 28, 27, 30, 30, 30, 30, 30, 30, - 30, 30, 29, 28, 28, 28, 28, 28, 27, 26, 30, 30, 30, 30, 30, 30, 30, 30, - 29, 28, 28, 28, 28, 28, 27, 26, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, - 30, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 31, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 30, 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 28, 28, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, - 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, - 29, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, - 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 30, 30, 30, 30, 30, 30, 29, 29, 
29, 29, 29, 29, 28, 28, 28, 28, 32, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, - 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 27, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 31, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, - 28, 28, 28, 28, 27, 27, 26, 26, - /* Size 4x16 */ - 33, 33, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, - 32, 32, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 31, 31, 32, 32, 31, 30, - 32, 32, 31, 30, 32, 32, 31, 30, 32, 32, 30, 29, 32, 31, 30, 29, 32, 31, - 30, 29, 31, 31, 29, 28, 30, 30, 28, 28, - /* Size 16x4 */ - 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 28, 32, 32, 32, 32, 32, 31, - 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, - /* Size 8x32 */ - 32, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 31, 33, 33, - 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, - 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, - 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, - 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32, - 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, - 32, 31, 31, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 31, 31, - 31, 30, 33, 32, 32, 32, 31, 30, 30, 30, 32, 32, 32, 32, 31, 30, 30, 30, - 32, 32, 32, 32, 31, 30, 30, 30, 32, 32, 32, 32, 31, 30, 30, 30, 32, 32, - 32, 32, 31, 30, 30, 30, 32, 32, 32, 32, 31, 30, 30, 29, 32, 32, 32, 32, - 31, 29, 29, 29, 32, 32, 31, 31, 31, 29, 29, 28, 32, 32, 31, 31, 30, 29, - 29, 28, 32, 32, 31, 31, 30, 29, 29, 28, 32, 32, 31, 31, 30, 29, 29, 28, - 32, 32, 31, 31, 30, 29, 29, 28, 32, 31, 31, 31, 30, 28, 28, 28, 31, 31, - 31, 31, 30, 28, 28, 28, 30, 30, 30, 30, 29, 28, 28, 27, 30, 30, 30, 30, - 29, 28, 28, 27, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, - 31, 31, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, - 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, - 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, - 29, 29, 29, 29, 28, 28, 28, 28, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28, - 28, 28, 27, 27 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 33, 30, 27, 33, 32, 29, 26, 30, 29, 26, 24, 27, 26, 24, 22, - /* Size 8x8 */ - 33, 33, 33, 34, 30, 29, 28, 26, 33, 33, 33, 33, 30, 29, 27, 25, 33, 33, - 33, 33, 29, 28, 26, 25, 34, 33, 33, 32, 29, 28, 26, 24, 30, 30, 29, 29, - 26, 26, 24, 23, 29, 29, 28, 28, 26, 25, 23, 23, 28, 27, 26, 26, 24, 23, - 22, 22, 26, 25, 25, 24, 23, 23, 22, 21, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 28, 27, 25, 33, 33, - 33, 33, 33, 33, 33, 33, 31, 30, 30, 28, 28, 28, 26, 24, 33, 33, 33, 33, - 33, 33, 33, 32, 30, 30, 30, 28, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, - 33, 32, 30, 30, 30, 28, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32, - 30, 29, 29, 28, 26, 26, 26, 24, 34, 33, 33, 33, 33, 32, 32, 32, 30, 29, - 29, 27, 26, 26, 25, 24, 34, 33, 33, 33, 33, 32, 32, 32, 30, 29, 29, 27, - 26, 26, 25, 24, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 27, 26, 26, - 25, 24, 31, 31, 30, 30, 30, 30, 30, 29, 28, 27, 27, 25, 24, 24, 24, 23, - 
31, 30, 30, 30, 29, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23, 23, 31, 30, - 30, 30, 29, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23, 23, 29, 28, 28, 28, - 28, 27, 27, 27, 25, 25, 25, 23, 22, 22, 22, 22, 28, 28, 27, 27, 26, 26, - 26, 26, 24, 24, 24, 22, 22, 22, 22, 22, 28, 28, 27, 27, 26, 26, 26, 26, - 24, 24, 24, 22, 22, 22, 22, 22, 27, 26, 26, 26, 26, 25, 25, 25, 24, 23, - 23, 22, 22, 22, 22, 21, 25, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 22, - 22, 22, 21, 21, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 32, 31, 31, - 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26, 25, 25, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 32, 31, 30, 30, 30, 30, 29, - 28, 28, 28, 28, 28, 28, 26, 26, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 30, 29, 28, 28, 28, 28, - 28, 27, 26, 26, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 30, 30, 30, 30, 30, 29, 28, 27, 27, 27, 27, 27, 26, 25, - 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, - 30, 30, 30, 30, 30, 29, 28, 27, 27, 27, 27, 26, 26, 25, 24, 24, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 30, - 30, 29, 28, 27, 27, 27, 27, 26, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 30, 30, 29, 28, 27, - 27, 27, 27, 26, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 32, 31, 30, 30, 30, 30, 30, 28, 28, 27, 27, 27, 27, 26, - 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 31, 30, 29, 29, 29, 29, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, - 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 29, - 29, 29, 29, 28, 28, 26, 26, 26, 26, 26, 26, 25, 24, 24, 34, 34, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 30, 29, 29, 29, 29, 28, - 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 34, 34, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 31, 30, 
29, 29, 29, 29, 28, 27, 26, 26, 26, - 26, 26, 25, 24, 24, 24, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 31, 30, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 26, 25, 24, - 24, 24, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, - 30, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 29, 28, 28, 28, - 28, 28, 27, 26, 26, 26, 26, 25, 25, 24, 24, 24, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26, 25, - 25, 25, 25, 24, 24, 24, 24, 24, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 30, 29, 28, 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 24, 24, - 24, 23, 23, 23, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, - 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 23, 23, - 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 27, 26, - 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 23, 23, 31, 30, 30, 30, - 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 27, 26, 26, 26, 26, 26, - 25, 24, 24, 24, 24, 24, 23, 23, 23, 23, 31, 30, 30, 30, 30, 30, 30, 30, - 29, 29, 29, 29, 29, 29, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, - 24, 24, 23, 23, 23, 23, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, - 28, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 26, - 25, 25, 25, 25, 25, 24, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, - 28, 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, - 24, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 28, 27, 27, 27, - 27, 27, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 28, 27, 27, 27, 27, 27, 26, 26, - 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 28, 28, 28, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, - 26, 25, 24, 24, 24, 24, 24, 23, 22, 
22, 22, 22, 22, 22, 22, 22, 22, 22, - 28, 28, 27, 27, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, - 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 27, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 23, 23, 23, 23, 23, - 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 26, 26, 26, 25, 25, 25, 25, 25, - 25, 25, 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, - 22, 22, 21, 21, 21, 21, 25, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, - 21, 21, 25, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, - /* Size 4x8 */ - 33, 33, 29, 28, 33, 33, 28, 27, 33, 32, 28, 26, 33, 32, 28, 26, 30, 28, - 26, 24, 29, 28, 24, 23, 27, 26, 23, 22, 25, 24, 23, 22, - /* Size 8x4 */ - 33, 33, 33, 33, 30, 29, 27, 25, 33, 33, 32, 32, 28, 28, 26, 24, 29, 28, - 28, 28, 26, 24, 23, 23, 28, 27, 26, 26, 24, 23, 22, 22, - /* Size 8x16 */ - 32, 33, 33, 33, 31, 28, 28, 27, 33, 33, 33, 33, 31, 27, 27, 26, 33, 33, - 33, 33, 30, 27, 27, 26, 33, 33, 33, 33, 30, 27, 27, 26, 33, 33, 32, 32, - 30, 26, 26, 26, 34, 33, 32, 32, 29, 26, 26, 25, 34, 33, 32, 32, 29, 26, - 26, 25, 33, 32, 31, 31, 29, 26, 26, 25, 31, 30, 29, 29, 28, 24, 24, 24, - 31, 29, 28, 28, 27, 24, 24, 23, 31, 29, 28, 28, 27, 24, 24, 23, 29, 28, - 27, 27, 25, 23, 23, 22, 28, 26, 26, 26, 24, 22, 22, 22, 28, 26, 26, 26, - 24, 22, 22, 22, 26, 26, 25, 25, 24, 22, 22, 22, 24, 24, 24, 24, 23, 22, - 22, 21, - /* Size 16x8 */ - 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 28, 26, 24, 33, 33, - 33, 33, 33, 33, 33, 32, 30, 29, 29, 28, 26, 26, 26, 24, 33, 33, 33, 33, - 32, 32, 32, 31, 29, 28, 28, 27, 26, 26, 25, 24, 33, 33, 33, 33, 32, 32, - 32, 31, 29, 28, 28, 27, 26, 26, 25, 24, 31, 31, 30, 30, 30, 29, 29, 29, - 28, 27, 27, 25, 24, 24, 24, 23, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24, - 24, 23, 22, 22, 22, 22, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24, 24, 23, 
- 22, 22, 22, 22, 27, 26, 26, 26, 26, 25, 25, 25, 24, 23, 23, 22, 22, 22, - 22, 21, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 31, 29, 28, 28, 28, 28, 27, 24, 33, 33, - 33, 33, 33, 33, 33, 33, 31, 29, 28, 28, 28, 28, 26, 24, 33, 33, 33, 33, - 33, 33, 33, 32, 31, 29, 27, 27, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, - 33, 32, 30, 28, 27, 27, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32, - 30, 28, 27, 27, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32, 30, 28, - 27, 27, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32, 30, 28, 27, 27, - 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32, 30, 28, 27, 27, 27, 27, - 26, 24, 33, 33, 33, 33, 32, 32, 32, 32, 30, 28, 26, 26, 26, 26, 26, 24, - 34, 33, 33, 32, 32, 32, 32, 32, 30, 28, 26, 26, 26, 26, 26, 24, 34, 33, - 33, 32, 32, 32, 32, 31, 29, 28, 26, 26, 26, 26, 25, 24, 34, 33, 33, 32, - 32, 32, 32, 31, 29, 28, 26, 26, 26, 26, 25, 24, 34, 33, 33, 32, 32, 32, - 32, 31, 29, 28, 26, 26, 26, 26, 25, 24, 34, 33, 33, 32, 32, 32, 32, 31, - 29, 28, 26, 26, 26, 26, 25, 24, 33, 33, 32, 32, 31, 31, 31, 31, 29, 27, - 26, 26, 26, 26, 25, 24, 32, 32, 31, 31, 30, 30, 30, 30, 28, 26, 25, 25, - 25, 25, 24, 23, 31, 31, 30, 29, 29, 29, 29, 29, 28, 26, 24, 24, 24, 24, - 24, 23, 31, 30, 29, 29, 28, 28, 28, 28, 27, 26, 24, 24, 24, 24, 23, 23, - 31, 30, 29, 29, 28, 28, 28, 28, 27, 26, 24, 24, 24, 24, 23, 23, 31, 30, - 29, 29, 28, 28, 28, 28, 27, 26, 24, 24, 24, 24, 23, 23, 31, 30, 29, 29, - 28, 28, 28, 28, 27, 26, 24, 24, 24, 24, 23, 23, 30, 29, 28, 28, 28, 28, - 28, 28, 26, 24, 23, 23, 23, 23, 23, 23, 29, 28, 28, 27, 27, 27, 27, 26, - 25, 24, 23, 23, 23, 23, 22, 22, 28, 28, 27, 26, 26, 26, 26, 26, 24, 23, - 22, 22, 22, 22, 22, 22, 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 22, 22, - 22, 22, 22, 22, 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 22, 22, 22, 22, - 22, 22, 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 22, 22, 22, 22, 22, 22, - 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 22, 22, 22, 22, 22, 22, 26, 26, - 26, 25, 25, 25, 25, 24, 24, 23, 22, 22, 22, 
22, 22, 21, 26, 25, 25, 24, - 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 22, 21, 24, 24, 24, 24, 24, 24, - 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 24, 24, 24, 24, 24, 24, 24, 24, - 23, 22, 22, 22, 22, 22, 21, 21, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 32, 31, 31, - 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 26, 26, 24, 24, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 30, 29, - 28, 28, 27, 27, 27, 27, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 31, 30, 29, 29, 29, 29, 28, 28, 27, 26, 26, - 26, 26, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 31, 29, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 26, 25, 24, - 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 31, 30, - 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 31, 30, 29, 28, 28, 28, - 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26, - 26, 26, 26, 26, 25, 24, 24, 24, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 26, 26, 25, 25, 25, 25, - 24, 24, 24, 24, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, - 29, 28, 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 24, 24, 24, 23, 23, 23, - 29, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 26, 26, 26, - 26, 26, 26, 24, 24, 23, 23, 23, 23, 23, 23, 23, 22, 22, 28, 28, 27, 27, - 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, - 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 27, 27, 27, 27, 27, 27, - 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 28, 28, 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, - 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 28, 28, 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 
26, 25, - 24, 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 27, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 23, 23, 23, - 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 23, 23, 23, 22, 22, - 22, 22, 22, 22, 21, 21, 21, 21, - /* Size 4x16 */ - 33, 33, 29, 28, 33, 33, 29, 27, 33, 33, 28, 27, 33, 33, 28, 27, 33, 32, - 28, 26, 33, 32, 28, 26, 33, 32, 28, 26, 33, 31, 27, 26, 31, 29, 26, 24, - 30, 28, 26, 24, 30, 28, 26, 24, 28, 27, 24, 23, 27, 26, 23, 22, 27, 26, - 23, 22, 26, 25, 23, 22, 24, 24, 22, 22, - /* Size 16x4 */ - 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 30, 28, 27, 27, 26, 24, 33, 33, - 33, 33, 32, 32, 32, 31, 29, 28, 28, 27, 26, 26, 25, 24, 29, 29, 28, 28, - 28, 28, 28, 27, 26, 26, 26, 24, 23, 23, 23, 22, 28, 27, 27, 27, 26, 26, - 26, 26, 24, 24, 24, 23, 22, 22, 22, 22, - /* Size 8x32 */ - 32, 33, 33, 33, 31, 28, 28, 27, 33, 33, 33, 33, 31, 28, 28, 26, 33, 33, - 33, 33, 31, 27, 27, 26, 33, 33, 33, 33, 30, 27, 27, 26, 33, 33, 33, 33, - 30, 27, 27, 26, 33, 33, 33, 33, 30, 27, 27, 26, 33, 33, 33, 33, 30, 27, - 27, 26, 33, 33, 33, 33, 30, 27, 27, 26, 33, 33, 32, 32, 30, 26, 26, 26, - 34, 33, 32, 32, 30, 26, 26, 26, 34, 33, 32, 32, 29, 26, 26, 25, 34, 33, - 32, 32, 29, 26, 26, 25, 34, 33, 32, 32, 29, 26, 26, 25, 34, 33, 32, 32, - 29, 26, 26, 25, 33, 32, 31, 31, 29, 26, 26, 25, 32, 31, 30, 30, 28, 25, - 25, 24, 31, 30, 29, 29, 28, 24, 24, 24, 31, 29, 28, 28, 27, 24, 24, 23, - 31, 29, 28, 28, 27, 24, 24, 23, 31, 29, 28, 28, 27, 24, 24, 23, 31, 29, - 28, 28, 27, 24, 24, 23, 30, 28, 28, 28, 26, 23, 23, 23, 29, 28, 27, 27, - 25, 23, 23, 22, 28, 27, 26, 26, 24, 22, 22, 22, 28, 26, 26, 26, 24, 22, - 22, 22, 28, 26, 26, 26, 24, 22, 22, 22, 28, 26, 26, 26, 24, 22, 22, 22, - 28, 26, 26, 26, 24, 22, 22, 22, 26, 26, 25, 25, 24, 22, 22, 22, 26, 25, - 24, 24, 23, 22, 22, 22, 24, 24, 24, 24, 23, 22, 22, 21, 24, 24, 24, 24, - 23, 22, 22, 21, - /* Size 32x8 */ - 32, 
33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 32, 31, 31, - 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 26, 26, 24, 24, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 29, 29, 29, 29, 28, - 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, - 26, 26, 25, 24, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, - 24, 24, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 28, - 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 24, 24, 24, 23, 23, 23, 28, 28, - 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, - 24, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 27, 27, 27, 27, - 27, 27, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 27, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 25, 25, 25, 25, 25, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, - 22, 22, 21, 21 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 33, 33, 33, 32, 33, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 31, - /* Size 8x8 */ - 33, 33, 33, 33, 33, 33, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, - 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, - 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 31, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 
32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - /* Size 4x8 */ - 33, 33, 33, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, - 32, 32, 33, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, - /* Size 8x4 */ - 33, 33, 33, 33, 33, 33, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, - /* Size 8x16 */ - 32, 33, 33, 33, 33, 33, 33, 32, 33, 33, 33, 33, 33, 33, 32, 32, 33, 33, - 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, - 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, - 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, - 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, - 32, 32, 32, 32, 31, 31, 33, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, - 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, - 31, 30, - /* Size 16x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 
32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, - 30, 30, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 31, 31, 30, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, - 30, 30, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 
30, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 31, 30, 30, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 30, 30, 30, 30, 30, 30, 30, 30, - /* Size 4x16 */ - 33, 33, 33, 32, 33, 33, 33, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, - 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, - 33, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, - 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, - /* Size 16x4 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, - /* Size 8x32 */ - 32, 33, 33, 33, 33, 33, 33, 32, 33, 33, 33, 33, 33, 33, 32, 32, 33, 33, - 33, 33, 33, 33, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, - 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, - 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, - 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, - 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, - 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, - 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, - 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, - 32, 32, 31, 31, 33, 32, 32, 32, 32, 32, 31, 31, 33, 32, 32, 32, 32, 32, - 31, 31, 32, 32, 32, 32, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, - 32, 32, 32, 32, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 32, 32, - 32, 32, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 32, 
32, 32, 32, - 32, 32, 31, 30, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, - 30, 30, 30, 30 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 33, 33, 30, 33, 33, 33, 29, 33, 33, 32, 29, 30, 29, 29, 26, - /* Size 8x8 */ - 33, 33, 33, 33, 34, 33, 31, 31, 33, 33, 33, 33, 33, 32, 30, 30, 33, 33, - 33, 33, 33, 32, 30, 30, 33, 33, 33, 33, 33, 32, 29, 29, 34, 33, 33, 33, - 32, 32, 29, 29, 33, 32, 32, 32, 32, 31, 28, 28, 31, 30, 30, 29, 29, 28, - 26, 26, 31, 30, 30, 29, 29, 28, 26, 26, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 33, 33, - 33, 33, 33, 33, 33, 33, 34, 34, 34, 32, 31, 30, 30, 30, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 32, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, - 30, 29, 29, 29, 34, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 31, 30, 29, - 29, 29, 34, 34, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, - 34, 34, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 34, 34, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 33, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 30, 28, 28, 28, 28, 31, 31, 31, 30, 30, 30, - 30, 30, 30, 30, 30, 28, 28, 27, 27, 27, 31, 30, 30, 30, 30, 30, 29, 29, - 29, 29, 29, 28, 27, 26, 26, 26, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, - 29, 28, 27, 26, 26, 26, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 28, - 27, 26, 26, 26, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, - 34, 34, 34, 33, 33, 32, 31, 31, 31, 31, 31, 31, 31, 30, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 33, - 33, 32, 31, 31, 31, 31, 31, 31, 31, 30, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 33, 32, 32, 31, 30, - 30, 30, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 31, 30, 30, 30, 30, 30, - 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 31, 31, 30, 30, 30, 30, 30, 30, 29, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 32, 31, 30, 30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 31, - 30, 30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 31, 30, 30, 30, 30, - 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 31, 30, 30, 30, 30, 30, 30, 30, 29, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, 32, 31, 30, 30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, - 32, 31, 30, 
30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 31, 30, 29, - 29, 29, 29, 29, 29, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 31, 30, 29, 29, 29, 29, 29, - 29, 29, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 32, 31, 31, 30, 29, 29, 29, 29, 29, 29, 28, 34, 34, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 34, 34, 34, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, - 30, 29, 29, 29, 29, 29, 29, 28, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 29, - 29, 29, 29, 28, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, - 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 34, 34, 34, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 34, 34, 34, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, - 29, 29, 29, 29, 29, 28, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 28, 28, 28, 28, 28, - 28, 28, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, - 31, 31, 31, 31, 31, 30, 30, 29, 28, 28, 28, 28, 28, 28, 28, 28, 32, 32, - 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, - 30, 30, 29, 28, 28, 28, 28, 28, 28, 28, 28, 27, 31, 31, 31, 31, 31, 30, - 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 28, 28, - 28, 27, 27, 27, 27, 27, 27, 26, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, - 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 27, 26, 26, 26, - 26, 26, 26, 
26, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, - 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 31, 31, 30, 30, - 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, - 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 31, 31, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 27, 26, - 26, 26, 26, 26, 26, 26, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 27, 26, 26, 26, 26, 26, - 26, 26, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 26, - /* Size 4x8 */ - 33, 33, 33, 30, 33, 33, 33, 29, 33, 33, 33, 29, 33, 32, 32, 28, 33, 32, - 32, 28, 33, 31, 31, 28, 30, 28, 28, 26, 30, 28, 28, 26, - /* Size 8x4 */ - 33, 33, 33, 33, 33, 33, 30, 30, 33, 33, 33, 32, 32, 31, 28, 28, 33, 33, - 33, 32, 32, 31, 28, 28, 30, 29, 29, 28, 28, 28, 26, 26, - /* Size 8x16 */ - 32, 33, 33, 33, 33, 33, 31, 29, 33, 33, 33, 33, 33, 33, 31, 28, 33, 33, - 33, 33, 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 33, - 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 32, 32, 32, - 30, 28, 34, 33, 33, 32, 32, 32, 30, 27, 34, 33, 32, 32, 32, 32, 29, 27, - 34, 33, 32, 32, 32, 32, 29, 27, 34, 33, 32, 32, 32, 32, 29, 27, 33, 32, - 31, 31, 31, 31, 28, 26, 31, 30, 30, 29, 29, 29, 28, 26, 31, 30, 29, 28, - 28, 28, 27, 25, 31, 30, 29, 28, 28, 28, 27, 25, 31, 30, 29, 28, 28, 28, - 27, 25, - /* Size 16x8 */ - 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 30, 33, 33, 33, 33, - 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 31, 29, 28, 28, 28, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 31, 29, 28, 28, 28, 33, 33, 33, 33, 
33, 33, 32, 32, 32, 32, - 32, 31, 29, 28, 28, 28, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 28, - 28, 27, 27, 27, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 26, 25, - 25, 25, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 29, 28, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 29, 28, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 28, 28, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 32, 31, 29, 28, 27, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, 30, 29, 28, 27, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 31, 30, 29, 28, 27, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, - 30, 29, 28, 27, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 29, - 28, 27, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 29, 28, 27, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 29, 28, 27, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 29, 28, 27, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 31, 30, 29, 28, 27, 33, 33, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 31, 30, 28, 28, 26, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 31, 30, 28, 28, 26, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 31, 30, 28, 27, 26, 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, - 29, 28, 27, 26, 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 29, 28, - 27, 26, 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 29, 28, 27, 26, - 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 29, 28, 27, 26, 34, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 29, 28, 27, 26, 34, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 31, 29, 28, 27, 26, 33, 33, 33, 32, 32, 31, - 31, 31, 31, 31, 31, 30, 29, 28, 27, 26, 33, 32, 32, 31, 31, 31, 31, 31, - 31, 31, 31, 29, 28, 28, 26, 25, 32, 32, 31, 31, 30, 30, 30, 30, 30, 30, - 30, 29, 28, 27, 26, 25, 31, 31, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, - 28, 26, 26, 24, 31, 30, 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, - 25, 24, 31, 30, 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 24, - 31, 30, 30, 29, 29, 
28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 24, 31, 30, - 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 24, 31, 30, 30, 29, - 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 24, 31, 30, 30, 29, 29, 28, - 28, 28, 28, 28, 28, 28, 27, 26, 25, 24, 30, 30, 29, 29, 28, 28, 28, 28, - 28, 28, 28, 27, 26, 26, 24, 23, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, - 34, 34, 34, 33, 33, 32, 31, 31, 31, 31, 31, 31, 31, 30, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 32, 31, 30, 30, 30, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, - 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 31, 30, 29, 29, 29, 29, 29, - 29, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, - 29, 28, 28, 28, 28, 28, 28, 28, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 29, 28, 28, 28, - 28, 28, 28, 28, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, - 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 32, 32, 32, 32, 32, 31, 31, 31, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 29, 29, 28, 28, - 28, 28, 28, 28, 28, 27, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 
28, 28, 28, 27, 27, 27, 27, 27, - 27, 26, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 26, 29, 29, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, - 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 25, 24, 28, 28, 28, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, - 24, 24, 24, 24, 24, 24, 24, 23, - /* Size 4x16 */ - 33, 33, 33, 30, 33, 33, 33, 30, 33, 33, 33, 29, 33, 33, 33, 29, 33, 33, - 33, 29, 33, 33, 33, 29, 33, 32, 32, 28, 33, 32, 32, 28, 33, 32, 32, 28, - 33, 32, 32, 28, 33, 32, 32, 28, 32, 31, 31, 28, 31, 29, 29, 26, 30, 28, - 28, 26, 30, 28, 28, 26, 30, 28, 28, 26, - /* Size 16x4 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 33, 33, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 28, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 28, 30, 30, 29, 29, 29, 29, - 28, 28, 28, 28, 28, 28, 26, 26, 26, 26, - /* Size 8x32 */ - 32, 33, 33, 33, 33, 33, 31, 29, 33, 33, 33, 33, 33, 33, 31, 29, 33, 33, - 33, 33, 33, 33, 31, 28, 33, 33, 33, 33, 33, 33, 31, 28, 33, 33, 33, 33, - 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, - 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, - 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, - 33, 33, 33, 33, 30, 28, 33, 33, 33, 32, 32, 32, 30, 28, 33, 33, 33, 32, - 32, 32, 30, 28, 34, 33, 33, 32, 32, 32, 30, 27, 34, 33, 32, 32, 32, 32, - 29, 27, 34, 33, 32, 32, 32, 32, 29, 27, 34, 33, 32, 32, 32, 32, 29, 27, - 34, 33, 32, 32, 32, 32, 29, 27, 34, 33, 32, 32, 32, 32, 29, 27, 34, 33, - 32, 32, 32, 32, 29, 27, 33, 33, 32, 31, 31, 31, 29, 27, 33, 32, 31, 31, - 31, 31, 28, 26, 32, 31, 30, 30, 30, 30, 28, 26, 31, 30, 30, 29, 29, 29, - 28, 26, 31, 30, 29, 28, 28, 28, 27, 25, 31, 30, 29, 28, 28, 28, 27, 25, - 31, 30, 29, 28, 28, 28, 27, 25, 31, 30, 29, 28, 28, 28, 27, 25, 31, 30, - 29, 28, 28, 28, 
27, 25, 31, 30, 29, 28, 28, 28, 27, 25, 30, 29, 28, 28, - 28, 28, 26, 24, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, - 34, 34, 34, 33, 33, 32, 31, 31, 31, 31, 31, 31, 31, 30, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 31, 30, 30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, - 29, 29, 29, 29, 29, 28, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, - 28, 28, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 31, 31, 31, 31, 30, 30, - 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 28, 28, - 28, 27, 27, 27, 27, 27, 27, 26, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, 27, 27, 26, 26, 26, 25, 25, 25, - 25, 25, 25, 24 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 33, 33, 33, 33, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, - /* Size 8x8 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, - 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x8 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, - 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, - /* Size 8x4 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, - 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, - /* Size 8x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 33, 33, 33, 32, - 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, - 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, - 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, - 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, - 32, 32, - /* Size 16x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 
32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x16 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 33, 32, - 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, - 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, - 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, - /* Size 16x4 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, - 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, - 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, - 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, - 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, - 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, - 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, - 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 
32, 32, 32, 32, 32, 32, 33, 33, - 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, - 32, 32, 32, 32, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32 }, - { /* Chroma */ - /* Size 4x4 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - /* Size 8x8 */ - 33, 33, 33, 33, 33, 33, 33, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 34, 33, 33, 33, 33, 33, 33, 33, - /* Size 16x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, - /* Size 32x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, - 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 32, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, - /* Size 4x8 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 34, 33, 32, 32, - /* Size 8x4 */ - 33, 33, 33, 33, 33, 33, 33, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, - /* Size 8x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33, - 33, 32, 32, 32, 34, 33, 33, 33, 33, 32, 32, 32, 34, 33, 33, 33, 32, 32, - 32, 32, - /* Size 16x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, - /* Size 16x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, - 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 
32, - 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 34, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 34, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 34, 34, 33, 33, - 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 34, 34, 33, 33, 33, 33, - 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 34, 34, 33, 33, 33, 33, 33, 33, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 32x16 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 32, 32, 
32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x16 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 33, 33, - 32, 32, 33, 33, 32, 32, 34, 33, 32, 32, - /* Size 16x4 */ - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, - /* Size 8x32 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33, 33, 32, - 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 
32, - 34, 33, 33, 33, 33, 32, 32, 32, 34, 33, 33, 33, 33, 32, 32, 32, 34, 33, - 33, 33, 32, 32, 32, 32, 34, 33, 33, 33, 32, 32, 32, 32, 34, 33, 33, 33, - 32, 32, 32, 32, - /* Size 32x8 */ - 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, - 32, 32, 32, 32 }, - }, - { - { /* Luma */ - /* Size 4x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 16x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 32x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 16x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32 }, - { /* Chroma */ - /* Size 4x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 16x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, - /* Size 16x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 32x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 4x16 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 16x4 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - /* Size 8x32 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, - /* Size 32x8 */ - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32 }, - }, -}; diff --git a/third_party/aom/av1/common/quant_common.h b/third_party/aom/av1/common/quant_common.h deleted file mode 100644 index d1f52a660..000000000 --- a/third_party/aom/av1/common/quant_common.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_QUANT_COMMON_H_ -#define AOM_AV1_COMMON_QUANT_COMMON_H_ - -#include "aom/aom_codec.h" -#include "av1/common/seg_common.h" -#include "av1/common/enums.h" -#include "av1/common/entropy.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MINQ 0 -#define MAXQ 255 -#define QINDEX_RANGE (MAXQ - MINQ + 1) -#define QINDEX_BITS 8 -// Total number of QM sets stored -#define QM_LEVEL_BITS 4 -#define NUM_QM_LEVELS (1 << QM_LEVEL_BITS) -/* Range of QMS is between first and last value, with offset applied to inter - * blocks*/ -#define DEFAULT_QM_Y 10 -#define DEFAULT_QM_U 11 -#define DEFAULT_QM_V 12 -#define DEFAULT_QM_FIRST 5 -#define DEFAULT_QM_LAST 9 - -struct AV1Common; - -int16_t av1_dc_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth); -int16_t av1_ac_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth); -int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth); -int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth); - -int av1_get_qindex(const struct segmentation *seg, int segment_id, - int base_qindex); -// Reduce the large number of quantizers to a smaller number of levels for which -// different matrices may be defined -static INLINE int aom_get_qmlevel(int qindex, int first, int last) { - return first + (qindex * (last + 1 - first)) / QINDEX_RANGE; -} -void av1_qm_init(struct AV1Common *cm); -const qm_val_t *av1_iqmatrix(struct AV1Common *cm, int qindex, int comp, - TX_SIZE tx_size); -const qm_val_t *av1_qmatrix(struct AV1Common *cm, int qindex, int comp, - TX_SIZE tx_size); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_QUANT_COMMON_H_ diff --git a/third_party/aom/av1/common/reconinter.c b/third_party/aom/av1/common/reconinter.c deleted file mode 100644 index 
3203efce4..000000000 --- a/third_party/aom/av1/common/reconinter.c +++ /dev/null @@ -1,1162 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <assert.h> -#include <stdio.h> -#include <limits.h> - -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" -#include "config/aom_scale_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/blend.h" - -#include "av1/common/blockd.h" -#include "av1/common/mvref_common.h" -#include "av1/common/reconinter.h" -#include "av1/common/reconintra.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/obmc.h" - -#define USE_PRECOMPUTED_WEDGE_MASK 1 -#define USE_PRECOMPUTED_WEDGE_SIGN 1 - -// This function will determine whether or not to create a warped -// prediction. 
-int av1_allow_warp(const MB_MODE_INFO *const mbmi, - const WarpTypesAllowed *const warp_types, - const WarpedMotionParams *const gm_params, - int build_for_obmc, int x_scale, int y_scale, - WarpedMotionParams *final_warp_params) { - if (x_scale != SCALE_SUBPEL_SHIFTS || y_scale != SCALE_SUBPEL_SHIFTS) - return 0; - - if (final_warp_params != NULL) *final_warp_params = default_warp_params; - - if (build_for_obmc) return 0; - - if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) { - if (final_warp_params != NULL) - memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params)); - return 1; - } else if (warp_types->global_warp_allowed && !gm_params->invalid) { - if (final_warp_params != NULL) - memcpy(final_warp_params, gm_params, sizeof(*final_warp_params)); - return 1; - } - - return 0; -} - -void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, const SubpelParams *subpel_params, - const struct scale_factors *sf, int w, int h, - ConvolveParams *conv_params, - InterpFilters interp_filters, - const WarpTypesAllowed *warp_types, int p_col, - int p_row, int plane, int ref, - const MB_MODE_INFO *mi, int build_for_obmc, - const MACROBLOCKD *xd, int can_use_previous) { - // Make sure the selected motion mode is valid for this configuration - assert_motion_mode_valid(mi->motion_mode, xd->global_motion, xd, mi, - can_use_previous); - assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL)); - - WarpedMotionParams final_warp_params; - const int do_warp = - (w >= 8 && h >= 8 && - av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], - build_for_obmc, subpel_params->xs, subpel_params->ys, - &final_warp_params)); - const int is_intrabc = mi->use_intrabc; - assert(IMPLIES(is_intrabc, !do_warp)); - - if (do_warp && xd->cur_frame_force_integer_mv == 0) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const struct buf_2d *const pre_buf = &pd->pre[ref]; - 
av1_warp_plane(&final_warp_params, - xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, - pre_buf->buf0, pre_buf->width, pre_buf->height, - pre_buf->stride, dst, p_col, p_row, w, h, dst_stride, - pd->subsampling_x, pd->subsampling_y, conv_params); - } else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf, - w, h, conv_params, interp_filters, is_intrabc, - xd->bd); - } else { - inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf, w, h, - conv_params, interp_filters, is_intrabc); - } -} - -#if USE_PRECOMPUTED_WEDGE_MASK -static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 6, 18, - 37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, -}; -static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 11, 27, - 46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, -}; -static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7, 21, - 43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, -}; - -static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) { - if (shift >= 0) { - memcpy(dst + shift, src, width - shift); - memset(dst, src[0], shift); - } else { - shift = -shift; - memcpy(dst, src + shift, width - shift); - memset(dst + width - shift, src[width - 1], shift); - } -} -#endif // USE_PRECOMPUTED_WEDGE_MASK - -#if USE_PRECOMPUTED_WEDGE_SIGN -/* clang-format off */ -DECLARE_ALIGNED(16, static uint8_t, - 
wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, - { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, - { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, - { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, - { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, }, - { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used -}; -/* clang-format on */ -#else -DECLARE_ALIGNED(16, static uint8_t, - wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]); -#endif // USE_PRECOMPUTED_WEDGE_SIGN - -// [negative][direction] -DECLARE_ALIGNED( - 16, static uint8_t, - wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]); - -// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound -// on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE. 
-DECLARE_ALIGNED(16, static uint8_t, - wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]); - -static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2]; - -static const wedge_code_type wedge_codebook_16_hgtw[16] = { - { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, - { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, - { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 }, - { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 }, - { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, - { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, - { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, - { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, -}; - -static const wedge_code_type wedge_codebook_16_hltw[16] = { - { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, - { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, - { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 }, - { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 }, - { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, - { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, - { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, - { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, -}; - -static const wedge_code_type wedge_codebook_16_heqw[16] = { - { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, - { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, - { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, - { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, - { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, - { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, - { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, - { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, -}; - -const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = { - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, - { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], - wedge_masks[BLOCK_8X8] }, - { 4, wedge_codebook_16_hgtw, 
wedge_signflip_lookup[BLOCK_8X16], - wedge_masks[BLOCK_8X16] }, - { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], - wedge_masks[BLOCK_16X8] }, - { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], - wedge_masks[BLOCK_16X16] }, - { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], - wedge_masks[BLOCK_16X32] }, - { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], - wedge_masks[BLOCK_32X16] }, - { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], - wedge_masks[BLOCK_32X32] }, - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, - { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], - wedge_masks[BLOCK_8X32] }, - { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], - wedge_masks[BLOCK_32X8] }, - { 0, NULL, NULL, NULL }, - { 0, NULL, NULL, NULL }, -}; - -static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg, - BLOCK_SIZE sb_type) { - const uint8_t *master; - const int bh = block_size_high[sb_type]; - const int bw = block_size_wide[sb_type]; - const wedge_code_type *a = - wedge_params_lookup[sb_type].codebook + wedge_index; - int woff, hoff; - const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index]; - - assert(wedge_index >= 0 && - wedge_index < (1 << get_wedge_bits_lookup(sb_type))); - woff = (a->x_offset * bw) >> 3; - hoff = (a->y_offset * bh) >> 3; - master = wedge_mask_obl[neg ^ wsignflip][a->direction] + - MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) + - MASK_MASTER_SIZE / 2 - woff; - return master; -} - -const uint8_t *av1_get_compound_type_mask( - const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) { - assert(is_masked_compound_type(comp_data->type)); - (void)sb_type; - switch (comp_data->type) { - case COMPOUND_WEDGE: - return 
av1_get_contiguous_soft_mask(comp_data->wedge_index, - comp_data->wedge_sign, sb_type); - case COMPOUND_DIFFWTD: return comp_data->seg_mask; - default: assert(0); return NULL; - } -} - -static void diffwtd_mask_d16(uint8_t *mask, int which_inverse, int mask_base, - const CONV_BUF_TYPE *src0, int src0_stride, - const CONV_BUF_TYPE *src1, int src1_stride, int h, - int w, ConvolveParams *conv_params, int bd) { - int round = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8); - int i, j, m, diff; - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]); - diff = ROUND_POWER_OF_TWO(diff, round); - m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA); - mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m; - } - } -} - -void av1_build_compound_diffwtd_mask_d16_c( - uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, - int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, - ConvolveParams *conv_params, int bd) { - switch (mask_type) { - case DIFFWTD_38: - diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w, - conv_params, bd); - break; - case DIFFWTD_38_INV: - diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w, - conv_params, bd); - break; - default: assert(0); - } -} - -static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base, - const uint8_t *src0, int src0_stride, - const uint8_t *src1, int src1_stride, int h, int w) { - int i, j, m, diff; - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - diff = - abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]); - m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA); - mask[i * w + j] = which_inverse ? 
AOM_BLEND_A64_MAX_ALPHA - m : m; - } - } -} - -void av1_build_compound_diffwtd_mask_c(uint8_t *mask, - DIFFWTD_MASK_TYPE mask_type, - const uint8_t *src0, int src0_stride, - const uint8_t *src1, int src1_stride, - int h, int w) { - switch (mask_type) { - case DIFFWTD_38: - diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w); - break; - case DIFFWTD_38_INV: - diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w); - break; - default: assert(0); - } -} - -static AOM_FORCE_INLINE void diffwtd_mask_highbd( - uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0, - int src0_stride, const uint16_t *src1, int src1_stride, int h, int w, - const unsigned int bd) { - assert(bd >= 8); - if (bd == 8) { - if (which_inverse) { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; ++j) { - int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR; - unsigned int m = negative_to_zero(mask_base + diff); - m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); - mask[j] = AOM_BLEND_A64_MAX_ALPHA - m; - } - src0 += src0_stride; - src1 += src1_stride; - mask += w; - } - } else { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; ++j) { - int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR; - unsigned int m = negative_to_zero(mask_base + diff); - m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); - mask[j] = m; - } - src0 += src0_stride; - src1 += src1_stride; - mask += w; - } - } - } else { - const unsigned int bd_shift = bd - 8; - if (which_inverse) { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; ++j) { - int diff = - (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR; - unsigned int m = negative_to_zero(mask_base + diff); - m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); - mask[j] = AOM_BLEND_A64_MAX_ALPHA - m; - } - src0 += src0_stride; - src1 += src1_stride; - mask += w; - } - } else { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; ++j) { - int diff = - (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR; - 
unsigned int m = negative_to_zero(mask_base + diff); - m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); - mask[j] = m; - } - src0 += src0_stride; - src1 += src1_stride; - mask += w; - } - } - } -} - -void av1_build_compound_diffwtd_mask_highbd_c( - uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, - int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, - int bd) { - switch (mask_type) { - case DIFFWTD_38: - diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride, - CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd); - break; - case DIFFWTD_38_INV: - diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride, - CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd); - break; - default: assert(0); - } -} - -static void init_wedge_master_masks() { - int i, j; - const int w = MASK_MASTER_SIZE; - const int h = MASK_MASTER_SIZE; - const int stride = MASK_MASTER_STRIDE; -// Note: index [0] stores the masters, and [1] its complement. -#if USE_PRECOMPUTED_WEDGE_MASK - // Generate prototype by shifting the masters - int shift = h / 4; - for (i = 0; i < h; i += 2) { - shift_copy(wedge_master_oblique_even, - &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift, - MASK_MASTER_SIZE); - shift--; - shift_copy(wedge_master_oblique_odd, - &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift, - MASK_MASTER_SIZE); - memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride], - wedge_master_vertical, - MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0])); - memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride], - wedge_master_vertical, - MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0])); - } -#else - static const double smoother_param = 2.85; - const int a[2] = { 2, 1 }; - const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]); - for (i = 0; i < h; i++) { - for (j = 0; j < w; ++j) { - int x = (2 * j + 1 - w); - int y = (2 * i + 1 - h); - double d = (a[0] * x + a[1] * y) / asqrt; - const int msk = (int)rint((1.0 + tanh(d / 
smoother_param)) * 32); - wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j] = msk; - const int mskx = (int)rint((1.0 + tanh(x / smoother_param)) * 32); - wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j] = mskx; - } - } -#endif // USE_PRECOMPUTED_WEDGE_MASK - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j]; - wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk; - wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = - wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = - (1 << WEDGE_WEIGHT_BITS) - msk; - wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] = - wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] = - (1 << WEDGE_WEIGHT_BITS) - msk; - wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = - wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk; - const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j]; - wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx; - wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] = - wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] = - (1 << WEDGE_WEIGHT_BITS) - mskx; - } - } -} - -#if !USE_PRECOMPUTED_WEDGE_SIGN -// If the signs for the wedges for various blocksizes are -// inconsistent flip the sign flag. Do it only once for every -// wedge codebook. -static void init_wedge_signs() { - BLOCK_SIZE sb_type; - memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup)); - for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) { - const int bw = block_size_wide[sb_type]; - const int bh = block_size_high[sb_type]; - const wedge_params_type wedge_params = wedge_params_lookup[sb_type]; - const int wbits = wedge_params.bits; - const int wtypes = 1 << wbits; - int i, w; - if (wbits) { - for (w = 0; w < wtypes; ++w) { - // Get the mask master, i.e. 
index [0] - const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type); - int avg = 0; - for (i = 0; i < bw; ++i) avg += mask[i]; - for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE]; - avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1); - // Default sign of this wedge is 1 if the average < 32, 0 otherwise. - // If default sign is 1: - // If sign requested is 0, we need to flip the sign and return - // the complement i.e. index [1] instead. If sign requested is 1 - // we need to flip the sign and return index [0] instead. - // If default sign is 0: - // If sign requested is 0, we need to return index [0] the master - // if sign requested is 1, we need to return the complement index [1] - // instead. - wedge_params.signflip[w] = (avg < 32); - } - } - } -} -#endif // !USE_PRECOMPUTED_WEDGE_SIGN - -static void init_wedge_masks() { - uint8_t *dst = wedge_mask_buf; - BLOCK_SIZE bsize; - memset(wedge_masks, 0, sizeof(wedge_masks)); - for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) { - const uint8_t *mask; - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - const wedge_params_type *wedge_params = &wedge_params_lookup[bsize]; - const int wbits = wedge_params->bits; - const int wtypes = 1 << wbits; - int w; - if (wbits == 0) continue; - for (w = 0; w < wtypes; ++w) { - mask = get_wedge_mask_inplace(w, 0, bsize); - aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw, - bh); - wedge_params->masks[0][w] = dst; - dst += bw * bh; - - mask = get_wedge_mask_inplace(w, 1, bsize); - aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw, - bh); - wedge_params->masks[1][w] = dst; - dst += bw * bh; - } - assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf)); - } -} - -// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0 -void av1_init_wedge_masks() { - init_wedge_master_masks(); -#if !USE_PRECOMPUTED_WEDGE_SIGN - init_wedge_signs(); -#endif // 
!USE_PRECOMPUTED_WEDGE_SIGN - init_wedge_masks(); -} - -static void build_masked_compound_no_round( - uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride, - const CONV_BUF_TYPE *src1, int src1_stride, - const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h, - int w, ConvolveParams *conv_params, MACROBLOCKD *xd) { - // Derive subsampling from h and w passed in. May be refactored to - // pass in subsampling factors directly. - const int subh = (2 << mi_size_high_log2[sb_type]) == h; - const int subw = (2 << mi_size_wide_log2[sb_type]) == w; - const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type); - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, block_size_wide[sb_type], - w, h, subw, subh, conv_params, xd->bd); - else - aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, block_size_wide[sb_type], w, - h, subw, subh, conv_params); -} - -void av1_make_masked_inter_predictor( - const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride, - const SubpelParams *subpel_params, const struct scale_factors *sf, int w, - int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane, - const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref, - MACROBLOCKD *xd, int can_use_previous) { - MB_MODE_INFO *mi = xd->mi[0]; - (void)dst; - (void)dst_stride; - mi->interinter_comp.seg_mask = xd->seg_mask; - const INTERINTER_COMPOUND_DATA *comp_data = &mi->interinter_comp; - -// We're going to call av1_make_inter_predictor to generate a prediction into -// a temporary buffer, then will blend that temporary buffer with that from -// the other reference. 
-// -#define INTER_PRED_BYTES_PER_PIXEL 2 - - DECLARE_ALIGNED(32, uint8_t, - tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]); -#undef INTER_PRED_BYTES_PER_PIXEL - - uint8_t *tmp_dst = get_buf_by_bd(xd, tmp_buf); - - const int tmp_buf_stride = MAX_SB_SIZE; - CONV_BUF_TYPE *org_dst = conv_params->dst; - int org_dst_stride = conv_params->dst_stride; - CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf; - conv_params->dst = tmp_buf16; - conv_params->dst_stride = tmp_buf_stride; - assert(conv_params->do_average == 0); - - // This will generate a prediction in tmp_buf for the second reference - av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_params, - sf, w, h, conv_params, interp_filters, warp_types, - p_col, p_row, plane, ref, mi, 0, xd, - can_use_previous); - - if (!plane && comp_data->type == COMPOUND_DIFFWTD) { - av1_build_compound_diffwtd_mask_d16( - comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride, - tmp_buf16, tmp_buf_stride, h, w, conv_params, xd->bd); - } - build_masked_compound_no_round(dst, dst_stride, org_dst, org_dst_stride, - tmp_buf16, tmp_buf_stride, comp_data, - mi->sb_type, h, w, conv_params, xd); -} - -void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi, - int order_idx, int *fwd_offset, int *bck_offset, - int *use_jnt_comp_avg, int is_compound) { - assert(fwd_offset != NULL && bck_offset != NULL); - if (!is_compound || mbmi->compound_idx) { - *use_jnt_comp_avg = 0; - return; - } - - *use_jnt_comp_avg = 1; - const int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx; - const int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx; - const int cur_frame_index = cm->cur_frame->cur_frame_offset; - int bck_frame_index = 0, fwd_frame_index = 0; - - if (bck_idx >= 0) { - bck_frame_index = cm->buffer_pool->frame_bufs[bck_idx].cur_frame_offset; - } - - if (fwd_idx >= 0) { - fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset; - } - - int d0 = 
clamp(abs(get_relative_dist(cm, fwd_frame_index, cur_frame_index)), - 0, MAX_FRAME_DISTANCE); - int d1 = clamp(abs(get_relative_dist(cm, cur_frame_index, bck_frame_index)), - 0, MAX_FRAME_DISTANCE); - - const int order = d0 <= d1; - - if (d0 == 0 || d1 == 0) { - *fwd_offset = quant_dist_lookup_table[order_idx][3][order]; - *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order]; - return; - } - - int i; - for (i = 0; i < 3; ++i) { - int c0 = quant_dist_weight[i][order]; - int c1 = quant_dist_weight[i][!order]; - int d0_c0 = d0 * c0; - int d1_c1 = d1 * c1; - if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break; - } - - *fwd_offset = quant_dist_lookup_table[order_idx][i][order]; - *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order]; -} - -void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize, - const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const int plane_start, const int plane_end) { - // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet - // the static analysis warnings. - for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) { - struct macroblockd_plane *const pd = &planes[i]; - const int is_uv = i > 0; - setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv], - src->crop_heights[is_uv], src->strides[is_uv], mi_row, - mi_col, NULL, pd->subsampling_x, pd->subsampling_y); - } -} - -void av1_setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const struct scale_factors *sf, - const int num_planes) { - if (src != NULL) { - // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet - // the static analysis warnings. 
- for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - const int is_uv = i > 0; - setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type, src->buffers[i], - src->crop_widths[is_uv], src->crop_heights[is_uv], - src->strides[is_uv], mi_row, mi_col, sf, - pd->subsampling_x, pd->subsampling_y); - } - } -} - -// obmc_mask_N[overlap_position] -static const uint8_t obmc_mask_1[1] = { 64 }; - -static const uint8_t obmc_mask_2[2] = { 45, 64 }; - -static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 }; - -static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 }; - -static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54, - 56, 58, 60, 61, 64, 64, 64, 64 }; - -static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44, - 45, 47, 48, 50, 51, 52, 53, 55, - 56, 57, 58, 59, 60, 60, 61, 62, - 64, 64, 64, 64, 64, 64, 64, 64 }; - -static const uint8_t obmc_mask_64[64] = { - 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44, - 45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56, - 56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62, - 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, -}; - -const uint8_t *av1_get_obmc_mask(int length) { - switch (length) { - case 1: return obmc_mask_1; - case 2: return obmc_mask_2; - case 4: return obmc_mask_4; - case 8: return obmc_mask_8; - case 16: return obmc_mask_16; - case 32: return obmc_mask_32; - case 64: return obmc_mask_64; - default: assert(0); return NULL; - } -} - -static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_rc, - uint8_t mi_hw, MB_MODE_INFO *mi, - void *fun_ctxt, const int num_planes) { - (void)xd; - (void)rel_mi_rc; - (void)mi_hw; - (void)mi; - ++*(int *)fun_ctxt; - (void)num_planes; -} - -void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col) { - MB_MODE_INFO *mbmi = xd->mi[0]; - - 
mbmi->overlappable_neighbors[0] = 0; - mbmi->overlappable_neighbors[1] = 0; - - if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return; - - foreach_overlappable_nb_above(cm, xd, mi_col, INT_MAX, increment_int_ptr, - &mbmi->overlappable_neighbors[0]); - foreach_overlappable_nb_left(cm, xd, mi_row, INT_MAX, increment_int_ptr, - &mbmi->overlappable_neighbors[1]); -} - -// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if -// block-size of current plane is smaller than 8x8, always only blend with the -// left neighbor(s) (skip blending with the above side). -#define DISABLE_CHROMA_U8X8_OBMC 0 // 0: one-sided obmc; 1: disable - -int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, - const struct macroblockd_plane *pd, int dir) { - assert(is_motion_variation_allowed_bsize(bsize)); - - const BLOCK_SIZE bsize_plane = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - switch (bsize_plane) { -#if DISABLE_CHROMA_U8X8_OBMC - case BLOCK_4X4: - case BLOCK_8X4: - case BLOCK_4X8: return 1; break; -#else - case BLOCK_4X4: - case BLOCK_8X4: - case BLOCK_4X8: return dir == 0; break; -#endif - default: return 0; - } -} - -void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) { - mbmi->ref_frame[1] = NONE_FRAME; - mbmi->interinter_comp.type = COMPOUND_AVERAGE; - - return; -} - -struct obmc_inter_pred_ctxt { - uint8_t **adjacent; - int *adjacent_stride; -}; - -static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col, - uint8_t above_mi_width, - MB_MODE_INFO *above_mi, - void *fun_ctxt, - const int num_planes) { - (void)above_mi; - struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt; - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; - const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; - const int overlap = - AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1; - - for (int plane = 0; plane < num_planes; ++plane) { - const struct macroblockd_plane *pd = &xd->plane[plane]; - const int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x; - const int bh = overlap >> pd->subsampling_y; - const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x; - - if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue; - - const int dst_stride = pd->dst.stride; - uint8_t *const dst = &pd->dst.buf[plane_col]; - const int tmp_stride = ctxt->adjacent_stride[plane]; - const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col]; - const uint8_t *const mask = av1_get_obmc_mask(bh); - - if (is_hbd) - aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, - tmp_stride, mask, bw, bh, xd->bd); - else - aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, - mask, bw, bh); - } -} - -static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row, - uint8_t left_mi_height, - MB_MODE_INFO *left_mi, - void *fun_ctxt, - const int num_planes) { - (void)left_mi; - struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt; - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; - const int overlap = - AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1; - const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; - - for (int plane = 0; plane < num_planes; ++plane) { - const struct macroblockd_plane *pd = &xd->plane[plane]; - const int bw = overlap >> pd->subsampling_x; - const int bh = (left_mi_height * MI_SIZE) >> pd->subsampling_y; - const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y; - - if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue; - - const int dst_stride = pd->dst.stride; - uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride]; - const int tmp_stride = ctxt->adjacent_stride[plane]; - const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride]; - const uint8_t *const mask = av1_get_obmc_mask(bw); - - if (is_hbd) - aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, - tmp_stride, mask, bw, bh, xd->bd); - else - aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, - mask, bw, bh); - } -} - -// This function combines motion compensated predictions that are generated by -// top/left neighboring blocks' inter predictors with the regular inter -// prediction. 
We assume the original prediction (bmc) is stored in -// xd->plane[].dst.buf -void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, - uint8_t *above[MAX_MB_PLANE], - int above_stride[MAX_MB_PLANE], - uint8_t *left[MAX_MB_PLANE], - int left_stride[MAX_MB_PLANE]) { - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; - - // handle above row - struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride }; - foreach_overlappable_nb_above(cm, xd, mi_col, - max_neighbor_obmc[mi_size_wide_log2[bsize]], - build_obmc_inter_pred_above, &ctxt_above); - - // handle left column - struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride }; - foreach_overlappable_nb_left(cm, xd, mi_row, - max_neighbor_obmc[mi_size_high_log2[bsize]], - build_obmc_inter_pred_left, &ctxt_left); -} - -void av1_setup_build_prediction_by_above_pred( - MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width, - MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt, - const int num_planes) { - const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type); - const int above_mi_col = ctxt->mi_col + rel_mi_col; - - av1_modify_neighbor_predictor_for_obmc(above_mbmi); - - for (int j = 0; j < num_planes; ++j) { - struct macroblockd_plane *const pd = &xd->plane[j]; - setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j], - ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col, - NULL, pd->subsampling_x, pd->subsampling_y); - } - - const int num_refs = 1 + has_second_ref(above_mbmi); - - for (int ref = 0; ref < num_refs; ++ref) { - const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref]; - - const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME]; - - xd->block_refs[ref] = ref_buf; - if ((!av1_is_valid_scale(&ref_buf->sf))) - aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, - "Reference frame has invalid dimensions"); - av1_setup_pre_planes(xd, ref, ref_buf->buf, ctxt->mi_row, above_mi_col, - 
&ref_buf->sf, num_planes); - } - - xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col); - xd->mb_to_right_edge = ctxt->mb_to_far_edge + - (xd->n4_w - rel_mi_col - above_mi_width) * MI_SIZE * 8; -} - -void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row, - uint8_t left_mi_height, - MB_MODE_INFO *left_mbmi, - struct build_prediction_ctxt *ctxt, - const int num_planes) { - const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type); - const int left_mi_row = ctxt->mi_row + rel_mi_row; - - av1_modify_neighbor_predictor_for_obmc(left_mbmi); - - for (int j = 0; j < num_planes; ++j) { - struct macroblockd_plane *const pd = &xd->plane[j]; - setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j], - ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0, - NULL, pd->subsampling_x, pd->subsampling_y); - } - - const int num_refs = 1 + has_second_ref(left_mbmi); - - for (int ref = 0; ref < num_refs; ++ref) { - const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref]; - - const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME]; - - xd->block_refs[ref] = ref_buf; - if ((!av1_is_valid_scale(&ref_buf->sf))) - aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, - "Reference frame has invalid dimensions"); - av1_setup_pre_planes(xd, ref, ref_buf->buf, left_mi_row, ctxt->mi_col, - &ref_buf->sf, num_planes); - } - - xd->mb_to_top_edge = 8 * MI_SIZE * (-left_mi_row); - xd->mb_to_bottom_edge = - ctxt->mb_to_far_edge + - (xd->n4_h - rel_mi_row - left_mi_height) * MI_SIZE * 8; -} - -/* clang-format off */ -static const uint8_t ii_weights1d[MAX_SB_SIZE] = { - 60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32, - 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16, - 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, - 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, - 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, - 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -}; -static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = { - 32, 16, 16, 16, 8, 8, 8, 4, - 4, 4, 2, 2, 2, 1, 1, 1, - 8, 8, 4, 4, 2, 2 -}; -/* clang-format on */ - -static void build_smooth_interintra_mask(uint8_t *mask, int stride, - BLOCK_SIZE plane_bsize, - INTERINTRA_MODE mode) { - int i, j; - const int bw = block_size_wide[plane_bsize]; - const int bh = block_size_high[plane_bsize]; - const int size_scale = ii_size_scales[plane_bsize]; - - switch (mode) { - case II_V_PRED: - for (i = 0; i < bh; ++i) { - memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0])); - mask += stride; - } - break; - - case II_H_PRED: - for (i = 0; i < bh; ++i) { - for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale]; - mask += stride; - } - break; - - case II_SMOOTH_PRED: - for (i = 0; i < bh; ++i) { - for (j = 0; j < bw; ++j) - mask[j] = ii_weights1d[(i < j ? i : j) * size_scale]; - mask += stride; - } - break; - - case II_DC_PRED: - default: - for (i = 0; i < bh; ++i) { - memset(mask, 32, bw * sizeof(mask[0])); - mask += stride; - } - break; - } -} - -static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra, - int wedge_index, int wedge_sign, - BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, - uint8_t *comppred, int compstride, - const uint8_t *interpred, int interstride, - const uint8_t *intrapred, int intrastride) { - const int bw = block_size_wide[plane_bsize]; - const int bh = block_size_high[plane_bsize]; - - if (use_wedge_interintra) { - if (is_interintra_wedge_used(bsize)) { - const uint8_t *mask = - av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); - const int subw = 2 * mi_size_wide[bsize] == bw; - const int subh = 2 * mi_size_high[bsize] == bh; - aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, - interpred, interstride, mask, block_size_wide[bsize], - bw, bh, subw, subh); - } - return; - } - - uint8_t mask[MAX_SB_SQUARE]; - 
build_smooth_interintra_mask(mask, bw, plane_bsize, mode); - aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred, - interstride, mask, bw, bw, bh, 0, 0); -} - -static void combine_interintra_highbd( - INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index, - int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, - uint8_t *comppred8, int compstride, const uint8_t *interpred8, - int interstride, const uint8_t *intrapred8, int intrastride, int bd) { - const int bw = block_size_wide[plane_bsize]; - const int bh = block_size_high[plane_bsize]; - - if (use_wedge_interintra) { - if (is_interintra_wedge_used(bsize)) { - const uint8_t *mask = - av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); - const int subh = 2 * mi_size_high[bsize] == bh; - const int subw = 2 * mi_size_wide[bsize] == bw; - aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride, - interpred8, interstride, mask, - block_size_wide[bsize], bw, bh, subw, subh, bd); - } - return; - } - - uint8_t mask[MAX_SB_SQUARE]; - build_smooth_interintra_mask(mask, bw, plane_bsize, mode); - aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride, - interpred8, interstride, mask, bw, bw, bh, 0, 0, - bd); -} - -void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm, - MACROBLOCKD *xd, - BLOCK_SIZE bsize, int plane, - BUFFER_SET *ctx, uint8_t *dst, - int dst_stride) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const int ssx = xd->plane[plane].subsampling_x; - const int ssy = xd->plane[plane].subsampling_y; - BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy); - PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode]; - assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0); - assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0); - assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0); - assert(xd->mi[0]->use_intrabc == 0); - - av1_predict_intra_block(cm, xd, pd->width, 
pd->height, - max_txsize_rect_lookup[plane_bsize], mode, 0, 0, - FILTER_INTRA_MODES, ctx->plane[plane], - ctx->stride[plane], dst, dst_stride, 0, 0, plane); -} - -void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, - const uint8_t *inter_pred, int inter_stride, - const uint8_t *intra_pred, int intra_stride) { - const int ssx = xd->plane[plane].subsampling_x; - const int ssy = xd->plane[plane].subsampling_y; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy); - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - combine_interintra_highbd( - xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra, - xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign, - bsize, plane_bsize, xd->plane[plane].dst.buf, - xd->plane[plane].dst.stride, inter_pred, inter_stride, intra_pred, - intra_stride, xd->bd); - return; - } - combine_interintra( - xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra, - xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign, - bsize, plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, - inter_pred, inter_stride, intra_pred, intra_stride); -} - -// build interintra_predictors for one plane -void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd, - uint8_t *pred, int stride, - BUFFER_SET *ctx, int plane, - BLOCK_SIZE bsize) { - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]); - av1_build_intra_predictors_for_interintra( - cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor), - MAX_SB_SIZE); - av1_combine_interintra(xd, bsize, plane, pred, stride, - CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); - } else { - DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]); - av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx, - intrapredictor, MAX_SB_SIZE); - av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor, - MAX_SB_SIZE); - } 
-} - -void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd, - uint8_t *upred, uint8_t *vpred, - int ustride, int vstride, - BUFFER_SET *ctx, BLOCK_SIZE bsize) { - av1_build_interintra_predictors_sbp(cm, xd, upred, ustride, ctx, 1, bsize); - av1_build_interintra_predictors_sbp(cm, xd, vpred, vstride, ctx, 2, bsize); -} diff --git a/third_party/aom/av1/common/reconinter.h b/third_party/aom/av1/common/reconinter.h deleted file mode 100644 index db86c777e..000000000 --- a/third_party/aom/av1/common/reconinter.h +++ /dev/null @@ -1,365 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_RECONINTER_H_ -#define AOM_AV1_COMMON_RECONINTER_H_ - -#include "av1/common/filter.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/convolve.h" -#include "av1/common/warped_motion.h" -#include "aom/aom_integer.h" - -// Work out how many pixels off the edge of a reference frame we're allowed -// to go when forming an inter prediction. -// The outermost row/col of each referernce frame is extended by -// (AOM_BORDER_IN_PIXELS >> subsampling) pixels, but we need to keep -// at least AOM_INTERP_EXTEND pixels within that to account for filtering. -// -// We have to break this up into two macros to keep both clang-format and -// tools/lint-hunks.py happy. 
-#define AOM_LEFT_TOP_MARGIN_PX(subsampling) \ - ((AOM_BORDER_IN_PIXELS >> subsampling) - AOM_INTERP_EXTEND) -#define AOM_LEFT_TOP_MARGIN_SCALED(subsampling) \ - (AOM_LEFT_TOP_MARGIN_PX(subsampling) << SCALE_SUBPEL_BITS) - -#ifdef __cplusplus -extern "C" { -#endif - -// Set to (1 << 5) if the 32-ary codebooks are used for any bock size -#define MAX_WEDGE_TYPES (1 << 4) - -#define MAX_WEDGE_SIZE_LOG2 5 // 32x32 -#define MAX_WEDGE_SIZE (1 << MAX_WEDGE_SIZE_LOG2) -#define MAX_WEDGE_SQUARE (MAX_WEDGE_SIZE * MAX_WEDGE_SIZE) - -#define WEDGE_WEIGHT_BITS 6 - -#define WEDGE_NONE -1 - -// Angles are with respect to horizontal anti-clockwise -typedef enum { - WEDGE_HORIZONTAL = 0, - WEDGE_VERTICAL = 1, - WEDGE_OBLIQUE27 = 2, - WEDGE_OBLIQUE63 = 3, - WEDGE_OBLIQUE117 = 4, - WEDGE_OBLIQUE153 = 5, - WEDGE_DIRECTIONS -} WedgeDirectionType; - -// 3-tuple: {direction, x_offset, y_offset} -typedef struct { - WedgeDirectionType direction; - int x_offset; - int y_offset; -} wedge_code_type; - -typedef uint8_t *wedge_masks_type[MAX_WEDGE_TYPES]; - -typedef struct { - int bits; - const wedge_code_type *codebook; - uint8_t *signflip; - wedge_masks_type *masks; -} wedge_params_type; - -extern const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL]; - -typedef struct SubpelParams { - int xs; - int ys; - int subpel_x; - int subpel_y; -} SubpelParams; - -struct build_prediction_ctxt { - const AV1_COMMON *cm; - int mi_row; - int mi_col; - uint8_t **tmp_buf; - int *tmp_width; - int *tmp_height; - int *tmp_stride; - int mb_to_far_edge; -}; - -static INLINE int has_scale(int xs, int ys) { - return xs != SCALE_SUBPEL_SHIFTS || ys != SCALE_SUBPEL_SHIFTS; -} - -static INLINE void revert_scale_extra_bits(SubpelParams *sp) { - sp->subpel_x >>= SCALE_EXTRA_BITS; - sp->subpel_y >>= SCALE_EXTRA_BITS; - sp->xs >>= SCALE_EXTRA_BITS; - sp->ys >>= SCALE_EXTRA_BITS; - assert(sp->subpel_x < SUBPEL_SHIFTS); - assert(sp->subpel_y < SUBPEL_SHIFTS); - assert(sp->xs <= SUBPEL_SHIFTS); - assert(sp->ys <= 
SUBPEL_SHIFTS); -} - -static INLINE void inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const SubpelParams *subpel_params, - const struct scale_factors *sf, int w, int h, - ConvolveParams *conv_params, - InterpFilters interp_filters, - int is_intrabc) { - assert(conv_params->do_average == 0 || conv_params->do_average == 1); - assert(sf); - const int is_scaled = has_scale(subpel_params->xs, subpel_params->ys); - assert(IMPLIES(is_intrabc, !is_scaled)); - if (is_scaled) { - av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h, - interp_filters, subpel_params->subpel_x, - subpel_params->xs, subpel_params->subpel_y, - subpel_params->ys, 1, conv_params, sf, is_intrabc); - } else { - SubpelParams sp = *subpel_params; - revert_scale_extra_bits(&sp); - av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h, - interp_filters, sp.subpel_x, sp.xs, sp.subpel_y, - sp.ys, 0, conv_params, sf, is_intrabc); - } -} - -static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const SubpelParams *subpel_params, - const struct scale_factors *sf, int w, - int h, ConvolveParams *conv_params, - InterpFilters interp_filters, - int is_intrabc, int bd) { - assert(conv_params->do_average == 0 || conv_params->do_average == 1); - assert(sf); - const int is_scaled = has_scale(subpel_params->xs, subpel_params->ys); - assert(IMPLIES(is_intrabc, !is_scaled)); - if (is_scaled) { - av1_highbd_convolve_2d_facade( - src, src_stride, dst, dst_stride, w, h, interp_filters, - subpel_params->subpel_x, subpel_params->xs, subpel_params->subpel_y, - subpel_params->ys, 1, conv_params, sf, is_intrabc, bd); - } else { - SubpelParams sp = *subpel_params; - revert_scale_extra_bits(&sp); - av1_highbd_convolve_2d_facade( - src, src_stride, dst, dst_stride, w, h, interp_filters, sp.subpel_x, - sp.xs, sp.subpel_y, sp.ys, 0, conv_params, sf, is_intrabc, bd); - } -} - -void 
av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi); -int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, - const struct macroblockd_plane *pd, int dir); - -static INLINE int is_interinter_compound_used(COMPOUND_TYPE type, - BLOCK_SIZE sb_type) { - const int comp_allowed = is_comp_ref_allowed(sb_type); - switch (type) { - case COMPOUND_AVERAGE: - case COMPOUND_DIFFWTD: return comp_allowed; - case COMPOUND_WEDGE: - return comp_allowed && wedge_params_lookup[sb_type].bits > 0; - default: assert(0); return 0; - } -} - -static INLINE int is_any_masked_compound_used(BLOCK_SIZE sb_type) { - COMPOUND_TYPE comp_type; - int i; - if (!is_comp_ref_allowed(sb_type)) return 0; - for (i = 0; i < COMPOUND_TYPES; i++) { - comp_type = (COMPOUND_TYPE)i; - if (is_masked_compound_type(comp_type) && - is_interinter_compound_used(comp_type, sb_type)) - return 1; - } - return 0; -} - -static INLINE int get_wedge_bits_lookup(BLOCK_SIZE sb_type) { - return wedge_params_lookup[sb_type].bits; -} - -static INLINE int get_interinter_wedge_bits(BLOCK_SIZE sb_type) { - const int wbits = wedge_params_lookup[sb_type].bits; - return (wbits > 0) ? 
wbits + 1 : 0; -} - -static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) { - return wedge_params_lookup[sb_type].bits > 0; -} - -static INLINE int get_interintra_wedge_bits(BLOCK_SIZE sb_type) { - return wedge_params_lookup[sb_type].bits; -} - -void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, const SubpelParams *subpel_params, - const struct scale_factors *sf, int w, int h, - ConvolveParams *conv_params, - InterpFilters interp_filters, - const WarpTypesAllowed *warp_types, int p_col, - int p_row, int plane, int ref, - const MB_MODE_INFO *mi, int build_for_obmc, - const MACROBLOCKD *xd, int can_use_previous); - -void av1_make_masked_inter_predictor( - const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride, - const SubpelParams *subpel_params, const struct scale_factors *sf, int w, - int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane, - const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref, - MACROBLOCKD *xd, int can_use_previous); - -// TODO(jkoleszar): yet another mv clamping function :-( -static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, - const MV *src_mv, int bw, int bh, - int ss_x, int ss_y) { - // If the MV points so far into the UMV border that no visible pixels - // are used for reconstruction, the subpel part of the MV can be - // discarded and the MV limited to 16 pixels with equivalent results. 
- const int spel_left = (AOM_INTERP_EXTEND + bw) << SUBPEL_BITS; - const int spel_right = spel_left - SUBPEL_SHIFTS; - const int spel_top = (AOM_INTERP_EXTEND + bh) << SUBPEL_BITS; - const int spel_bottom = spel_top - SUBPEL_SHIFTS; - MV clamped_mv = { (int16_t)(src_mv->row * (1 << (1 - ss_y))), - (int16_t)(src_mv->col * (1 << (1 - ss_x))) }; - assert(ss_x <= 1); - assert(ss_y <= 1); - - clamp_mv(&clamped_mv, xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left, - xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right, - xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top, - xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom); - - return clamped_mv; -} - -static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride, - const struct scale_factors *sf) { - const int x = - sf ? sf->scale_value_x(x_offset, sf) >> SCALE_EXTRA_BITS : x_offset; - const int y = - sf ? sf->scale_value_y(y_offset, sf) >> SCALE_EXTRA_BITS : y_offset; - return y * stride + x; -} - -static INLINE void setup_pred_plane(struct buf_2d *dst, BLOCK_SIZE bsize, - uint8_t *src, int width, int height, - int stride, int mi_row, int mi_col, - const struct scale_factors *scale, - int subsampling_x, int subsampling_y) { - // Offset the buffer pointer - if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1)) - mi_row -= 1; - if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1)) - mi_col -= 1; - - const int x = (MI_SIZE * mi_col) >> subsampling_x; - const int y = (MI_SIZE * mi_row) >> subsampling_y; - dst->buf = src + scaled_buffer_offset(x, y, stride, scale); - dst->buf0 = src; - dst->width = width; - dst->height = height; - dst->stride = stride; -} - -void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize, - const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const int plane_start, const int plane_end); - -void av1_setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const struct 
scale_factors *sf, const int num_planes); - -static INLINE void set_default_interp_filters( - MB_MODE_INFO *const mbmi, InterpFilter frame_interp_filter) { - mbmi->interp_filters = - av1_broadcast_interp_filter(av1_unswitchable_filter(frame_interp_filter)); -} - -static INLINE int av1_is_interp_needed(const MACROBLOCKD *const xd) { - const MB_MODE_INFO *const mbmi = xd->mi[0]; - if (mbmi->skip_mode) return 0; - if (mbmi->motion_mode == WARPED_CAUSAL) return 0; - if (is_nontrans_global_motion(xd, xd->mi[0])) return 0; - return 1; -} - -void av1_setup_build_prediction_by_above_pred( - MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width, - MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt, - const int num_planes); -void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row, - uint8_t left_mi_height, - MB_MODE_INFO *left_mbmi, - struct build_prediction_ctxt *ctxt, - const int num_planes); -void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, - uint8_t *above[MAX_MB_PLANE], - int above_stride[MAX_MB_PLANE], - uint8_t *left[MAX_MB_PLANE], - int left_stride[MAX_MB_PLANE]); - -const uint8_t *av1_get_obmc_mask(int length); -void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col); - -#define MASK_MASTER_SIZE ((MAX_WEDGE_SIZE) << 1) -#define MASK_MASTER_STRIDE (MASK_MASTER_SIZE) - -void av1_init_wedge_masks(); - -static INLINE const uint8_t *av1_get_contiguous_soft_mask(int wedge_index, - int wedge_sign, - BLOCK_SIZE sb_type) { - return wedge_params_lookup[sb_type].masks[wedge_sign][wedge_index]; -} - -const uint8_t *av1_get_compound_type_mask( - const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type); - -// build interintra_predictors for one plane -void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd, - uint8_t *pred, int stride, - BUFFER_SET *ctx, int plane, - BLOCK_SIZE bsize); - -void 
av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd, - uint8_t *upred, uint8_t *vpred, - int ustride, int vstride, - BUFFER_SET *ctx, BLOCK_SIZE bsize); - -void av1_build_intra_predictors_for_interintra( - const AV1_COMMON *cm, MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, - BUFFER_SET *ctx, uint8_t *intra_pred, int intra_stride); - -void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, - const uint8_t *inter_pred, int inter_stride, - const uint8_t *intra_pred, int intra_stride); - -void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi, - int order_idx, int *fwd_offset, int *bck_offset, - int *use_jnt_comp_avg, int is_compound); -int av1_allow_warp(const MB_MODE_INFO *const mbmi, - const WarpTypesAllowed *const warp_types, - const WarpedMotionParams *const gm_params, - int build_for_obmc, int x_scale, int y_scale, - WarpedMotionParams *final_warp_params); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_RECONINTER_H_ diff --git a/third_party/aom/av1/common/reconintra.c b/third_party/aom/av1/common/reconintra.c deleted file mode 100644 index 71a52e73e..000000000 --- a/third_party/aom/av1/common/reconintra.c +++ /dev/null @@ -1,1640 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <math.h> - -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/aom_once.h" -#include "aom_ports/mem.h" -#include "aom_ports/system_state.h" -#include "av1/common/reconintra.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/cfl.h" - -enum { - NEED_LEFT = 1 << 1, - NEED_ABOVE = 1 << 2, - NEED_ABOVERIGHT = 1 << 3, - NEED_ABOVELEFT = 1 << 4, - NEED_BOTTOMLEFT = 1 << 5, -}; - -#define INTRA_EDGE_FILT 3 -#define INTRA_EDGE_TAPS 5 -#define MAX_UPSAMPLE_SZ 16 - -static const uint8_t extend_modes[INTRA_MODES] = { - NEED_ABOVE | NEED_LEFT, // DC - NEED_ABOVE, // V - NEED_LEFT, // H - NEED_ABOVE | NEED_ABOVERIGHT, // D45 - NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D135 - NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D113 - NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D157 - NEED_LEFT | NEED_BOTTOMLEFT, // D203 - NEED_ABOVE | NEED_ABOVERIGHT, // D67 - NEED_LEFT | NEED_ABOVE, // SMOOTH - NEED_LEFT | NEED_ABOVE, // SMOOTH_V - NEED_LEFT | NEED_ABOVE, // SMOOTH_H - NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // PAETH -}; - -// Tables to store if the top-right reference pixels are available. The flags -// are represented with bits, packed into 8-bit integers. E.g., for the 32x32 -// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster -// order), so its flag is stored at the 3rd bit of the 2nd entry in the table, -// i.e. (table[10 / 8] >> (10 % 8)) & 1. -// . . . . -// . . . . -// . . o . -// . . . . 
-static uint8_t has_tr_4x4[128] = { - 255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, - 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, - 255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, - 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, - 255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, - 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, - 255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, - 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, -}; -static uint8_t has_tr_4x8[64] = { - 255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119, - 119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127, - 127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119, - 119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127, - 119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119, -}; -static uint8_t has_tr_8x4[64] = { - 255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0, - 127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0, - 255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0, - 127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0, -}; -static uint8_t has_tr_8x8[32] = { - 255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85, - 255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85, -}; -static uint8_t has_tr_8x16[16] = { - 255, 255, 119, 119, 127, 127, 119, 119, - 255, 127, 119, 119, 127, 127, 119, 119, -}; -static uint8_t has_tr_16x8[16] = { - 255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0, -}; -static uint8_t has_tr_16x16[8] = { - 255, 85, 119, 85, 127, 85, 119, 85, -}; -static uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 }; -static uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 }; -static uint8_t has_tr_32x32[2] = { 95, 87 }; -static uint8_t has_tr_32x64[1] = { 127 }; -static uint8_t 
has_tr_64x32[1] = { 19 }; -static uint8_t has_tr_64x64[1] = { 7 }; -static uint8_t has_tr_64x128[1] = { 3 }; -static uint8_t has_tr_128x64[1] = { 1 }; -static uint8_t has_tr_128x128[1] = { 1 }; -static uint8_t has_tr_4x16[32] = { - 255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255, - 127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127, - 127, 127, 255, 127, 255, 127, 127, 127, 127, 127, -}; -static uint8_t has_tr_16x4[32] = { - 255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0, - 127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0, -}; -static uint8_t has_tr_8x32[8] = { - 255, 255, 127, 127, 255, 127, 127, 127, -}; -static uint8_t has_tr_32x8[8] = { - 15, 0, 5, 0, 7, 0, 5, 0, -}; -static uint8_t has_tr_16x64[2] = { 255, 127 }; -static uint8_t has_tr_64x16[2] = { 3, 1 }; - -static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = { - // 4X4 - has_tr_4x4, - // 4X8, 8X4, 8X8 - has_tr_4x8, has_tr_8x4, has_tr_8x8, - // 8X16, 16X8, 16X16 - has_tr_8x16, has_tr_16x8, has_tr_16x16, - // 16X32, 32X16, 32X32 - has_tr_16x32, has_tr_32x16, has_tr_32x32, - // 32X64, 64X32, 64X64 - has_tr_32x64, has_tr_64x32, has_tr_64x64, - // 64x128, 128x64, 128x128 - has_tr_64x128, has_tr_128x64, has_tr_128x128, - // 4x16, 16x4, 8x32 - has_tr_4x16, has_tr_16x4, has_tr_8x32, - // 32x8, 16x64, 64x16 - has_tr_32x8, has_tr_16x64, has_tr_64x16 -}; - -static uint8_t has_tr_vert_8x8[32] = { - 255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0, - 255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0, -}; -static uint8_t has_tr_vert_16x16[8] = { - 255, 0, 119, 0, 127, 0, 119, 0, -}; -static uint8_t has_tr_vert_32x32[2] = { 15, 7 }; -static uint8_t has_tr_vert_64x64[1] = { 3 }; - -// The _vert_* tables are like the ordinary tables above, but describe the -// order we visit square blocks when doing a PARTITION_VERT_A or -// PARTITION_VERT_B. This is the same order as normal except for on the last -// split where we go vertically (TL, BL, TR, BR). 
We treat the rectangular block -// as a pair of squares, which means that these tables work correctly for both -// mixed vertical partition types. -// -// There are tables for each of the square sizes. Vertical rectangles (like -// BLOCK_16X32) use their respective "non-vert" table -static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = { - // 4X4 - NULL, - // 4X8, 8X4, 8X8 - has_tr_4x8, NULL, has_tr_vert_8x8, - // 8X16, 16X8, 16X16 - has_tr_8x16, NULL, has_tr_vert_16x16, - // 16X32, 32X16, 32X32 - has_tr_16x32, NULL, has_tr_vert_32x32, - // 32X64, 64X32, 64X64 - has_tr_32x64, NULL, has_tr_vert_64x64, - // 64x128, 128x64, 128x128 - has_tr_64x128, NULL, has_tr_128x128 -}; - -static const uint8_t *get_has_tr_table(PARTITION_TYPE partition, - BLOCK_SIZE bsize) { - const uint8_t *ret = NULL; - // If this is a mixed vertical partition, look up bsize in orders_vert. - if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) { - assert(bsize < BLOCK_SIZES); - ret = has_tr_vert_tables[bsize]; - } else { - ret = has_tr_tables[bsize]; - } - assert(ret); - return ret; -} - -static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row, - int mi_col, int top_available, int right_available, - PARTITION_TYPE partition, TX_SIZE txsz, int row_off, - int col_off, int ss_x, int ss_y) { - if (!top_available || !right_available) return 0; - - const int bw_unit = block_size_wide[bsize] >> tx_size_wide_log2[0]; - const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1); - const int top_right_count_unit = tx_size_wide_unit[txsz]; - - if (row_off > 0) { // Just need to check if enough pixels on the right. - if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) { - // Special case: For 128x128 blocks, the transform unit whose - // top-right corner is at the center of the block does in fact have - // pixels available at its top-right corner. 
- if (row_off == mi_size_high[BLOCK_64X64] >> ss_y && - col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) { - return 1; - } - const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x; - const int col_off_64 = col_off % plane_bw_unit_64; - return col_off_64 + top_right_count_unit < plane_bw_unit_64; - } - return col_off + top_right_count_unit < plane_bw_unit; - } else { - // All top-right pixels are in the block above, which is already available. - if (col_off + top_right_count_unit < plane_bw_unit) return 1; - - const int bw_in_mi_log2 = mi_size_wide_log2[bsize]; - const int bh_in_mi_log2 = mi_size_high_log2[bsize]; - const int sb_mi_size = mi_size_high[cm->seq_params.sb_size]; - const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2; - const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2; - - // Top row of superblock: so top-right pixels are in the top and/or - // top-right superblocks, both of which are already available. - if (blk_row_in_sb == 0) return 1; - - // Rightmost column of superblock (and not the top row): so top-right pixels - // fall in the right superblock, which is not available yet. - if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) { - return 0; - } - - // General case (neither top row nor rightmost column): check if the - // top-right block is coded before the current block. - const int this_blk_index = - ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) + - blk_col_in_sb + 0; - const int idx1 = this_blk_index / 8; - const int idx2 = this_blk_index % 8; - const uint8_t *has_tr_table = get_has_tr_table(partition, bsize); - return (has_tr_table[idx1] >> idx2) & 1; - } -} - -// Similar to the has_tr_* tables, but store if the bottom-left reference -// pixels are available. 
-static uint8_t has_bl_4x4[128] = { - 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85, - 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, 0, 84, 85, 85, 85, 16, 17, - 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84, - 85, 85, 85, 0, 0, 0, 0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, - 0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, - 0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, - 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 0, 0, -}; -static uint8_t has_bl_4x8[64] = { - 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0, - 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0, - 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0, - 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0, -}; -static uint8_t has_bl_8x4[64] = { - 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1, - 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0, - 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1, - 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0, -}; -static uint8_t has_bl_8x8[32] = { - 84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0, - 84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0, -}; -static uint8_t has_bl_8x16[16] = { - 16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0, -}; -static uint8_t has_bl_16x8[16] = { - 254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0, -}; -static uint8_t has_bl_16x16[8] = { - 84, 16, 84, 0, 84, 16, 84, 0, -}; -static uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 }; -static uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 }; -static uint8_t has_bl_32x32[2] = { 4, 4 }; -static uint8_t has_bl_32x64[1] = { 0 }; -static uint8_t has_bl_64x32[1] = { 34 }; -static uint8_t has_bl_64x64[1] = { 0 }; -static uint8_t has_bl_64x128[1] = { 0 }; -static uint8_t has_bl_128x64[1] = { 0 }; -static uint8_t has_bl_128x128[1] = { 0 }; -static uint8_t 
has_bl_4x16[32] = { - 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, - 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, -}; -static uint8_t has_bl_16x4[32] = { - 254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0, - 254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0, -}; -static uint8_t has_bl_8x32[8] = { - 0, 1, 0, 0, 0, 1, 0, 0, -}; -static uint8_t has_bl_32x8[8] = { - 238, 78, 238, 14, 238, 78, 238, 14, -}; -static uint8_t has_bl_16x64[2] = { 0, 0 }; -static uint8_t has_bl_64x16[2] = { 42, 42 }; - -static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = { - // 4X4 - has_bl_4x4, - // 4X8, 8X4, 8X8 - has_bl_4x8, has_bl_8x4, has_bl_8x8, - // 8X16, 16X8, 16X16 - has_bl_8x16, has_bl_16x8, has_bl_16x16, - // 16X32, 32X16, 32X32 - has_bl_16x32, has_bl_32x16, has_bl_32x32, - // 32X64, 64X32, 64X64 - has_bl_32x64, has_bl_64x32, has_bl_64x64, - // 64x128, 128x64, 128x128 - has_bl_64x128, has_bl_128x64, has_bl_128x128, - // 4x16, 16x4, 8x32 - has_bl_4x16, has_bl_16x4, has_bl_8x32, - // 32x8, 16x64, 64x16 - has_bl_32x8, has_bl_16x64, has_bl_64x16 -}; - -static uint8_t has_bl_vert_8x8[32] = { - 254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0, - 254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0, -}; -static uint8_t has_bl_vert_16x16[8] = { - 254, 16, 254, 0, 254, 16, 254, 0, -}; -static uint8_t has_bl_vert_32x32[2] = { 14, 14 }; -static uint8_t has_bl_vert_64x64[1] = { 2 }; - -// The _vert_* tables are like the ordinary tables above, but describe the -// order we visit square blocks when doing a PARTITION_VERT_A or -// PARTITION_VERT_B. This is the same order as normal except for on the last -// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block -// as a pair of squares, which means that these tables work correctly for both -// mixed vertical partition types. -// -// There are tables for each of the square sizes. 
Vertical rectangles (like -// BLOCK_16X32) use their respective "non-vert" table -static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = { - // 4X4 - NULL, - // 4X8, 8X4, 8X8 - has_bl_4x8, NULL, has_bl_vert_8x8, - // 8X16, 16X8, 16X16 - has_bl_8x16, NULL, has_bl_vert_16x16, - // 16X32, 32X16, 32X32 - has_bl_16x32, NULL, has_bl_vert_32x32, - // 32X64, 64X32, 64X64 - has_bl_32x64, NULL, has_bl_vert_64x64, - // 64x128, 128x64, 128x128 - has_bl_64x128, NULL, has_bl_128x128 -}; - -static const uint8_t *get_has_bl_table(PARTITION_TYPE partition, - BLOCK_SIZE bsize) { - const uint8_t *ret = NULL; - // If this is a mixed vertical partition, look up bsize in orders_vert. - if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) { - assert(bsize < BLOCK_SIZES); - ret = has_bl_vert_tables[bsize]; - } else { - ret = has_bl_tables[bsize]; - } - assert(ret); - return ret; -} - -static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row, - int mi_col, int bottom_available, int left_available, - PARTITION_TYPE partition, TX_SIZE txsz, int row_off, - int col_off, int ss_x, int ss_y) { - if (!bottom_available || !left_available) return 0; - - // Special case for 128x* blocks, when col_off is half the block width. - // This is needed because 128x* superblocks are divided into 64x* blocks in - // raster order - if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) { - const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x; - const int col_off_64 = col_off % plane_bw_unit_64; - if (col_off_64 == 0) { - // We are at the left edge of top-right or bottom-right 64x* block. - const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y; - const int row_off_64 = row_off % plane_bh_unit_64; - const int plane_bh_unit = - AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64); - // Check if all bottom-left pixels are in the left 64x* block (which is - // already coded). 
- return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit; - } - } - - if (col_off > 0) { - // Bottom-left pixels are in the bottom-left block, which is not available. - return 0; - } else { - const int bh_unit = block_size_high[bsize] >> tx_size_high_log2[0]; - const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1); - const int bottom_left_count_unit = tx_size_high_unit[txsz]; - - // All bottom-left pixels are in the left block, which is already available. - if (row_off + bottom_left_count_unit < plane_bh_unit) return 1; - - const int bw_in_mi_log2 = mi_size_wide_log2[bsize]; - const int bh_in_mi_log2 = mi_size_high_log2[bsize]; - const int sb_mi_size = mi_size_high[cm->seq_params.sb_size]; - const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2; - const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2; - - // Leftmost column of superblock: so bottom-left pixels maybe in the left - // and/or bottom-left superblocks. But only the left superblock is - // available, so check if all required pixels fall in that superblock. - if (blk_col_in_sb == 0) { - const int blk_start_row_off = blk_row_in_sb - << (bh_in_mi_log2 + MI_SIZE_LOG2 - - tx_size_wide_log2[0]) >> - ss_y; - const int row_off_in_sb = blk_start_row_off + row_off; - const int sb_height_unit = sb_mi_size >> ss_y; - return row_off_in_sb + bottom_left_count_unit < sb_height_unit; - } - - // Bottom row of superblock (and not the leftmost column): so bottom-left - // pixels fall in the bottom superblock, which is not available yet. - if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0; - - // General case (neither leftmost column nor bottom row): check if the - // bottom-left block is coded before the current block. 
- const int this_blk_index = - ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) + - blk_col_in_sb + 0; - const int idx1 = this_blk_index / 8; - const int idx2 = this_blk_index % 8; - const uint8_t *has_bl_table = get_has_bl_table(partition, bsize); - return (has_bl_table[idx1] >> idx2) & 1; - } -} - -typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left); - -static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL]; -static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL]; - -typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride, - const uint16_t *above, const uint16_t *left, - int bd); -static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL]; -static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL]; - -static void init_intra_predictors_internal(void) { - assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES); - -#define INIT_RECTANGULAR(p, type) \ - p[TX_4X8] = aom_##type##_predictor_4x8; \ - p[TX_8X4] = aom_##type##_predictor_8x4; \ - p[TX_8X16] = aom_##type##_predictor_8x16; \ - p[TX_16X8] = aom_##type##_predictor_16x8; \ - p[TX_16X32] = aom_##type##_predictor_16x32; \ - p[TX_32X16] = aom_##type##_predictor_32x16; \ - p[TX_32X64] = aom_##type##_predictor_32x64; \ - p[TX_64X32] = aom_##type##_predictor_64x32; \ - p[TX_4X16] = aom_##type##_predictor_4x16; \ - p[TX_16X4] = aom_##type##_predictor_16x4; \ - p[TX_8X32] = aom_##type##_predictor_8x32; \ - p[TX_32X8] = aom_##type##_predictor_32x8; \ - p[TX_16X64] = aom_##type##_predictor_16x64; \ - p[TX_64X16] = aom_##type##_predictor_64x16; - -#define INIT_NO_4X4(p, type) \ - p[TX_8X8] = aom_##type##_predictor_8x8; \ - p[TX_16X16] = aom_##type##_predictor_16x16; \ - p[TX_32X32] = aom_##type##_predictor_32x32; \ - p[TX_64X64] = aom_##type##_predictor_64x64; \ - INIT_RECTANGULAR(p, type) - -#define INIT_ALL_SIZES(p, type) \ - p[TX_4X4] = aom_##type##_predictor_4x4; \ - INIT_NO_4X4(p, type) - - INIT_ALL_SIZES(pred[V_PRED], v); - 
INIT_ALL_SIZES(pred[H_PRED], h); - INIT_ALL_SIZES(pred[PAETH_PRED], paeth); - INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth); - INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v); - INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h); - INIT_ALL_SIZES(dc_pred[0][0], dc_128); - INIT_ALL_SIZES(dc_pred[0][1], dc_top); - INIT_ALL_SIZES(dc_pred[1][0], dc_left); - INIT_ALL_SIZES(dc_pred[1][1], dc); - - INIT_ALL_SIZES(pred_high[V_PRED], highbd_v); - INIT_ALL_SIZES(pred_high[H_PRED], highbd_h); - INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth); - INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth); - INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v); - INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h); - INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128); - INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top); - INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left); - INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc); -#undef intra_pred_allsizes -} - -// Directional prediction, zone 1: 0 < angle < 90 -void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, - const uint8_t *above, const uint8_t *left, - int upsample_above, int dx, int dy) { - int r, c, x, base, shift, val; - - (void)left; - (void)dy; - assert(dy == 1); - assert(dx > 0); - - const int max_base_x = ((bw + bh) - 1) << upsample_above; - const int frac_bits = 6 - upsample_above; - const int base_inc = 1 << upsample_above; - x = dx; - for (r = 0; r < bh; ++r, dst += stride, x += dx) { - base = x >> frac_bits; - shift = ((x << upsample_above) & 0x3F) >> 1; - - if (base >= max_base_x) { - for (int i = r; i < bh; ++i) { - memset(dst, above[max_base_x], bw * sizeof(dst[0])); - dst += stride; - } - return; - } - - for (c = 0; c < bw; ++c, base += base_inc) { - if (base < max_base_x) { - val = above[base] * (32 - shift) + above[base + 1] * shift; - dst[c] = ROUND_POWER_OF_TWO(val, 5); - } else { - dst[c] = above[max_base_x]; - } - } - } -} - -// Directional prediction, zone 2: 90 < angle < 180 -void 
av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, - const uint8_t *above, const uint8_t *left, - int upsample_above, int upsample_left, int dx, - int dy) { - int r, c, x, y, shift1, shift2, val, base1, base2; - - assert(dx > 0); - assert(dy > 0); - - const int min_base_x = -(1 << upsample_above); - const int frac_bits_x = 6 - upsample_above; - const int frac_bits_y = 6 - upsample_left; - const int base_inc_x = 1 << upsample_above; - x = -dx; - for (r = 0; r < bh; ++r, x -= dx, dst += stride) { - base1 = x >> frac_bits_x; - y = (r << 6) - dy; - for (c = 0; c < bw; ++c, base1 += base_inc_x, y -= dy) { - if (base1 >= min_base_x) { - shift1 = ((x * (1 << upsample_above)) & 0x3F) >> 1; - val = above[base1] * (32 - shift1) + above[base1 + 1] * shift1; - val = ROUND_POWER_OF_TWO(val, 5); - } else { - base2 = y >> frac_bits_y; - assert(base2 >= -(1 << upsample_left)); - shift2 = ((y * (1 << upsample_left)) & 0x3F) >> 1; - val = left[base2] * (32 - shift2) + left[base2 + 1] * shift2; - val = ROUND_POWER_OF_TWO(val, 5); - } - dst[c] = val; - } - } -} - -// Directional prediction, zone 3: 180 < angle < 270 -void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, - const uint8_t *above, const uint8_t *left, - int upsample_left, int dx, int dy) { - int r, c, y, base, shift, val; - - (void)above; - (void)dx; - - assert(dx == 1); - assert(dy > 0); - - const int max_base_y = (bw + bh - 1) << upsample_left; - const int frac_bits = 6 - upsample_left; - const int base_inc = 1 << upsample_left; - y = dy; - for (c = 0; c < bw; ++c, y += dy) { - base = y >> frac_bits; - shift = ((y << upsample_left) & 0x3F) >> 1; - - for (r = 0; r < bh; ++r, base += base_inc) { - if (base < max_base_y) { - val = left[base] * (32 - shift) + left[base + 1] * shift; - dst[r * stride + c] = val = ROUND_POWER_OF_TWO(val, 5); - } else { - for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y]; - break; - } - } - } -} - -static void dr_predictor(uint8_t *dst, 
ptrdiff_t stride, TX_SIZE tx_size, - const uint8_t *above, const uint8_t *left, - int upsample_above, int upsample_left, int angle) { - const int dx = av1_get_dx(angle); - const int dy = av1_get_dy(angle); - const int bw = tx_size_wide[tx_size]; - const int bh = tx_size_high[tx_size]; - assert(angle > 0 && angle < 270); - - if (angle > 0 && angle < 90) { - av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx, - dy); - } else if (angle > 90 && angle < 180) { - av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above, - upsample_left, dx, dy); - } else if (angle > 180 && angle < 270) { - av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx, - dy); - } else if (angle == 90) { - pred[V_PRED][tx_size](dst, stride, above, left); - } else if (angle == 180) { - pred[H_PRED][tx_size](dst, stride, above, left); - } -} - -// Directional prediction, zone 1: 0 < angle < 90 -void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, - int bh, const uint16_t *above, - const uint16_t *left, int upsample_above, - int dx, int dy, int bd) { - int r, c, x, base, shift, val; - - (void)left; - (void)dy; - (void)bd; - assert(dy == 1); - assert(dx > 0); - - const int max_base_x = ((bw + bh) - 1) << upsample_above; - const int frac_bits = 6 - upsample_above; - const int base_inc = 1 << upsample_above; - x = dx; - for (r = 0; r < bh; ++r, dst += stride, x += dx) { - base = x >> frac_bits; - shift = ((x << upsample_above) & 0x3F) >> 1; - - if (base >= max_base_x) { - for (int i = r; i < bh; ++i) { - aom_memset16(dst, above[max_base_x], bw); - dst += stride; - } - return; - } - - for (c = 0; c < bw; ++c, base += base_inc) { - if (base < max_base_x) { - val = above[base] * (32 - shift) + above[base + 1] * shift; - dst[c] = ROUND_POWER_OF_TWO(val, 5); - } else { - dst[c] = above[max_base_x]; - } - } - } -} - -// Directional prediction, zone 2: 90 < angle < 180 -void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t 
stride, int bw, - int bh, const uint16_t *above, - const uint16_t *left, int upsample_above, - int upsample_left, int dx, int dy, int bd) { - int r, c, x, y, shift, val, base; - - (void)bd; - assert(dx > 0); - assert(dy > 0); - - const int min_base_x = -(1 << upsample_above); - const int frac_bits_x = 6 - upsample_above; - const int frac_bits_y = 6 - upsample_left; - for (r = 0; r < bh; ++r) { - for (c = 0; c < bw; ++c) { - y = r + 1; - x = (c << 6) - y * dx; - base = x >> frac_bits_x; - if (base >= min_base_x) { - shift = ((x * (1 << upsample_above)) & 0x3F) >> 1; - val = above[base] * (32 - shift) + above[base + 1] * shift; - val = ROUND_POWER_OF_TWO(val, 5); - } else { - x = c + 1; - y = (r << 6) - x * dy; - base = y >> frac_bits_y; - shift = ((y * (1 << upsample_left)) & 0x3F) >> 1; - val = left[base] * (32 - shift) + left[base + 1] * shift; - val = ROUND_POWER_OF_TWO(val, 5); - } - dst[c] = val; - } - dst += stride; - } -} - -// Directional prediction, zone 3: 180 < angle < 270 -void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw, - int bh, const uint16_t *above, - const uint16_t *left, int upsample_left, - int dx, int dy, int bd) { - int r, c, y, base, shift, val; - - (void)above; - (void)dx; - (void)bd; - assert(dx == 1); - assert(dy > 0); - - const int max_base_y = (bw + bh - 1) << upsample_left; - const int frac_bits = 6 - upsample_left; - const int base_inc = 1 << upsample_left; - y = dy; - for (c = 0; c < bw; ++c, y += dy) { - base = y >> frac_bits; - shift = ((y << upsample_left) & 0x3F) >> 1; - - for (r = 0; r < bh; ++r, base += base_inc) { - if (base < max_base_y) { - val = left[base] * (32 - shift) + left[base + 1] * shift; - dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5); - } else { - for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y]; - break; - } - } - } -} - -static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, - TX_SIZE tx_size, const uint16_t *above, - const uint16_t *left, int upsample_above, - int 
upsample_left, int angle, int bd) { - const int dx = av1_get_dx(angle); - const int dy = av1_get_dy(angle); - const int bw = tx_size_wide[tx_size]; - const int bh = tx_size_high[tx_size]; - assert(angle > 0 && angle < 270); - - if (angle > 0 && angle < 90) { - av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left, - upsample_above, dx, dy, bd); - } else if (angle > 90 && angle < 180) { - av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left, - upsample_above, upsample_left, dx, dy, bd); - } else if (angle > 180 && angle < 270) { - av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, - dx, dy, bd); - } else if (angle == 90) { - pred_high[V_PRED][tx_size](dst, stride, above, left, bd); - } else if (angle == 180) { - pred_high[H_PRED][tx_size](dst, stride, above, left, bd); - } -} - -DECLARE_ALIGNED(16, const int8_t, - av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = { - { - { -6, 10, 0, 0, 0, 12, 0, 0 }, - { -5, 2, 10, 0, 0, 9, 0, 0 }, - { -3, 1, 1, 10, 0, 7, 0, 0 }, - { -3, 1, 1, 2, 10, 5, 0, 0 }, - { -4, 6, 0, 0, 0, 2, 12, 0 }, - { -3, 2, 6, 0, 0, 2, 9, 0 }, - { -3, 2, 2, 6, 0, 2, 7, 0 }, - { -3, 1, 2, 2, 6, 3, 5, 0 }, - }, - { - { -10, 16, 0, 0, 0, 10, 0, 0 }, - { -6, 0, 16, 0, 0, 6, 0, 0 }, - { -4, 0, 0, 16, 0, 4, 0, 0 }, - { -2, 0, 0, 0, 16, 2, 0, 0 }, - { -10, 16, 0, 0, 0, 0, 10, 0 }, - { -6, 0, 16, 0, 0, 0, 6, 0 }, - { -4, 0, 0, 16, 0, 0, 4, 0 }, - { -2, 0, 0, 0, 16, 0, 2, 0 }, - }, - { - { -8, 8, 0, 0, 0, 16, 0, 0 }, - { -8, 0, 8, 0, 0, 16, 0, 0 }, - { -8, 0, 0, 8, 0, 16, 0, 0 }, - { -8, 0, 0, 0, 8, 16, 0, 0 }, - { -4, 4, 0, 0, 0, 0, 16, 0 }, - { -4, 0, 4, 0, 0, 0, 16, 0 }, - { -4, 0, 0, 4, 0, 0, 16, 0 }, - { -4, 0, 0, 0, 4, 0, 16, 0 }, - }, - { - { -2, 8, 0, 0, 0, 10, 0, 0 }, - { -1, 3, 8, 0, 0, 6, 0, 0 }, - { -1, 2, 3, 8, 0, 4, 0, 0 }, - { 0, 1, 2, 3, 8, 2, 0, 0 }, - { -1, 4, 0, 0, 0, 3, 10, 0 }, - { -1, 3, 4, 0, 0, 4, 6, 0 }, - { -1, 2, 3, 4, 0, 4, 4, 0 }, - { -1, 2, 2, 3, 4, 3, 3, 0 }, - }, - { - { -12, 14, 0, 0, 0, 
14, 0, 0 }, - { -10, 0, 14, 0, 0, 12, 0, 0 }, - { -9, 0, 0, 14, 0, 11, 0, 0 }, - { -8, 0, 0, 0, 14, 10, 0, 0 }, - { -10, 12, 0, 0, 0, 0, 14, 0 }, - { -9, 1, 12, 0, 0, 0, 12, 0 }, - { -8, 0, 0, 12, 0, 1, 11, 0 }, - { -7, 0, 0, 1, 12, 1, 9, 0 }, - }, -}; - -void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride, - TX_SIZE tx_size, const uint8_t *above, - const uint8_t *left, int mode) { - int r, c; - uint8_t buffer[33][33]; - const int bw = tx_size_wide[tx_size]; - const int bh = tx_size_high[tx_size]; - - assert(bw <= 32 && bh <= 32); - - // The initialization is just for silencing Jenkins static analysis warnings - for (r = 0; r < bh + 1; ++r) - memset(buffer[r], 0, (bw + 1) * sizeof(buffer[0][0])); - - for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r]; - memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t)); - - for (r = 1; r < bh + 1; r += 2) - for (c = 1; c < bw + 1; c += 4) { - const uint8_t p0 = buffer[r - 1][c - 1]; - const uint8_t p1 = buffer[r - 1][c]; - const uint8_t p2 = buffer[r - 1][c + 1]; - const uint8_t p3 = buffer[r - 1][c + 2]; - const uint8_t p4 = buffer[r - 1][c + 3]; - const uint8_t p5 = buffer[r][c - 1]; - const uint8_t p6 = buffer[r + 1][c - 1]; - for (int k = 0; k < 8; ++k) { - int r_offset = k >> 2; - int c_offset = k & 0x03; - buffer[r + r_offset][c + c_offset] = - clip_pixel(ROUND_POWER_OF_TWO_SIGNED( - av1_filter_intra_taps[mode][k][0] * p0 + - av1_filter_intra_taps[mode][k][1] * p1 + - av1_filter_intra_taps[mode][k][2] * p2 + - av1_filter_intra_taps[mode][k][3] * p3 + - av1_filter_intra_taps[mode][k][4] * p4 + - av1_filter_intra_taps[mode][k][5] * p5 + - av1_filter_intra_taps[mode][k][6] * p6, - FILTER_INTRA_SCALE_BITS)); - } - } - - for (r = 0; r < bh; ++r) { - memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t)); - dst += stride; - } -} - -static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride, - TX_SIZE tx_size, - const uint16_t *above, - const uint16_t *left, int mode, - int bd) { - int r, c; - 
uint16_t buffer[33][33]; - const int bw = tx_size_wide[tx_size]; - const int bh = tx_size_high[tx_size]; - - assert(bw <= 32 && bh <= 32); - - // The initialization is just for silencing Jenkins static analysis warnings - for (r = 0; r < bh + 1; ++r) - memset(buffer[r], 0, (bw + 1) * sizeof(buffer[0][0])); - - for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r]; - memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0])); - - for (r = 1; r < bh + 1; r += 2) - for (c = 1; c < bw + 1; c += 4) { - const uint16_t p0 = buffer[r - 1][c - 1]; - const uint16_t p1 = buffer[r - 1][c]; - const uint16_t p2 = buffer[r - 1][c + 1]; - const uint16_t p3 = buffer[r - 1][c + 2]; - const uint16_t p4 = buffer[r - 1][c + 3]; - const uint16_t p5 = buffer[r][c - 1]; - const uint16_t p6 = buffer[r + 1][c - 1]; - for (int k = 0; k < 8; ++k) { - int r_offset = k >> 2; - int c_offset = k & 0x03; - buffer[r + r_offset][c + c_offset] = - clip_pixel_highbd(ROUND_POWER_OF_TWO_SIGNED( - av1_filter_intra_taps[mode][k][0] * p0 + - av1_filter_intra_taps[mode][k][1] * p1 + - av1_filter_intra_taps[mode][k][2] * p2 + - av1_filter_intra_taps[mode][k][3] * p3 + - av1_filter_intra_taps[mode][k][4] * p4 + - av1_filter_intra_taps[mode][k][5] * p5 + - av1_filter_intra_taps[mode][k][6] * p6, - FILTER_INTRA_SCALE_BITS), - bd); - } - } - - for (r = 0; r < bh; ++r) { - memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0])); - dst += stride; - } -} - -static int is_smooth(const MB_MODE_INFO *mbmi, int plane) { - if (plane == 0) { - const PREDICTION_MODE mode = mbmi->mode; - return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED || - mode == SMOOTH_H_PRED); - } else { - // uv_mode is not set for inter blocks, so need to explicitly - // detect that case. 
- if (is_inter_block(mbmi)) return 0; - - const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode; - return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED || - uv_mode == UV_SMOOTH_H_PRED); - } -} - -static int get_filt_type(const MACROBLOCKD *xd, int plane) { - int ab_sm, le_sm; - - if (plane == 0) { - const MB_MODE_INFO *ab = xd->above_mbmi; - const MB_MODE_INFO *le = xd->left_mbmi; - ab_sm = ab ? is_smooth(ab, plane) : 0; - le_sm = le ? is_smooth(le, plane) : 0; - } else { - const MB_MODE_INFO *ab = xd->chroma_above_mbmi; - const MB_MODE_INFO *le = xd->chroma_left_mbmi; - ab_sm = ab ? is_smooth(ab, plane) : 0; - le_sm = le ? is_smooth(le, plane) : 0; - } - - return (ab_sm || le_sm) ? 1 : 0; -} - -static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) { - const int d = abs(delta); - int strength = 0; - - const int blk_wh = bs0 + bs1; - if (type == 0) { - if (blk_wh <= 8) { - if (d >= 56) strength = 1; - } else if (blk_wh <= 12) { - if (d >= 40) strength = 1; - } else if (blk_wh <= 16) { - if (d >= 40) strength = 1; - } else if (blk_wh <= 24) { - if (d >= 8) strength = 1; - if (d >= 16) strength = 2; - if (d >= 32) strength = 3; - } else if (blk_wh <= 32) { - if (d >= 1) strength = 1; - if (d >= 4) strength = 2; - if (d >= 32) strength = 3; - } else { - if (d >= 1) strength = 3; - } - } else { - if (blk_wh <= 8) { - if (d >= 40) strength = 1; - if (d >= 64) strength = 2; - } else if (blk_wh <= 16) { - if (d >= 20) strength = 1; - if (d >= 48) strength = 2; - } else if (blk_wh <= 24) { - if (d >= 4) strength = 3; - } else { - if (d >= 1) strength = 3; - } - } - return strength; -} - -void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) { - if (!strength) return; - - const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { - { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 } - }; - const int filt = strength - 1; - uint8_t edge[129]; - - memcpy(edge, p, sz * sizeof(*p)); - for (int i = 1; i < sz; i++) { - int s = 0; - for (int j 
= 0; j < INTRA_EDGE_TAPS; j++) { - int k = i - 2 + j; - k = (k < 0) ? 0 : k; - k = (k > sz - 1) ? sz - 1 : k; - s += edge[k] * kernel[filt][j]; - } - s = (s + 8) >> 4; - p[i] = s; - } -} - -static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) { - const int kernel[3] = { 5, 6, 5 }; - - int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) + - (p_above[0] * kernel[2]); - s = (s + 8) >> 4; - p_above[-1] = s; - p_left[-1] = s; -} - -void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) { - if (!strength) return; - - const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { - { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 } - }; - const int filt = strength - 1; - uint16_t edge[129]; - - memcpy(edge, p, sz * sizeof(*p)); - for (int i = 1; i < sz; i++) { - int s = 0; - for (int j = 0; j < INTRA_EDGE_TAPS; j++) { - int k = i - 2 + j; - k = (k < 0) ? 0 : k; - k = (k > sz - 1) ? sz - 1 : k; - s += edge[k] * kernel[filt][j]; - } - s = (s + 8) >> 4; - p[i] = s; - } -} - -static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) { - const int kernel[3] = { 5, 6, 5 }; - - int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) + - (p_above[0] * kernel[2]); - s = (s + 8) >> 4; - p_above[-1] = s; - p_left[-1] = s; -} - -void av1_upsample_intra_edge_c(uint8_t *p, int sz) { - // interpolate half-sample positions - assert(sz <= MAX_UPSAMPLE_SZ); - - uint8_t in[MAX_UPSAMPLE_SZ + 3]; - // copy p[-1..(sz-1)] and extend first and last samples - in[0] = p[-1]; - in[1] = p[-1]; - for (int i = 0; i < sz; i++) { - in[i + 2] = p[i]; - } - in[sz + 2] = p[sz - 1]; - - // interpolate half-sample edge positions - p[-2] = in[0]; - for (int i = 0; i < sz; i++) { - int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3]; - s = clip_pixel((s + 8) >> 4); - p[2 * i - 1] = s; - p[2 * i] = in[i + 2]; - } -} - -void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) { - // interpolate half-sample positions - assert(sz <= 
MAX_UPSAMPLE_SZ); - - uint16_t in[MAX_UPSAMPLE_SZ + 3]; - // copy p[-1..(sz-1)] and extend first and last samples - in[0] = p[-1]; - in[1] = p[-1]; - for (int i = 0; i < sz; i++) { - in[i + 2] = p[i]; - } - in[sz + 2] = p[sz - 1]; - - // interpolate half-sample edge positions - p[-2] = in[0]; - for (int i = 0; i < sz; i++) { - int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3]; - s = (s + 8) >> 4; - s = clip_pixel_highbd(s, bd); - p[2 * i - 1] = s; - p[2 * i] = in[i + 2]; - } -} - -static void build_intra_predictors_high( - const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8, - int dst_stride, PREDICTION_MODE mode, int angle_delta, - FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size, - int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px, - int n_bottomleft_px, int plane) { - int i; - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 32]); - DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 32]); - uint16_t *const above_row = above_data + 16; - uint16_t *const left_col = left_data + 16; - const int txwpx = tx_size_wide[tx_size]; - const int txhpx = tx_size_high[tx_size]; - int need_left = extend_modes[mode] & NEED_LEFT; - int need_above = extend_modes[mode] & NEED_ABOVE; - int need_above_left = extend_modes[mode] & NEED_ABOVELEFT; - const uint16_t *above_ref = ref - ref_stride; - const uint16_t *left_ref = ref - 1; - int p_angle = 0; - const int is_dr_mode = av1_is_directional_mode(mode); - const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES; - int base = 128 << (xd->bd - 8); - - // The default values if ref pixels are not available: - // base-1 base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1 - // base+1 A B .. Y Z - // base+1 C D .. W X - // base+1 E F .. U V - // base+1 G H .. 
S T T T T T - - if (is_dr_mode) { - p_angle = mode_to_angle_map[mode] + angle_delta; - if (p_angle <= 90) - need_above = 1, need_left = 0, need_above_left = 1; - else if (p_angle < 180) - need_above = 1, need_left = 1, need_above_left = 1; - else - need_above = 0, need_left = 1, need_above_left = 1; - } - if (use_filter_intra) need_left = need_above = need_above_left = 1; - - assert(n_top_px >= 0); - assert(n_topright_px >= 0); - assert(n_left_px >= 0); - assert(n_bottomleft_px >= 0); - - if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) { - int val; - if (need_left) { - val = (n_top_px > 0) ? above_ref[0] : base + 1; - } else { - val = (n_left_px > 0) ? left_ref[0] : base - 1; - } - for (i = 0; i < txhpx; ++i) { - aom_memset16(dst, val, txwpx); - dst += dst_stride; - } - return; - } - - // NEED_LEFT - if (need_left) { - int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT); - if (use_filter_intra) need_bottom = 0; - if (is_dr_mode) need_bottom = p_angle > 180; - const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0); - i = 0; - if (n_left_px > 0) { - for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride]; - if (need_bottom && n_bottomleft_px > 0) { - assert(i == txhpx); - for (; i < txhpx + n_bottomleft_px; i++) - left_col[i] = left_ref[i * ref_stride]; - } - if (i < num_left_pixels_needed) - aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i); - } else { - if (n_top_px > 0) { - aom_memset16(left_col, above_ref[0], num_left_pixels_needed); - } else { - aom_memset16(left_col, base + 1, num_left_pixels_needed); - } - } - } - - // NEED_ABOVE - if (need_above) { - int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT); - if (use_filter_intra) need_right = 0; - if (is_dr_mode) need_right = p_angle < 90; - const int num_top_pixels_needed = txwpx + (need_right ? 
txhpx : 0); - if (n_top_px > 0) { - memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0])); - i = n_top_px; - if (need_right && n_topright_px > 0) { - assert(n_top_px == txwpx); - memcpy(above_row + txwpx, above_ref + txwpx, - n_topright_px * sizeof(above_ref[0])); - i += n_topright_px; - } - if (i < num_top_pixels_needed) - aom_memset16(&above_row[i], above_row[i - 1], - num_top_pixels_needed - i); - } else { - if (n_left_px > 0) { - aom_memset16(above_row, left_ref[0], num_top_pixels_needed); - } else { - aom_memset16(above_row, base - 1, num_top_pixels_needed); - } - } - } - - if (need_above_left) { - if (n_top_px > 0 && n_left_px > 0) { - above_row[-1] = above_ref[-1]; - } else if (n_top_px > 0) { - above_row[-1] = above_ref[0]; - } else if (n_left_px > 0) { - above_row[-1] = left_ref[0]; - } else { - above_row[-1] = base; - } - left_col[-1] = above_row[-1]; - } - - if (use_filter_intra) { - highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col, - filter_intra_mode, xd->bd); - return; - } - - if (is_dr_mode) { - int upsample_above = 0; - int upsample_left = 0; - if (!disable_edge_filter) { - const int need_right = p_angle < 90; - const int need_bottom = p_angle > 180; - const int filt_type = get_filt_type(xd, plane); - if (p_angle != 90 && p_angle != 180) { - const int ab_le = need_above_left ? 1 : 0; - if (need_above && need_left && (txwpx + txhpx >= 24)) { - filter_intra_edge_corner_high(above_row, left_col); - } - if (need_above && n_top_px > 0) { - const int strength = - intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type); - const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0); - av1_filter_intra_edge_high(above_row - ab_le, n_px, strength); - } - if (need_left && n_left_px > 0) { - const int strength = intra_edge_filter_strength( - txhpx, txwpx, p_angle - 180, filt_type); - const int n_px = n_left_px + ab_le + (need_bottom ? 
txwpx : 0); - av1_filter_intra_edge_high(left_col - ab_le, n_px, strength); - } - } - upsample_above = - av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type); - if (need_above && upsample_above) { - const int n_px = txwpx + (need_right ? txhpx : 0); - av1_upsample_intra_edge_high(above_row, n_px, xd->bd); - } - upsample_left = - av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type); - if (need_left && upsample_left) { - const int n_px = txhpx + (need_bottom ? txwpx : 0); - av1_upsample_intra_edge_high(left_col, n_px, xd->bd); - } - } - highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col, - upsample_above, upsample_left, p_angle, xd->bd); - return; - } - - // predict - if (mode == DC_PRED) { - dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size]( - dst, dst_stride, above_row, left_col, xd->bd); - } else { - pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd); - } -} - -static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, - int ref_stride, uint8_t *dst, int dst_stride, - PREDICTION_MODE mode, int angle_delta, - FILTER_INTRA_MODE filter_intra_mode, - TX_SIZE tx_size, int disable_edge_filter, - int n_top_px, int n_topright_px, - int n_left_px, int n_bottomleft_px, - int plane) { - int i; - const uint8_t *above_ref = ref - ref_stride; - const uint8_t *left_ref = ref - 1; - DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]); - DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]); - uint8_t *const above_row = above_data + 16; - uint8_t *const left_col = left_data + 16; - const int txwpx = tx_size_wide[tx_size]; - const int txhpx = tx_size_high[tx_size]; - int need_left = extend_modes[mode] & NEED_LEFT; - int need_above = extend_modes[mode] & NEED_ABOVE; - int need_above_left = extend_modes[mode] & NEED_ABOVELEFT; - int p_angle = 0; - const int is_dr_mode = av1_is_directional_mode(mode); - const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES; - - // The 
default values if ref pixels are not available: - // 127 127 127 .. 127 127 127 127 127 127 - // 129 A B .. Y Z - // 129 C D .. W X - // 129 E F .. U V - // 129 G H .. S T T T T T - // .. - - if (is_dr_mode) { - p_angle = mode_to_angle_map[mode] + angle_delta; - if (p_angle <= 90) - need_above = 1, need_left = 0, need_above_left = 1; - else if (p_angle < 180) - need_above = 1, need_left = 1, need_above_left = 1; - else - need_above = 0, need_left = 1, need_above_left = 1; - } - if (use_filter_intra) need_left = need_above = need_above_left = 1; - - assert(n_top_px >= 0); - assert(n_topright_px >= 0); - assert(n_left_px >= 0); - assert(n_bottomleft_px >= 0); - - if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) { - int val; - if (need_left) { - val = (n_top_px > 0) ? above_ref[0] : 129; - } else { - val = (n_left_px > 0) ? left_ref[0] : 127; - } - for (i = 0; i < txhpx; ++i) { - memset(dst, val, txwpx); - dst += dst_stride; - } - return; - } - - // NEED_LEFT - if (need_left) { - int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT); - if (use_filter_intra) need_bottom = 0; - if (is_dr_mode) need_bottom = p_angle > 180; - const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0); - i = 0; - if (n_left_px > 0) { - for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride]; - if (need_bottom && n_bottomleft_px > 0) { - assert(i == txhpx); - for (; i < txhpx + n_bottomleft_px; i++) - left_col[i] = left_ref[i * ref_stride]; - } - if (i < num_left_pixels_needed) - memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i); - } else { - if (n_top_px > 0) { - memset(left_col, above_ref[0], num_left_pixels_needed); - } else { - memset(left_col, 129, num_left_pixels_needed); - } - } - } - - // NEED_ABOVE - if (need_above) { - int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT); - if (use_filter_intra) need_right = 0; - if (is_dr_mode) need_right = p_angle < 90; - const int num_top_pixels_needed = txwpx + (need_right ? 
txhpx : 0); - if (n_top_px > 0) { - memcpy(above_row, above_ref, n_top_px); - i = n_top_px; - if (need_right && n_topright_px > 0) { - assert(n_top_px == txwpx); - memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px); - i += n_topright_px; - } - if (i < num_top_pixels_needed) - memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i); - } else { - if (n_left_px > 0) { - memset(above_row, left_ref[0], num_top_pixels_needed); - } else { - memset(above_row, 127, num_top_pixels_needed); - } - } - } - - if (need_above_left) { - if (n_top_px > 0 && n_left_px > 0) { - above_row[-1] = above_ref[-1]; - } else if (n_top_px > 0) { - above_row[-1] = above_ref[0]; - } else if (n_left_px > 0) { - above_row[-1] = left_ref[0]; - } else { - above_row[-1] = 128; - } - left_col[-1] = above_row[-1]; - } - - if (use_filter_intra) { - av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col, - filter_intra_mode); - return; - } - - if (is_dr_mode) { - int upsample_above = 0; - int upsample_left = 0; - if (!disable_edge_filter) { - const int need_right = p_angle < 90; - const int need_bottom = p_angle > 180; - const int filt_type = get_filt_type(xd, plane); - if (p_angle != 90 && p_angle != 180) { - const int ab_le = need_above_left ? 1 : 0; - if (need_above && need_left && (txwpx + txhpx >= 24)) { - filter_intra_edge_corner(above_row, left_col); - } - if (need_above && n_top_px > 0) { - const int strength = - intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type); - const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0); - av1_filter_intra_edge(above_row - ab_le, n_px, strength); - } - if (need_left && n_left_px > 0) { - const int strength = intra_edge_filter_strength( - txhpx, txwpx, p_angle - 180, filt_type); - const int n_px = n_left_px + ab_le + (need_bottom ? 
txwpx : 0); - av1_filter_intra_edge(left_col - ab_le, n_px, strength); - } - } - upsample_above = - av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type); - if (need_above && upsample_above) { - const int n_px = txwpx + (need_right ? txhpx : 0); - av1_upsample_intra_edge(above_row, n_px); - } - upsample_left = - av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type); - if (need_left && upsample_left) { - const int n_px = txhpx + (need_bottom ? txwpx : 0); - av1_upsample_intra_edge(left_col, n_px); - } - } - dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above, - upsample_left, p_angle); - return; - } - - // predict - if (mode == DC_PRED) { - dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row, - left_col); - } else { - pred[mode][tx_size](dst, dst_stride, above_row, left_col); - } -} - -void av1_predict_intra_block( - const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx, - TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette, - FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) { - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const int txwpx = tx_size_wide[tx_size]; - const int txhpx = tx_size_high[tx_size]; - const int x = col_off << tx_size_wide_log2[0]; - const int y = row_off << tx_size_high_log2[0]; - - if (use_palette) { - int r, c; - const uint8_t *const map = xd->plane[plane != 0].color_index_map + - xd->color_index_map_offset[plane != 0]; - const uint16_t *const palette = - mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); - for (r = 0; r < txhpx; ++r) { - for (c = 0; c < txwpx; ++c) { - dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]]; - } - } - } else { - for (r = 0; r < txhpx; ++r) { - for (c = 0; c < txwpx; ++c) { - dst[r * dst_stride + 
c] = - (uint8_t)palette[map[(r + y) * wpx + c + x]]; - } - } - } - return; - } - - BLOCK_SIZE bsize = mbmi->sb_type; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int txw = tx_size_wide_unit[tx_size]; - const int txh = tx_size_high_unit[tx_size]; - const int have_top = row_off || (pd->subsampling_y ? xd->chroma_up_available - : xd->up_available); - const int have_left = - col_off || - (pd->subsampling_x ? xd->chroma_left_available : xd->left_available); - const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); - const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); - const int xr_chr_offset = 0; - const int yd_chr_offset = 0; - - // Distance between the right edge of this prediction block to - // the frame right edge - const int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + - (wpx - x - txwpx) - xr_chr_offset; - // Distance between the bottom edge of this prediction block to - // the frame bottom edge - const int yd = (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + - (hpx - y - txhpx) - yd_chr_offset; - const int right_available = - mi_col + ((col_off + txw) << pd->subsampling_x) < xd->tile.mi_col_end; - const int bottom_available = - (yd > 0) && - (mi_row + ((row_off + txh) << pd->subsampling_y) < xd->tile.mi_row_end); - - const PARTITION_TYPE partition = mbmi->partition; - - // force 4x4 chroma component block size. 
- bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y); - - const int have_top_right = has_top_right( - cm, bsize, mi_row, mi_col, have_top, right_available, partition, tx_size, - row_off, col_off, pd->subsampling_x, pd->subsampling_y); - const int have_bottom_left = has_bottom_left( - cm, bsize, mi_row, mi_col, bottom_available, have_left, partition, - tx_size, row_off, col_off, pd->subsampling_x, pd->subsampling_y); - - const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - build_intra_predictors_high( - xd, ref, ref_stride, dst, dst_stride, mode, angle_delta, - filter_intra_mode, tx_size, disable_edge_filter, - have_top ? AOMMIN(txwpx, xr + txwpx) : 0, - have_top_right ? AOMMIN(txwpx, xr) : 0, - have_left ? AOMMIN(txhpx, yd + txhpx) : 0, - have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane); - return; - } - - build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, - angle_delta, filter_intra_mode, tx_size, - disable_edge_filter, - have_top ? AOMMIN(txwpx, xr + txwpx) : 0, - have_top_right ? AOMMIN(txwpx, xr) : 0, - have_left ? AOMMIN(txhpx, yd + txhpx) : 0, - have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane); -} - -void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd, - int plane, int blk_col, int blk_row, - TX_SIZE tx_size) { - const MB_MODE_INFO *const mbmi = xd->mi[0]; - struct macroblockd_plane *const pd = &xd->plane[plane]; - const int dst_stride = pd->dst.stride; - uint8_t *dst = - &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; - const PREDICTION_MODE mode = - (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode); - const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0; - const FILTER_INTRA_MODE filter_intra_mode = - (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra) - ? 
mbmi->filter_intra_mode_info.filter_intra_mode - : FILTER_INTRA_MODES; - const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP; - - if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) { -#if CONFIG_DEBUG - assert(is_cfl_allowed(xd)); - const BLOCK_SIZE plane_bsize = get_plane_block_size( - mbmi->sb_type, pd->subsampling_x, pd->subsampling_y); - (void)plane_bsize; - assert(plane_bsize < BLOCK_SIZES_ALL); - if (!xd->lossless[mbmi->segment_id]) { - assert(blk_col == 0); - assert(blk_row == 0); - assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]); - assert(block_size_high[plane_bsize] == tx_size_high[tx_size]); - } -#endif - CFL_CTX *const cfl = &xd->cfl; - CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane); - if (cfl->dc_pred_is_cached[pred_plane] == 0) { - av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode, - angle_delta, use_palette, filter_intra_mode, dst, - dst_stride, dst, dst_stride, blk_col, blk_row, - plane); - if (cfl->use_dc_pred_cache) { - cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]); - cfl->dc_pred_is_cached[pred_plane] = 1; - } - } else { - cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane); - } - cfl_predict_block(xd, dst, dst_stride, tx_size, plane); - return; - } - av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode, - angle_delta, use_palette, filter_intra_mode, dst, - dst_stride, dst, dst_stride, blk_col, blk_row, plane); -} - -void av1_init_intra_predictors(void) { - aom_once(init_intra_predictors_internal); -} diff --git a/third_party/aom/av1/common/reconintra.h b/third_party/aom/av1/common/reconintra.h deleted file mode 100644 index 07853aba0..000000000 --- a/third_party/aom/av1/common/reconintra.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_RECONINTRA_H_ -#define AOM_AV1_COMMON_RECONINTRA_H_ - -#include <stdlib.h> - -#include "aom/aom_integer.h" -#include "av1/common/blockd.h" -#include "av1/common/onyxc_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_init_intra_predictors(void); -void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd, - int plane, int blk_col, int blk_row, - TX_SIZE tx_size); -void av1_predict_intra_block(const AV1_COMMON *cm, const MACROBLOCKD *xd, - int bw, int bh, TX_SIZE tx_size, - PREDICTION_MODE mode, int angle_delta, - int use_palette, - FILTER_INTRA_MODE filter_intra_mode, - const uint8_t *ref, int ref_stride, uint8_t *dst, - int dst_stride, int aoff, int loff, int plane); - -// Mapping of interintra to intra mode for use in the intra component -static const PREDICTION_MODE interintra_to_intra_mode[INTERINTRA_MODES] = { - DC_PRED, V_PRED, H_PRED, SMOOTH_PRED -}; - -// Mapping of intra mode to the interintra mode -static const INTERINTRA_MODE intra_to_interintra_mode[INTRA_MODES] = { - II_DC_PRED, II_V_PRED, II_H_PRED, II_V_PRED, II_SMOOTH_PRED, II_V_PRED, - II_H_PRED, II_H_PRED, II_V_PRED, II_SMOOTH_PRED, II_SMOOTH_PRED -}; - -#define FILTER_INTRA_SCALE_BITS 4 - -static INLINE int av1_is_directional_mode(PREDICTION_MODE mode) { - return mode >= V_PRED && mode <= D67_PRED; -} - -static INLINE int av1_use_angle_delta(BLOCK_SIZE bsize) { - return bsize >= BLOCK_8X8; -} - -static INLINE int av1_allow_intrabc(const AV1_COMMON *const cm) { - return frame_is_intra_only(cm) && cm->allow_screen_content_tools && - cm->allow_intrabc; -} - -static INLINE int av1_filter_intra_allowed_bsize(const 
AV1_COMMON *const cm, - BLOCK_SIZE bs) { - if (!cm->seq_params.enable_filter_intra || bs == BLOCK_INVALID) return 0; - - return block_size_wide[bs] <= 32 && block_size_high[bs] <= 32; -} - -static INLINE int av1_filter_intra_allowed(const AV1_COMMON *const cm, - const MB_MODE_INFO *mbmi) { - return mbmi->mode == DC_PRED && - mbmi->palette_mode_info.palette_size[0] == 0 && - av1_filter_intra_allowed_bsize(cm, mbmi->sb_type); -} - -extern const int8_t av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]; - -// Get the shift (up-scaled by 256) in X w.r.t a unit change in Y. -// If angle > 0 && angle < 90, dx = -((int)(256 / t)); -// If angle > 90 && angle < 180, dx = (int)(256 / t); -// If angle > 180 && angle < 270, dx = 1; -static INLINE int av1_get_dx(int angle) { - if (angle > 0 && angle < 90) { - return dr_intra_derivative[angle]; - } else if (angle > 90 && angle < 180) { - return dr_intra_derivative[180 - angle]; - } else { - // In this case, we are not really going to use dx. We may return any value. - return 1; - } -} - -// Get the shift (up-scaled by 256) in Y w.r.t a unit change in X. -// If angle > 0 && angle < 90, dy = 1; -// If angle > 90 && angle < 180, dy = (int)(256 * t); -// If angle > 180 && angle < 270, dy = -((int)(256 * t)); -static INLINE int av1_get_dy(int angle) { - if (angle > 90 && angle < 180) { - return dr_intra_derivative[angle - 90]; - } else if (angle > 180 && angle < 270) { - return dr_intra_derivative[270 - angle]; - } else { - // In this case, we are not really going to use dy. We may return any value. - return 1; - } -} - -static INLINE int av1_use_intra_edge_upsample(int bs0, int bs1, int delta, - int type) { - const int d = abs(delta); - const int blk_wh = bs0 + bs1; - if (d <= 0 || d >= 40) return 0; - return type ? 
(blk_wh <= 8) : (blk_wh <= 16); -} -#ifdef __cplusplus -} // extern "C" -#endif -#endif // AOM_AV1_COMMON_RECONINTRA_H_ diff --git a/third_party/aom/av1/common/resize.c b/third_party/aom/av1/common/resize.c deleted file mode 100644 index d61a20aa2..000000000 --- a/third_party/aom/av1/common/resize.c +++ /dev/null @@ -1,1280 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <assert.h> -#include <limits.h> -#include <math.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "config/aom_config.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_ports/mem.h" -#include "aom_scale/aom_scale.h" -#include "av1/common/common.h" -#include "av1/common/resize.h" - -#include "config/aom_scale_rtcd.h" - -// Filters for interpolation (0.5-band) - note this also filters integer pels. 
-static const InterpKernel filteredinterp_filters500[(1 << RS_SUBPEL_BITS)] = { - { -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, 0, 34, 64, 36, 0, -3, 0 }, - { -3, -1, 34, 64, 36, 1, -3, 0 }, { -3, -1, 33, 64, 37, 1, -3, 0 }, - { -3, -1, 32, 64, 38, 1, -3, 0 }, { -3, -1, 31, 64, 39, 1, -3, 0 }, - { -3, -1, 31, 63, 39, 2, -3, 0 }, { -2, -2, 30, 63, 40, 2, -3, 0 }, - { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 29, 63, 41, 3, -4, 0 }, - { -2, -2, 28, 63, 42, 3, -4, 0 }, { -2, -2, 27, 63, 43, 3, -4, 0 }, - { -2, -3, 27, 63, 43, 4, -4, 0 }, { -2, -3, 26, 62, 44, 5, -4, 0 }, - { -2, -3, 25, 62, 45, 5, -4, 0 }, { -2, -3, 25, 62, 45, 5, -4, 0 }, - { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 23, 61, 47, 6, -4, 0 }, - { -2, -3, 23, 61, 47, 6, -4, 0 }, { -2, -3, 22, 61, 48, 7, -4, -1 }, - { -2, -3, 21, 60, 49, 7, -4, 0 }, { -1, -4, 20, 60, 49, 8, -4, 0 }, - { -1, -4, 20, 60, 50, 8, -4, -1 }, { -1, -4, 19, 59, 51, 9, -4, -1 }, - { -1, -4, 19, 59, 51, 9, -4, -1 }, { -1, -4, 18, 58, 52, 10, -4, -1 }, - { -1, -4, 17, 58, 52, 11, -4, -1 }, { -1, -4, 16, 58, 53, 11, -4, -1 }, - { -1, -4, 16, 57, 53, 12, -4, -1 }, { -1, -4, 15, 57, 54, 12, -4, -1 }, - { -1, -4, 15, 56, 54, 13, -4, -1 }, { -1, -4, 14, 56, 55, 13, -4, -1 }, - { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 13, 55, 56, 14, -4, -1 }, - { -1, -4, 13, 54, 56, 15, -4, -1 }, { -1, -4, 12, 54, 57, 15, -4, -1 }, - { -1, -4, 12, 53, 57, 16, -4, -1 }, { -1, -4, 11, 53, 58, 16, -4, -1 }, - { -1, -4, 11, 52, 58, 17, -4, -1 }, { -1, -4, 10, 52, 58, 18, -4, -1 }, - { -1, -4, 9, 51, 59, 19, -4, -1 }, { -1, -4, 9, 51, 59, 19, -4, -1 }, - { -1, -4, 8, 50, 60, 20, -4, -1 }, { 0, -4, 8, 49, 60, 20, -4, -1 }, - { 0, -4, 7, 49, 60, 21, -3, -2 }, { -1, -4, 7, 48, 61, 22, -3, -2 }, - { 0, -4, 6, 47, 61, 23, -3, -2 }, { 0, -4, 6, 47, 61, 23, -3, -2 }, - { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 5, 45, 62, 25, -3, -2 }, - { 0, -4, 5, 45, 62, 25, -3, -2 }, { 0, -4, 5, 44, 62, 26, -3, -2 }, - { 0, -4, 4, 43, 63, 27, -3, -2 }, { 0, -4, 3, 43, 63, 27, -2, 
-2 }, - { 0, -4, 3, 42, 63, 28, -2, -2 }, { 0, -4, 3, 41, 63, 29, -2, -2 }, - { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 2, 40, 63, 30, -2, -2 }, - { 0, -3, 2, 39, 63, 31, -1, -3 }, { 0, -3, 1, 39, 64, 31, -1, -3 }, - { 0, -3, 1, 38, 64, 32, -1, -3 }, { 0, -3, 1, 37, 64, 33, -1, -3 }, - { 0, -3, 1, 36, 64, 34, -1, -3 }, { 0, -3, 0, 36, 64, 34, 0, -3 }, -}; - -// Filters for interpolation (0.625-band) - note this also filters integer pels. -static const InterpKernel filteredinterp_filters625[(1 << RS_SUBPEL_BITS)] = { - { -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 31, 80, 34, -8, -1, 1 }, - { -1, -8, 30, 80, 35, -8, -1, 1 }, { -1, -8, 29, 80, 36, -7, -2, 1 }, - { -1, -8, 28, 80, 37, -7, -2, 1 }, { -1, -8, 27, 80, 38, -7, -2, 1 }, - { 0, -8, 26, 79, 39, -7, -2, 1 }, { 0, -8, 25, 79, 40, -7, -2, 1 }, - { 0, -8, 24, 79, 41, -7, -2, 1 }, { 0, -8, 23, 78, 42, -6, -2, 1 }, - { 0, -8, 22, 78, 43, -6, -2, 1 }, { 0, -8, 21, 78, 44, -6, -2, 1 }, - { 0, -8, 20, 78, 45, -5, -3, 1 }, { 0, -8, 19, 77, 47, -5, -3, 1 }, - { 0, -8, 18, 77, 48, -5, -3, 1 }, { 0, -8, 17, 77, 49, -5, -3, 1 }, - { 0, -8, 16, 76, 50, -4, -3, 1 }, { 0, -8, 15, 76, 51, -4, -3, 1 }, - { 0, -8, 15, 75, 52, -3, -4, 1 }, { 0, -7, 14, 74, 53, -3, -4, 1 }, - { 0, -7, 13, 74, 54, -3, -4, 1 }, { 0, -7, 12, 73, 55, -2, -4, 1 }, - { 0, -7, 11, 73, 56, -2, -4, 1 }, { 0, -7, 10, 72, 57, -1, -4, 1 }, - { 1, -7, 10, 71, 58, -1, -5, 1 }, { 0, -7, 9, 71, 59, 0, -5, 1 }, - { 1, -7, 8, 70, 60, 0, -5, 1 }, { 1, -7, 7, 69, 61, 1, -5, 1 }, - { 1, -6, 6, 68, 62, 1, -5, 1 }, { 0, -6, 6, 68, 62, 2, -5, 1 }, - { 1, -6, 5, 67, 63, 2, -5, 1 }, { 1, -6, 5, 66, 64, 3, -6, 1 }, - { 1, -6, 4, 65, 65, 4, -6, 1 }, { 1, -6, 3, 64, 66, 5, -6, 1 }, - { 1, -5, 2, 63, 67, 5, -6, 1 }, { 1, -5, 2, 62, 68, 6, -6, 0 }, - { 1, -5, 1, 62, 68, 6, -6, 1 }, { 1, -5, 1, 61, 69, 7, -7, 1 }, - { 1, -5, 0, 60, 70, 8, -7, 1 }, { 1, -5, 0, 59, 71, 9, -7, 0 }, - { 1, -5, -1, 58, 71, 10, -7, 1 }, { 1, -4, -1, 57, 72, 10, -7, 0 }, - { 1, -4, -2, 56, 73, 11, -7, 
0 }, { 1, -4, -2, 55, 73, 12, -7, 0 }, - { 1, -4, -3, 54, 74, 13, -7, 0 }, { 1, -4, -3, 53, 74, 14, -7, 0 }, - { 1, -4, -3, 52, 75, 15, -8, 0 }, { 1, -3, -4, 51, 76, 15, -8, 0 }, - { 1, -3, -4, 50, 76, 16, -8, 0 }, { 1, -3, -5, 49, 77, 17, -8, 0 }, - { 1, -3, -5, 48, 77, 18, -8, 0 }, { 1, -3, -5, 47, 77, 19, -8, 0 }, - { 1, -3, -5, 45, 78, 20, -8, 0 }, { 1, -2, -6, 44, 78, 21, -8, 0 }, - { 1, -2, -6, 43, 78, 22, -8, 0 }, { 1, -2, -6, 42, 78, 23, -8, 0 }, - { 1, -2, -7, 41, 79, 24, -8, 0 }, { 1, -2, -7, 40, 79, 25, -8, 0 }, - { 1, -2, -7, 39, 79, 26, -8, 0 }, { 1, -2, -7, 38, 80, 27, -8, -1 }, - { 1, -2, -7, 37, 80, 28, -8, -1 }, { 1, -2, -7, 36, 80, 29, -8, -1 }, - { 1, -1, -8, 35, 80, 30, -8, -1 }, { 1, -1, -8, 34, 80, 31, -8, -1 }, -}; - -// Filters for interpolation (0.75-band) - note this also filters integer pels. -static const InterpKernel filteredinterp_filters750[(1 << RS_SUBPEL_BITS)] = { - { 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 24, 96, 26, -11, 2, 0 }, - { 2, -11, 22, 96, 28, -11, 2, 0 }, { 2, -10, 21, 96, 29, -12, 2, 0 }, - { 2, -10, 19, 96, 31, -12, 2, 0 }, { 2, -10, 18, 95, 32, -11, 2, 0 }, - { 2, -10, 17, 95, 34, -12, 2, 0 }, { 2, -9, 15, 95, 35, -12, 2, 0 }, - { 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -9, 13, 94, 38, -12, 2, 0 }, - { 2, -8, 12, 93, 40, -12, 1, 0 }, { 2, -8, 11, 93, 41, -12, 1, 0 }, - { 2, -8, 9, 92, 43, -12, 1, 1 }, { 2, -8, 8, 92, 44, -12, 1, 1 }, - { 2, -7, 7, 91, 46, -12, 1, 0 }, { 2, -7, 6, 90, 47, -12, 1, 1 }, - { 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -6, 4, 89, 50, -12, 1, 0 }, - { 2, -6, 3, 88, 52, -12, 0, 1 }, { 2, -6, 2, 87, 54, -12, 0, 1 }, - { 2, -5, 1, 86, 55, -12, 0, 1 }, { 2, -5, 0, 85, 57, -12, 0, 1 }, - { 2, -5, -1, 84, 58, -11, 0, 1 }, { 2, -5, -2, 83, 60, -11, 0, 1 }, - { 2, -4, -2, 82, 61, -11, -1, 1 }, { 1, -4, -3, 81, 63, -10, -1, 1 }, - { 2, -4, -4, 80, 64, -10, -1, 1 }, { 1, -4, -4, 79, 66, -10, -1, 1 }, - { 1, -3, -5, 77, 67, -9, -1, 1 }, { 1, -3, -6, 76, 69, -9, -1, 1 }, - { 1, -3, -6, 75, 70, -8, -2, 1 }, { 
1, -2, -7, 74, 71, -8, -2, 1 }, - { 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -2, -8, 71, 74, -7, -2, 1 }, - { 1, -2, -8, 70, 75, -6, -3, 1 }, { 1, -1, -9, 69, 76, -6, -3, 1 }, - { 1, -1, -9, 67, 77, -5, -3, 1 }, { 1, -1, -10, 66, 79, -4, -4, 1 }, - { 1, -1, -10, 64, 80, -4, -4, 2 }, { 1, -1, -10, 63, 81, -3, -4, 1 }, - { 1, -1, -11, 61, 82, -2, -4, 2 }, { 1, 0, -11, 60, 83, -2, -5, 2 }, - { 1, 0, -11, 58, 84, -1, -5, 2 }, { 1, 0, -12, 57, 85, 0, -5, 2 }, - { 1, 0, -12, 55, 86, 1, -5, 2 }, { 1, 0, -12, 54, 87, 2, -6, 2 }, - { 1, 0, -12, 52, 88, 3, -6, 2 }, { 0, 1, -12, 50, 89, 4, -6, 2 }, - { 0, 1, -12, 49, 90, 5, -7, 2 }, { 1, 1, -12, 47, 90, 6, -7, 2 }, - { 0, 1, -12, 46, 91, 7, -7, 2 }, { 1, 1, -12, 44, 92, 8, -8, 2 }, - { 1, 1, -12, 43, 92, 9, -8, 2 }, { 0, 1, -12, 41, 93, 11, -8, 2 }, - { 0, 1, -12, 40, 93, 12, -8, 2 }, { 0, 2, -12, 38, 94, 13, -9, 2 }, - { 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -12, 35, 95, 15, -9, 2 }, - { 0, 2, -12, 34, 95, 17, -10, 2 }, { 0, 2, -11, 32, 95, 18, -10, 2 }, - { 0, 2, -12, 31, 96, 19, -10, 2 }, { 0, 2, -12, 29, 96, 21, -10, 2 }, - { 0, 2, -11, 28, 96, 22, -11, 2 }, { 0, 2, -11, 26, 96, 24, -11, 2 }, -}; - -// Filters for interpolation (0.875-band) - note this also filters integer pels. 
-static const InterpKernel filteredinterp_filters875[(1 << RS_SUBPEL_BITS)] = { - { 3, -8, 13, 112, 13, -8, 3, 0 }, { 2, -7, 12, 112, 15, -8, 3, -1 }, - { 3, -7, 10, 112, 17, -9, 3, -1 }, { 2, -6, 8, 112, 19, -9, 3, -1 }, - { 2, -6, 7, 112, 21, -10, 3, -1 }, { 2, -5, 6, 111, 22, -10, 3, -1 }, - { 2, -5, 4, 111, 24, -10, 3, -1 }, { 2, -4, 3, 110, 26, -11, 3, -1 }, - { 2, -4, 1, 110, 28, -11, 3, -1 }, { 2, -4, 0, 109, 30, -12, 4, -1 }, - { 1, -3, -1, 108, 32, -12, 4, -1 }, { 1, -3, -2, 108, 34, -13, 4, -1 }, - { 1, -2, -4, 107, 36, -13, 4, -1 }, { 1, -2, -5, 106, 38, -13, 4, -1 }, - { 1, -1, -6, 105, 40, -14, 4, -1 }, { 1, -1, -7, 104, 42, -14, 4, -1 }, - { 1, -1, -7, 103, 44, -15, 4, -1 }, { 1, 0, -8, 101, 46, -15, 4, -1 }, - { 1, 0, -9, 100, 48, -15, 4, -1 }, { 1, 0, -10, 99, 50, -15, 4, -1 }, - { 1, 1, -11, 97, 53, -16, 4, -1 }, { 0, 1, -11, 96, 55, -16, 4, -1 }, - { 0, 1, -12, 95, 57, -16, 4, -1 }, { 0, 2, -13, 93, 59, -16, 4, -1 }, - { 0, 2, -13, 91, 61, -16, 4, -1 }, { 0, 2, -14, 90, 63, -16, 4, -1 }, - { 0, 2, -14, 88, 65, -16, 4, -1 }, { 0, 2, -15, 86, 67, -16, 4, 0 }, - { 0, 3, -15, 84, 69, -17, 4, 0 }, { 0, 3, -16, 83, 71, -17, 4, 0 }, - { 0, 3, -16, 81, 73, -16, 3, 0 }, { 0, 3, -16, 79, 75, -16, 3, 0 }, - { 0, 3, -16, 77, 77, -16, 3, 0 }, { 0, 3, -16, 75, 79, -16, 3, 0 }, - { 0, 3, -16, 73, 81, -16, 3, 0 }, { 0, 4, -17, 71, 83, -16, 3, 0 }, - { 0, 4, -17, 69, 84, -15, 3, 0 }, { 0, 4, -16, 67, 86, -15, 2, 0 }, - { -1, 4, -16, 65, 88, -14, 2, 0 }, { -1, 4, -16, 63, 90, -14, 2, 0 }, - { -1, 4, -16, 61, 91, -13, 2, 0 }, { -1, 4, -16, 59, 93, -13, 2, 0 }, - { -1, 4, -16, 57, 95, -12, 1, 0 }, { -1, 4, -16, 55, 96, -11, 1, 0 }, - { -1, 4, -16, 53, 97, -11, 1, 1 }, { -1, 4, -15, 50, 99, -10, 0, 1 }, - { -1, 4, -15, 48, 100, -9, 0, 1 }, { -1, 4, -15, 46, 101, -8, 0, 1 }, - { -1, 4, -15, 44, 103, -7, -1, 1 }, { -1, 4, -14, 42, 104, -7, -1, 1 }, - { -1, 4, -14, 40, 105, -6, -1, 1 }, { -1, 4, -13, 38, 106, -5, -2, 1 }, - { -1, 4, -13, 36, 107, -4, -2, 1 }, { -1, 4, 
-13, 34, 108, -2, -3, 1 }, - { -1, 4, -12, 32, 108, -1, -3, 1 }, { -1, 4, -12, 30, 109, 0, -4, 2 }, - { -1, 3, -11, 28, 110, 1, -4, 2 }, { -1, 3, -11, 26, 110, 3, -4, 2 }, - { -1, 3, -10, 24, 111, 4, -5, 2 }, { -1, 3, -10, 22, 111, 6, -5, 2 }, - { -1, 3, -10, 21, 112, 7, -6, 2 }, { -1, 3, -9, 19, 112, 8, -6, 2 }, - { -1, 3, -9, 17, 112, 10, -7, 3 }, { -1, 3, -8, 15, 112, 12, -7, 2 }, -}; - -const int16_t av1_resize_filter_normative[( - 1 << RS_SUBPEL_BITS)][UPSCALE_NORMATIVE_TAPS] = { -#if UPSCALE_NORMATIVE_TAPS == 8 - { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -1, 128, 2, -1, 0, 0 }, - { 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -4, 127, 6, -3, 1, 0 }, - { 0, 2, -6, 126, 8, -3, 1, 0 }, { 0, 2, -7, 125, 11, -4, 1, 0 }, - { -1, 2, -8, 125, 13, -5, 2, 0 }, { -1, 3, -9, 124, 15, -6, 2, 0 }, - { -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 3, -11, 122, 20, -7, 3, -1 }, - { -1, 4, -12, 121, 22, -8, 3, -1 }, { -1, 4, -13, 120, 25, -9, 3, -1 }, - { -1, 4, -14, 118, 28, -9, 3, -1 }, { -1, 4, -15, 117, 30, -10, 4, -1 }, - { -1, 5, -16, 116, 32, -11, 4, -1 }, { -1, 5, -16, 114, 35, -12, 4, -1 }, - { -1, 5, -17, 112, 38, -12, 4, -1 }, { -1, 5, -18, 111, 40, -13, 5, -1 }, - { -1, 5, -18, 109, 43, -14, 5, -1 }, { -1, 6, -19, 107, 45, -14, 5, -1 }, - { -1, 6, -19, 105, 48, -15, 5, -1 }, { -1, 6, -19, 103, 51, -16, 5, -1 }, - { -1, 6, -20, 101, 53, -16, 6, -1 }, { -1, 6, -20, 99, 56, -17, 6, -1 }, - { -1, 6, -20, 97, 58, -17, 6, -1 }, { -1, 6, -20, 95, 61, -18, 6, -1 }, - { -2, 7, -20, 93, 64, -18, 6, -2 }, { -2, 7, -20, 91, 66, -19, 6, -1 }, - { -2, 7, -20, 88, 69, -19, 6, -1 }, { -2, 7, -20, 86, 71, -19, 6, -1 }, - { -2, 7, -20, 84, 74, -20, 7, -2 }, { -2, 7, -20, 81, 76, -20, 7, -1 }, - { -2, 7, -20, 79, 79, -20, 7, -2 }, { -1, 7, -20, 76, 81, -20, 7, -2 }, - { -2, 7, -20, 74, 84, -20, 7, -2 }, { -1, 6, -19, 71, 86, -20, 7, -2 }, - { -1, 6, -19, 69, 88, -20, 7, -2 }, { -1, 6, -19, 66, 91, -20, 7, -2 }, - { -2, 6, -18, 64, 93, -20, 7, -2 }, { -1, 6, -18, 61, 95, -20, 6, -1 }, - { -1, 6, -17, 
58, 97, -20, 6, -1 }, { -1, 6, -17, 56, 99, -20, 6, -1 }, - { -1, 6, -16, 53, 101, -20, 6, -1 }, { -1, 5, -16, 51, 103, -19, 6, -1 }, - { -1, 5, -15, 48, 105, -19, 6, -1 }, { -1, 5, -14, 45, 107, -19, 6, -1 }, - { -1, 5, -14, 43, 109, -18, 5, -1 }, { -1, 5, -13, 40, 111, -18, 5, -1 }, - { -1, 4, -12, 38, 112, -17, 5, -1 }, { -1, 4, -12, 35, 114, -16, 5, -1 }, - { -1, 4, -11, 32, 116, -16, 5, -1 }, { -1, 4, -10, 30, 117, -15, 4, -1 }, - { -1, 3, -9, 28, 118, -14, 4, -1 }, { -1, 3, -9, 25, 120, -13, 4, -1 }, - { -1, 3, -8, 22, 121, -12, 4, -1 }, { -1, 3, -7, 20, 122, -11, 3, -1 }, - { -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 2, -6, 15, 124, -9, 3, -1 }, - { 0, 2, -5, 13, 125, -8, 2, -1 }, { 0, 1, -4, 11, 125, -7, 2, 0 }, - { 0, 1, -3, 8, 126, -6, 2, 0 }, { 0, 1, -3, 6, 127, -4, 1, 0 }, - { 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, -1, 2, 128, -1, 0, 0 }, -#else -#error "Invalid value of UPSCALE_NORMATIVE_TAPS" -#endif // UPSCALE_NORMATIVE_TAPS == 8 -}; - -// Filters for interpolation (full-band) - no filtering for integer pixels -#define filteredinterp_filters1000 av1_resize_filter_normative - -// Filters for factor of 2 downsampling. 
-static const int16_t av1_down2_symeven_half_filter[] = { 56, 12, -3, -1 }; -static const int16_t av1_down2_symodd_half_filter[] = { 64, 35, 0, -3 }; - -static const InterpKernel *choose_interp_filter(int in_length, int out_length) { - int out_length16 = out_length * 16; - if (out_length16 >= in_length * 16) - return filteredinterp_filters1000; - else if (out_length16 >= in_length * 13) - return filteredinterp_filters875; - else if (out_length16 >= in_length * 11) - return filteredinterp_filters750; - else if (out_length16 >= in_length * 9) - return filteredinterp_filters625; - else - return filteredinterp_filters500; -} - -static void interpolate_core(const uint8_t *const input, int in_length, - uint8_t *output, int out_length, - const int16_t *interp_filters, int interp_taps) { - const int32_t delta = - (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / - out_length; - const int32_t offset = - in_length > out_length - ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) + - out_length / 2) / - out_length - : -(((int32_t)(out_length - in_length) - << (RS_SCALE_SUBPEL_BITS - 1)) + - out_length / 2) / - out_length; - uint8_t *optr = output; - int x, x1, x2, sum, k, int_pel, sub_pel; - int32_t y; - - x = 0; - y = offset + RS_SCALE_EXTRA_OFF; - while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) { - x++; - y += delta; - } - x1 = x; - x = out_length - 1; - y = delta * x + offset + RS_SCALE_EXTRA_OFF; - while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >= - in_length) { - x--; - y -= delta; - } - x2 = x; - if (x1 > x2) { - for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length; - ++x, y += delta) { - int_pel = y >> RS_SCALE_SUBPEL_BITS; - sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK; - const int16_t *filter = &interp_filters[sub_pel * interp_taps]; - sum = 0; - for (k = 0; k < interp_taps; ++k) { - const int pk = int_pel - interp_taps / 2 + 1 + k; - sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 
1), 0)]; - } - *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - } - } else { - // Initial part. - for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) { - int_pel = y >> RS_SCALE_SUBPEL_BITS; - sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK; - const int16_t *filter = &interp_filters[sub_pel * interp_taps]; - sum = 0; - for (k = 0; k < interp_taps; ++k) - sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)]; - *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - } - // Middle part. - for (; x <= x2; ++x, y += delta) { - int_pel = y >> RS_SCALE_SUBPEL_BITS; - sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK; - const int16_t *filter = &interp_filters[sub_pel * interp_taps]; - sum = 0; - for (k = 0; k < interp_taps; ++k) - sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k]; - *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - } - // End part. - for (; x < out_length; ++x, y += delta) { - int_pel = y >> RS_SCALE_SUBPEL_BITS; - sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK; - const int16_t *filter = &interp_filters[sub_pel * interp_taps]; - sum = 0; - for (k = 0; k < interp_taps; ++k) - sum += filter[k] * - input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)]; - *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - } - } -} - -static void interpolate(const uint8_t *const input, int in_length, - uint8_t *output, int out_length) { - const InterpKernel *interp_filters = - choose_interp_filter(in_length, out_length); - - interpolate_core(input, in_length, output, out_length, &interp_filters[0][0], - SUBPEL_TAPS); -} - -int32_t av1_get_upscale_convolve_step(int in_length, int out_length) { - return ((in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / out_length; -} - -static int32_t get_upscale_convolve_x0(int in_length, int out_length, - int32_t x_step_qn) { - const int err = out_length * x_step_qn - (in_length << RS_SCALE_SUBPEL_BITS); - const int32_t x0 = 
- (-((out_length - in_length) << (RS_SCALE_SUBPEL_BITS - 1)) + - out_length / 2) / - out_length + - RS_SCALE_EXTRA_OFF - err / 2; - return (int32_t)((uint32_t)x0 & RS_SCALE_SUBPEL_MASK); -} - -#ifndef __clang_analyzer__ -static void down2_symeven(const uint8_t *const input, int length, - uint8_t *output) { - // Actual filter len = 2 * filter_len_half. - const int16_t *filter = av1_down2_symeven_half_filter; - const int filter_len_half = sizeof(av1_down2_symeven_half_filter) / 2; - int i, j; - uint8_t *optr = output; - int l1 = filter_len_half; - int l2 = (length - filter_len_half); - l1 += (l1 & 1); - l2 += (l2 & 1); - if (l1 > l2) { - // Short input length. - for (i = 0; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += - (input[AOMMAX(i - j, 0)] + input[AOMMIN(i + 1 + j, length - 1)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - } else { - // Initial part. - for (i = 0; i < l1; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[AOMMAX(i - j, 0)] + input[i + 1 + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - // Middle part. - for (; i < l2; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[i - j] + input[i + 1 + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - // End part. - for (; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += - (input[i - j] + input[AOMMIN(i + 1 + j, length - 1)]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - } -} -#endif - -static void down2_symodd(const uint8_t *const input, int length, - uint8_t *output) { - // Actual filter len = 2 * filter_len_half - 1. 
- const int16_t *filter = av1_down2_symodd_half_filter; - const int filter_len_half = sizeof(av1_down2_symodd_half_filter) / 2; - int i, j; - uint8_t *optr = output; - int l1 = filter_len_half - 1; - int l2 = (length - filter_len_half + 1); - l1 += (l1 & 1); - l2 += (l2 & 1); - if (l1 > l2) { - // Short input length. - for (i = 0; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + - input[(i + j >= length ? length - 1 : i + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - } else { - // Initial part. - for (i = 0; i < l1; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - // Middle part. - for (; i < l2; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[i - j] + input[i + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - // End part. - for (; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[i - j] + input[(i + j >= length ? 
length - 1 : i + j)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel(sum); - } - } -} - -static int get_down2_length(int length, int steps) { - for (int s = 0; s < steps; ++s) length = (length + 1) >> 1; - return length; -} - -static int get_down2_steps(int in_length, int out_length) { - int steps = 0; - int proj_in_length; - while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) { - ++steps; - in_length = proj_in_length; - if (in_length == 1) { - // Special case: we break because any further calls to get_down2_length() - // with be with length == 1, which return 1, resulting in an infinite - // loop. - break; - } - } - return steps; -} - -static void resize_multistep(const uint8_t *const input, int length, - uint8_t *output, int olength, uint8_t *otmp) { - if (length == olength) { - memcpy(output, input, sizeof(output[0]) * length); - return; - } - const int steps = get_down2_steps(length, olength); - - if (steps > 0) { - uint8_t *out = NULL; - int filteredlength = length; - - assert(otmp != NULL); - uint8_t *otmp2 = otmp + get_down2_length(length, 1); - for (int s = 0; s < steps; ++s) { - const int proj_filteredlength = get_down2_length(filteredlength, 1); - const uint8_t *const in = (s == 0 ? input : out); - if (s == steps - 1 && proj_filteredlength == olength) - out = output; - else - out = (s & 1 ? 
otmp2 : otmp); - if (filteredlength & 1) - down2_symodd(in, filteredlength, out); - else - down2_symeven(in, filteredlength, out); - filteredlength = proj_filteredlength; - } - if (filteredlength != olength) { - interpolate(out, filteredlength, output, olength); - } - } else { - interpolate(input, length, output, olength); - } -} - -static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) { - int i; - uint8_t *iptr = img; - uint8_t *aptr = arr; - for (i = 0; i < len; ++i, iptr += stride) { - *aptr++ = *iptr; - } -} - -static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) { - int i; - uint8_t *iptr = img; - uint8_t *aptr = arr; - for (i = 0; i < len; ++i, iptr += stride) { - *iptr = *aptr++; - } -} - -static void resize_plane(const uint8_t *const input, int height, int width, - int in_stride, uint8_t *output, int height2, - int width2, int out_stride) { - int i; - uint8_t *intbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * width2 * height); - uint8_t *tmpbuf = - (uint8_t *)aom_malloc(sizeof(uint8_t) * AOMMAX(width, height)); - uint8_t *arrbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * height); - uint8_t *arrbuf2 = (uint8_t *)aom_malloc(sizeof(uint8_t) * height2); - if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) - goto Error; - assert(width > 0); - assert(height > 0); - assert(width2 > 0); - assert(height2 > 0); - for (i = 0; i < height; ++i) - resize_multistep(input + in_stride * i, width, intbuf + width2 * i, width2, - tmpbuf); - for (i = 0; i < width2; ++i) { - fill_col_to_arr(intbuf + i, width2, height, arrbuf); - resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf); - fill_arr_to_col(output + i, out_stride, height2, arrbuf2); - } - -Error: - aom_free(intbuf); - aom_free(tmpbuf); - aom_free(arrbuf); - aom_free(arrbuf2); -} - -static void upscale_normative_rect(const uint8_t *const input, int height, - int width, int in_stride, uint8_t *output, - int height2, int width2, int out_stride, - 
int x_step_qn, int x0_qn, int pad_left, - int pad_right) { - assert(width > 0); - assert(height > 0); - assert(width2 > 0); - assert(height2 > 0); - assert(height2 == height); - - // Extend the left/right pixels of the tile column if needed - // (either because we can't sample from other tiles, or because we're at - // a frame edge). - // Save the overwritten pixels into tmp_left and tmp_right. - // Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra - // column of border pixels compared to what we'd naively think. - const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1; - uint8_t *tmp_left = - NULL; // Silence spurious "may be used uninitialized" warnings - uint8_t *tmp_right = NULL; - uint8_t *const in_tl = (uint8_t *)(input - border_cols); // Cast off 'const' - uint8_t *const in_tr = (uint8_t *)(input + width); - if (pad_left) { - tmp_left = (uint8_t *)aom_malloc(sizeof(*tmp_left) * border_cols * height); - for (int i = 0; i < height; i++) { - memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_cols); - memset(in_tl + i * in_stride, input[i * in_stride], border_cols); - } - } - if (pad_right) { - tmp_right = - (uint8_t *)aom_malloc(sizeof(*tmp_right) * border_cols * height); - for (int i = 0; i < height; i++) { - memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_cols); - memset(in_tr + i * in_stride, input[i * in_stride + width - 1], - border_cols); - } - } - - av1_convolve_horiz_rs(input - 1, in_stride, output, out_stride, width2, - height2, &av1_resize_filter_normative[0][0], x0_qn, - x_step_qn); - - // Restore the left/right border pixels - if (pad_left) { - for (int i = 0; i < height; i++) { - memcpy(in_tl + i * in_stride, tmp_left + i * border_cols, border_cols); - } - aom_free(tmp_left); - } - if (pad_right) { - for (int i = 0; i < height; i++) { - memcpy(in_tr + i * in_stride, tmp_right + i * border_cols, border_cols); - } - aom_free(tmp_right); - } -} - -static void highbd_interpolate_core(const 
uint16_t *const input, int in_length, - uint16_t *output, int out_length, int bd, - const int16_t *interp_filters, - int interp_taps) { - const int32_t delta = - (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / - out_length; - const int32_t offset = - in_length > out_length - ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) + - out_length / 2) / - out_length - : -(((int32_t)(out_length - in_length) - << (RS_SCALE_SUBPEL_BITS - 1)) + - out_length / 2) / - out_length; - uint16_t *optr = output; - int x, x1, x2, sum, k, int_pel, sub_pel; - int32_t y; - - x = 0; - y = offset + RS_SCALE_EXTRA_OFF; - while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) { - x++; - y += delta; - } - x1 = x; - x = out_length - 1; - y = delta * x + offset + RS_SCALE_EXTRA_OFF; - while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >= - in_length) { - x--; - y -= delta; - } - x2 = x; - if (x1 > x2) { - for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length; - ++x, y += delta) { - int_pel = y >> RS_SCALE_SUBPEL_BITS; - sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK; - const int16_t *filter = &interp_filters[sub_pel * interp_taps]; - sum = 0; - for (k = 0; k < interp_taps; ++k) { - const int pk = int_pel - interp_taps / 2 + 1 + k; - sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 1), 0)]; - } - *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - } - } else { - // Initial part. - for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) { - int_pel = y >> RS_SCALE_SUBPEL_BITS; - sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK; - const int16_t *filter = &interp_filters[sub_pel * interp_taps]; - sum = 0; - for (k = 0; k < interp_taps; ++k) - sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)]; - *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - } - // Middle part. 
- for (; x <= x2; ++x, y += delta) { - int_pel = y >> RS_SCALE_SUBPEL_BITS; - sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK; - const int16_t *filter = &interp_filters[sub_pel * interp_taps]; - sum = 0; - for (k = 0; k < interp_taps; ++k) - sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k]; - *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - } - // End part. - for (; x < out_length; ++x, y += delta) { - int_pel = y >> RS_SCALE_SUBPEL_BITS; - sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK; - const int16_t *filter = &interp_filters[sub_pel * interp_taps]; - sum = 0; - for (k = 0; k < interp_taps; ++k) - sum += filter[k] * - input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)]; - *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - } - } -} - -static void highbd_interpolate(const uint16_t *const input, int in_length, - uint16_t *output, int out_length, int bd) { - const InterpKernel *interp_filters = - choose_interp_filter(in_length, out_length); - - highbd_interpolate_core(input, in_length, output, out_length, bd, - &interp_filters[0][0], SUBPEL_TAPS); -} - -#ifndef __clang_analyzer__ -static void highbd_down2_symeven(const uint16_t *const input, int length, - uint16_t *output, int bd) { - // Actual filter len = 2 * filter_len_half. - static const int16_t *filter = av1_down2_symeven_half_filter; - const int filter_len_half = sizeof(av1_down2_symeven_half_filter) / 2; - int i, j; - uint16_t *optr = output; - int l1 = filter_len_half; - int l2 = (length - filter_len_half); - l1 += (l1 & 1); - l2 += (l2 & 1); - if (l1 > l2) { - // Short input length. - for (i = 0; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += - (input[AOMMAX(0, i - j)] + input[AOMMIN(i + 1 + j, length - 1)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_highbd(sum, bd); - } - } else { - // Initial part. 
- for (i = 0; i < l1; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[AOMMAX(0, i - j)] + input[i + 1 + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_highbd(sum, bd); - } - // Middle part. - for (; i < l2; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += (input[i - j] + input[i + 1 + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_highbd(sum, bd); - } - // End part. - for (; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)); - for (j = 0; j < filter_len_half; ++j) { - sum += - (input[i - j] + input[AOMMIN(i + 1 + j, length - 1)]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_highbd(sum, bd); - } - } -} - -static void highbd_down2_symodd(const uint16_t *const input, int length, - uint16_t *output, int bd) { - // Actual filter len = 2 * filter_len_half - 1. - static const int16_t *filter = av1_down2_symodd_half_filter; - const int filter_len_half = sizeof(av1_down2_symodd_half_filter) / 2; - int i, j; - uint16_t *optr = output; - int l1 = filter_len_half - 1; - int l2 = (length - filter_len_half + 1); - l1 += (l1 & 1); - l2 += (l2 & 1); - if (l1 > l2) { - // Short input length. - for (i = 0; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[AOMMAX(i - j, 0)] + input[AOMMIN(i + j, length - 1)]) * - filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_highbd(sum, bd); - } - } else { - // Initial part. - for (i = 0; i < l1; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[AOMMAX(i - j, 0)] + input[i + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_highbd(sum, bd); - } - // Middle part. 
- for (; i < l2; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[i - j] + input[i + j]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_highbd(sum, bd); - } - // End part. - for (; i < length; i += 2) { - int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; - for (j = 1; j < filter_len_half; ++j) { - sum += (input[i - j] + input[AOMMIN(i + j, length - 1)]) * filter[j]; - } - sum >>= FILTER_BITS; - *optr++ = clip_pixel_highbd(sum, bd); - } - } -} -#endif - -static void highbd_resize_multistep(const uint16_t *const input, int length, - uint16_t *output, int olength, - uint16_t *otmp, int bd) { - if (length == olength) { - memcpy(output, input, sizeof(output[0]) * length); - return; - } - const int steps = get_down2_steps(length, olength); - - if (steps > 0) { - uint16_t *out = NULL; - int filteredlength = length; - - assert(otmp != NULL); - uint16_t *otmp2 = otmp + get_down2_length(length, 1); - for (int s = 0; s < steps; ++s) { - const int proj_filteredlength = get_down2_length(filteredlength, 1); - const uint16_t *const in = (s == 0 ? input : out); - if (s == steps - 1 && proj_filteredlength == olength) - out = output; - else - out = (s & 1 ? 
otmp2 : otmp); - if (filteredlength & 1) - highbd_down2_symodd(in, filteredlength, out, bd); - else - highbd_down2_symeven(in, filteredlength, out, bd); - filteredlength = proj_filteredlength; - } - if (filteredlength != olength) { - highbd_interpolate(out, filteredlength, output, olength, bd); - } - } else { - highbd_interpolate(input, length, output, olength, bd); - } -} - -static void highbd_fill_col_to_arr(uint16_t *img, int stride, int len, - uint16_t *arr) { - int i; - uint16_t *iptr = img; - uint16_t *aptr = arr; - for (i = 0; i < len; ++i, iptr += stride) { - *aptr++ = *iptr; - } -} - -static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len, - uint16_t *arr) { - int i; - uint16_t *iptr = img; - uint16_t *aptr = arr; - for (i = 0; i < len; ++i, iptr += stride) { - *iptr = *aptr++; - } -} - -static void highbd_resize_plane(const uint8_t *const input, int height, - int width, int in_stride, uint8_t *output, - int height2, int width2, int out_stride, - int bd) { - int i; - uint16_t *intbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * width2 * height); - uint16_t *tmpbuf = - (uint16_t *)aom_malloc(sizeof(uint16_t) * AOMMAX(width, height)); - uint16_t *arrbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * height); - uint16_t *arrbuf2 = (uint16_t *)aom_malloc(sizeof(uint16_t) * height2); - if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) - goto Error; - for (i = 0; i < height; ++i) { - highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width, - intbuf + width2 * i, width2, tmpbuf, bd); - } - for (i = 0; i < width2; ++i) { - highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf); - highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf, bd); - highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2, - arrbuf2); - } - -Error: - aom_free(intbuf); - aom_free(tmpbuf); - aom_free(arrbuf); - aom_free(arrbuf2); -} - -static void highbd_upscale_normative_rect(const uint8_t *const input, 
- int height, int width, int in_stride, - uint8_t *output, int height2, - int width2, int out_stride, - int x_step_qn, int x0_qn, - int pad_left, int pad_right, int bd) { - assert(width > 0); - assert(height > 0); - assert(width2 > 0); - assert(height2 > 0); - assert(height2 == height); - - // Extend the left/right pixels of the tile column if needed - // (either because we can't sample from other tiles, or because we're at - // a frame edge). - // Save the overwritten pixels into tmp_left and tmp_right. - // Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra - // column of border pixels compared to what we'd naively think. - const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1; - const int border_size = border_cols * sizeof(uint16_t); - uint16_t *tmp_left = - NULL; // Silence spurious "may be used uninitialized" warnings - uint16_t *tmp_right = NULL; - uint16_t *const input16 = CONVERT_TO_SHORTPTR(input); - uint16_t *const in_tl = input16 - border_cols; - uint16_t *const in_tr = input16 + width; - if (pad_left) { - tmp_left = (uint16_t *)aom_malloc(sizeof(*tmp_left) * border_cols * height); - for (int i = 0; i < height; i++) { - memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_size); - aom_memset16(in_tl + i * in_stride, input16[i * in_stride], border_cols); - } - } - if (pad_right) { - tmp_right = - (uint16_t *)aom_malloc(sizeof(*tmp_right) * border_cols * height); - for (int i = 0; i < height; i++) { - memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_size); - aom_memset16(in_tr + i * in_stride, input16[i * in_stride + width - 1], - border_cols); - } - } - - av1_highbd_convolve_horiz_rs(CONVERT_TO_SHORTPTR(input - 1), in_stride, - CONVERT_TO_SHORTPTR(output), out_stride, width2, - height2, &av1_resize_filter_normative[0][0], - x0_qn, x_step_qn, bd); - - // Restore the left/right border pixels - if (pad_left) { - for (int i = 0; i < height; i++) { - memcpy(in_tl + i * in_stride, tmp_left + i * border_cols, 
border_size); - } - aom_free(tmp_left); - } - if (pad_right) { - for (int i = 0; i < height; i++) { - memcpy(in_tr + i * in_stride, tmp_right + i * border_cols, border_size); - } - aom_free(tmp_right); - } -} - -void av1_resize_frame420(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, uint8_t *oy, - int oy_stride, uint8_t *ou, uint8_t *ov, - int ouv_stride, int oheight, int owidth) { - resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride); - resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2, owidth / 2, - ouv_stride); - resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2, owidth / 2, - ouv_stride); -} - -void av1_resize_frame422(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, uint8_t *oy, - int oy_stride, uint8_t *ou, uint8_t *ov, - int ouv_stride, int oheight, int owidth) { - resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride); - resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2, - ouv_stride); - resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2, - ouv_stride); -} - -void av1_resize_frame444(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, uint8_t *oy, - int oy_stride, uint8_t *ou, uint8_t *ov, - int ouv_stride, int oheight, int owidth) { - resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride); - resize_plane(u, height, width, uv_stride, ou, oheight, owidth, ouv_stride); - resize_plane(v, height, width, uv_stride, ov, oheight, owidth, ouv_stride); -} - -void av1_highbd_resize_frame420(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, - uint8_t *oy, int oy_stride, uint8_t *ou, - uint8_t *ov, int ouv_stride, int oheight, - int 
owidth, int bd) { - highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth, - oy_stride, bd); - highbd_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2, - owidth / 2, ouv_stride, bd); - highbd_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2, - owidth / 2, ouv_stride, bd); -} - -void av1_highbd_resize_frame422(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, - uint8_t *oy, int oy_stride, uint8_t *ou, - uint8_t *ov, int ouv_stride, int oheight, - int owidth, int bd) { - highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth, - oy_stride, bd); - highbd_resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2, - ouv_stride, bd); - highbd_resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2, - ouv_stride, bd); -} - -void av1_highbd_resize_frame444(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, - uint8_t *oy, int oy_stride, uint8_t *ou, - uint8_t *ov, int ouv_stride, int oheight, - int owidth, int bd) { - highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth, - oy_stride, bd); - highbd_resize_plane(u, height, width, uv_stride, ou, oheight, owidth, - ouv_stride, bd); - highbd_resize_plane(v, height, width, uv_stride, ov, oheight, owidth, - ouv_stride, bd); -} - -void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, int bd, - const int num_planes) { - // TODO(dkovalev): replace YV12_BUFFER_CONFIG with aom_image_t - - // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet - // the static analysis warnings. 
- for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) { - const int is_uv = i > 0; - if (src->flags & YV12_FLAG_HIGHBITDEPTH) - highbd_resize_plane(src->buffers[i], src->crop_heights[is_uv], - src->crop_widths[is_uv], src->strides[is_uv], - dst->buffers[i], dst->crop_heights[is_uv], - dst->crop_widths[is_uv], dst->strides[is_uv], bd); - else - resize_plane(src->buffers[i], src->crop_heights[is_uv], - src->crop_widths[is_uv], src->strides[is_uv], - dst->buffers[i], dst->crop_heights[is_uv], - dst->crop_widths[is_uv], dst->strides[is_uv]); - } - aom_extend_frame_borders(dst, num_planes); -} - -void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint8_t *src, - int src_stride, uint8_t *dst, int dst_stride, - int plane, int rows) { - const int is_uv = (plane > 0); - const int ss_x = is_uv && cm->seq_params.subsampling_x; - const int downscaled_plane_width = ROUND_POWER_OF_TWO(cm->width, ss_x); - const int upscaled_plane_width = - ROUND_POWER_OF_TWO(cm->superres_upscaled_width, ss_x); - const int superres_denom = cm->superres_scale_denominator; - - TileInfo tile_col; - const int32_t x_step_qn = av1_get_upscale_convolve_step( - downscaled_plane_width, upscaled_plane_width); - int32_t x0_qn = get_upscale_convolve_x0(downscaled_plane_width, - upscaled_plane_width, x_step_qn); - - for (int j = 0; j < cm->tile_cols; j++) { - av1_tile_set_col(&tile_col, cm, j); - // Determine the limits of this tile column in both the source - // and destination images. - // Note: The actual location which we start sampling from is - // (downscaled_x0 - 1 + (x0_qn/2^14)), and this quantity increases - // by exactly dst_width * (x_step_qn/2^14) pixels each iteration. 
- const int downscaled_x0 = tile_col.mi_col_start << (MI_SIZE_LOG2 - ss_x); - const int downscaled_x1 = tile_col.mi_col_end << (MI_SIZE_LOG2 - ss_x); - const int src_width = downscaled_x1 - downscaled_x0; - - const int upscaled_x0 = (downscaled_x0 * superres_denom) / SCALE_NUMERATOR; - int upscaled_x1; - if (j == cm->tile_cols - 1) { - // Note that we can't just use AOMMIN here - due to rounding, - // (downscaled_x1 * superres_denom) / SCALE_NUMERATOR may be less than - // upscaled_plane_width. - upscaled_x1 = upscaled_plane_width; - } else { - upscaled_x1 = (downscaled_x1 * superres_denom) / SCALE_NUMERATOR; - } - - const uint8_t *const src_ptr = src + downscaled_x0; - uint8_t *const dst_ptr = dst + upscaled_x0; - const int dst_width = upscaled_x1 - upscaled_x0; - - const int pad_left = (j == 0); - const int pad_right = (j == cm->tile_cols - 1); - - if (cm->seq_params.use_highbitdepth) - highbd_upscale_normative_rect(src_ptr, rows, src_width, src_stride, - dst_ptr, rows, dst_width, dst_stride, - x_step_qn, x0_qn, pad_left, pad_right, - cm->seq_params.bit_depth); - else - upscale_normative_rect(src_ptr, rows, src_width, src_stride, dst_ptr, - rows, dst_width, dst_stride, x_step_qn, x0_qn, - pad_left, pad_right); - - // Update the fractional pixel offset to prepare for the next tile column. 
- x0_qn += (dst_width * x_step_qn) - (src_width << RS_SCALE_SUBPEL_BITS); - } -} - -void av1_upscale_normative_and_extend_frame(const AV1_COMMON *cm, - const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) { - const int num_planes = av1_num_planes(cm); - for (int i = 0; i < num_planes; ++i) { - const int is_uv = (i > 0); - av1_upscale_normative_rows(cm, src->buffers[i], src->strides[is_uv], - dst->buffers[i], dst->strides[is_uv], i, - src->crop_heights[is_uv]); - } - - aom_extend_frame_borders(dst, num_planes); -} - -YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm, - YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled) { - const int num_planes = av1_num_planes(cm); - if (cm->width != unscaled->y_crop_width || - cm->height != unscaled->y_crop_height) { - av1_resize_and_extend_frame(unscaled, scaled, (int)cm->seq_params.bit_depth, - num_planes); - return scaled; - } else { - return unscaled; - } -} - -// Calculates the scaled dimension given the original dimension and the scale -// denominator. -static void calculate_scaled_size_helper(int *dim, int denom) { - if (denom != SCALE_NUMERATOR) { - // Use this version if we need *dim to be even - // *width = (*width * SCALE_NUMERATOR + denom) / (2 * denom); - // *width <<= 1; - *dim = (*dim * SCALE_NUMERATOR + denom / 2) / (denom); - } -} - -void av1_calculate_scaled_size(int *width, int *height, int resize_denom) { - calculate_scaled_size_helper(width, resize_denom); - calculate_scaled_size_helper(height, resize_denom); -} - -void av1_calculate_scaled_superres_size(int *width, int *height, - int superres_denom) { - (void)height; - calculate_scaled_size_helper(width, superres_denom); -} - -void av1_calculate_unscaled_superres_size(int *width, int *height, int denom) { - if (denom != SCALE_NUMERATOR) { - // Note: av1_calculate_scaled_superres_size() rounds *up* after division - // when the resulting dimensions are odd. So here, we round *down*. 
- *width = *width * denom / SCALE_NUMERATOR; - (void)height; - } -} - -// Copy only the config data from 'src' to 'dst'. -static void copy_buffer_config(const YV12_BUFFER_CONFIG *const src, - YV12_BUFFER_CONFIG *const dst) { - dst->bit_depth = src->bit_depth; - dst->color_primaries = src->color_primaries; - dst->transfer_characteristics = src->transfer_characteristics; - dst->matrix_coefficients = src->matrix_coefficients; - dst->monochrome = src->monochrome; - dst->chroma_sample_position = src->chroma_sample_position; - dst->color_range = src->color_range; -} - -// TODO(afergs): Look for in-place upscaling -// TODO(afergs): aom_ vs av1_ functions? Which can I use? -// Upscale decoded image. -void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) { - const int num_planes = av1_num_planes(cm); - if (!av1_superres_scaled(cm)) return; - const SequenceHeader *const seq_params = &cm->seq_params; - - YV12_BUFFER_CONFIG copy_buffer; - memset(©_buffer, 0, sizeof(copy_buffer)); - - YV12_BUFFER_CONFIG *const frame_to_show = get_frame_new_buffer(cm); - - const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, 3); - if (aom_alloc_frame_buffer( - ©_buffer, aligned_width, cm->height, seq_params->subsampling_x, - seq_params->subsampling_y, seq_params->use_highbitdepth, - AOM_BORDER_IN_PIXELS, cm->byte_alignment)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate copy buffer for superres upscaling"); - - // Copy function assumes the frames are the same size. - // Note that it does not copy YV12_BUFFER_CONFIG config data. - aom_yv12_copy_frame(frame_to_show, ©_buffer, num_planes); - - assert(copy_buffer.y_crop_width == aligned_width); - assert(copy_buffer.y_crop_height == cm->height); - - // Realloc the current frame buffer at a higher resolution in place. - if (pool != NULL) { - // Use callbacks if on the decoder. 
- aom_codec_frame_buffer_t *fb = - &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer; - aom_release_frame_buffer_cb_fn_t release_fb_cb = pool->release_fb_cb; - aom_get_frame_buffer_cb_fn_t cb = pool->get_fb_cb; - void *cb_priv = pool->cb_priv; - - // Realloc with callback does not release the frame buffer - release first. - if (release_fb_cb(cb_priv, fb)) - aom_internal_error( - &cm->error, AOM_CODEC_MEM_ERROR, - "Failed to free current frame buffer before superres upscaling"); - - // aom_realloc_frame_buffer() leaves config data for frame_to_show intact - if (aom_realloc_frame_buffer( - frame_to_show, cm->superres_upscaled_width, - cm->superres_upscaled_height, seq_params->subsampling_x, - seq_params->subsampling_y, seq_params->use_highbitdepth, - AOM_BORDER_IN_PIXELS, cm->byte_alignment, fb, cb, cb_priv)) - aom_internal_error( - &cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate current frame buffer for superres upscaling"); - } else { - // Make a copy of the config data for frame_to_show in copy_buffer - copy_buffer_config(frame_to_show, ©_buffer); - - // Don't use callbacks on the encoder. - // aom_alloc_frame_buffer() clears the config data for frame_to_show - if (aom_alloc_frame_buffer( - frame_to_show, cm->superres_upscaled_width, - cm->superres_upscaled_height, seq_params->subsampling_x, - seq_params->subsampling_y, seq_params->use_highbitdepth, - AOM_BORDER_IN_PIXELS, cm->byte_alignment)) - aom_internal_error( - &cm->error, AOM_CODEC_MEM_ERROR, - "Failed to reallocate current frame buffer for superres upscaling"); - - // Restore config data back to frame_to_show - copy_buffer_config(©_buffer, frame_to_show); - } - // TODO(afergs): verify frame_to_show is correct after realloc - // encoder: - // decoder: - - assert(frame_to_show->y_crop_width == cm->superres_upscaled_width); - assert(frame_to_show->y_crop_height == cm->superres_upscaled_height); - - // Scale up and back into frame_to_show. 
- assert(frame_to_show->y_crop_width != cm->width); - av1_upscale_normative_and_extend_frame(cm, ©_buffer, frame_to_show); - - // Free the copy buffer - aom_free_frame_buffer(©_buffer); -} diff --git a/third_party/aom/av1/common/resize.h b/third_party/aom/av1/common/resize.h deleted file mode 100644 index 9a59a8d63..000000000 --- a/third_party/aom/av1/common/resize.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_RESIZE_H_ -#define AOM_AV1_COMMON_RESIZE_H_ - -#include <stdio.h> -#include "aom/aom_integer.h" -#include "av1/common/onyxc_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_resize_plane(const uint8_t *const input, int height, int width, - int in_stride, uint8_t *output, int height2, int width2, - int out_stride); -void av1_resize_frame420(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, uint8_t *oy, - int oy_stride, uint8_t *ou, uint8_t *ov, - int ouv_stride, int oheight, int owidth); -void av1_resize_frame422(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, uint8_t *oy, - int oy_stride, uint8_t *ou, uint8_t *ov, - int ouv_stride, int oheight, int owidth); -void av1_resize_frame444(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, uint8_t *oy, - int oy_stride, uint8_t 
*ou, uint8_t *ov, - int ouv_stride, int oheight, int owidth); - -void av1_highbd_resize_plane(const uint8_t *const input, int height, int width, - int in_stride, uint8_t *output, int height2, - int width2, int out_stride, int bd); -void av1_highbd_resize_frame420(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, - uint8_t *oy, int oy_stride, uint8_t *ou, - uint8_t *ov, int ouv_stride, int oheight, - int owidth, int bd); -void av1_highbd_resize_frame422(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, - uint8_t *oy, int oy_stride, uint8_t *ou, - uint8_t *ov, int ouv_stride, int oheight, - int owidth, int bd); -void av1_highbd_resize_frame444(const uint8_t *const y, int y_stride, - const uint8_t *const u, const uint8_t *const v, - int uv_stride, int height, int width, - uint8_t *oy, int oy_stride, uint8_t *ou, - uint8_t *ov, int ouv_stride, int oheight, - int owidth, int bd); -void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, int bd, - const int num_planes); - -void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint8_t *src, - int src_stride, uint8_t *dst, int dst_stride, - int plane, int rows); -void av1_upscale_normative_and_extend_frame(const AV1_COMMON *cm, - const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst); - -YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm, - YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled); - -// Calculates the scaled dimensions from the given original dimensions and the -// resize scale denominator. -void av1_calculate_scaled_size(int *width, int *height, int resize_denom); - -// Similar to above, but calculates scaled dimensions after superres from the -// given original dimensions and superres scale denominator. 
-void av1_calculate_scaled_superres_size(int *width, int *height, - int superres_denom); - -// Inverse of av1_calculate_scaled_superres_size() above: calculates the -// original dimensions from the given scaled dimensions and the scale -// denominator. -void av1_calculate_unscaled_superres_size(int *width, int *height, int denom); - -void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool); - -// Returns 1 if a superres upscaled frame is scaled and 0 otherwise. -static INLINE int av1_superres_scaled(const AV1_COMMON *cm) { - // Note: for some corner cases (e.g. cm->width of 1), there may be no scaling - // required even though cm->superres_scale_denominator != SCALE_NUMERATOR. - // So, the following check is more accurate. - return !(cm->width == cm->superres_upscaled_width); -} - -#define UPSCALE_NORMATIVE_TAPS 8 -extern const int16_t av1_resize_filter_normative[1 << RS_SUBPEL_BITS] - [UPSCALE_NORMATIVE_TAPS]; - -int32_t av1_get_upscale_convolve_step(int in_length, int out_length); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_RESIZE_H_ diff --git a/third_party/aom/av1/common/restoration.c b/third_party/aom/av1/common/restoration.c deleted file mode 100644 index d276a915b..000000000 --- a/third_party/aom/av1/common/restoration.c +++ /dev/null @@ -1,1556 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- * - */ - -#include <math.h> - -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" -#include "config/aom_scale_rtcd.h" - -#include "aom_mem/aom_mem.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/resize.h" -#include "av1/common/restoration.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" - -#include "aom_ports/mem.h" - -// The 's' values are calculated based on original 'r' and 'e' values in the -// spec using GenSgrprojVtable(). -// Note: Setting r = 0 skips the filter; with corresponding s = -1 (invalid). -const sgr_params_type sgr_params[SGRPROJ_PARAMS] = { - { { 2, 1 }, { 140, 3236 } }, { { 2, 1 }, { 112, 2158 } }, - { { 2, 1 }, { 93, 1618 } }, { { 2, 1 }, { 80, 1438 } }, - { { 2, 1 }, { 70, 1295 } }, { { 2, 1 }, { 58, 1177 } }, - { { 2, 1 }, { 47, 1079 } }, { { 2, 1 }, { 37, 996 } }, - { { 2, 1 }, { 30, 925 } }, { { 2, 1 }, { 25, 863 } }, - { { 0, 1 }, { -1, 2589 } }, { { 0, 1 }, { -1, 1618 } }, - { { 0, 1 }, { -1, 1177 } }, { { 0, 1 }, { -1, 925 } }, - { { 2, 0 }, { 56, -1 } }, { { 2, 0 }, { 22, -1 } }, -}; - -AV1PixelRect av1_whole_frame_rect(const AV1_COMMON *cm, int is_uv) { - AV1PixelRect rect; - - int ss_x = is_uv && cm->seq_params.subsampling_x; - int ss_y = is_uv && cm->seq_params.subsampling_y; - - rect.top = 0; - rect.bottom = ROUND_POWER_OF_TWO(cm->height, ss_y); - rect.left = 0; - rect.right = ROUND_POWER_OF_TWO(cm->superres_upscaled_width, ss_x); - return rect; -} - -// Count horizontal or vertical units per tile (use a width or height for -// tile_size, respectively). We basically want to divide the tile size by the -// size of a restoration unit. Rather than rounding up unconditionally as you -// might expect, we round to nearest, which models the way a right or bottom -// restoration unit can extend to up to 150% its normal width or height. The -// max with 1 is to deal with tiles that are smaller than half of a restoration -// unit. 
-int av1_lr_count_units_in_tile(int unit_size, int tile_size) { - return AOMMAX((tile_size + (unit_size >> 1)) / unit_size, 1); -} - -void av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rsi, - int is_uv) { - // We need to allocate enough space for restoration units to cover the - // largest tile. Without CONFIG_MAX_TILE, this is always the tile at the - // top-left and we can use av1_get_tile_rect(). With CONFIG_MAX_TILE, we have - // to do the computation ourselves, iterating over the tiles and keeping - // track of the largest width and height, then upscaling. - const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv); - const int max_tile_w = tile_rect.right - tile_rect.left; - const int max_tile_h = tile_rect.bottom - tile_rect.top; - - // To calculate hpertile and vpertile (horizontal and vertical units per - // tile), we basically want to divide the largest tile width or height by the - // size of a restoration unit. Rather than rounding up unconditionally as you - // might expect, we round to nearest, which models the way a right or bottom - // restoration unit can extend to up to 150% its normal width or height. The - // max with 1 is to deal with tiles that are smaller than half of a - // restoration unit. 
- const int unit_size = rsi->restoration_unit_size; - const int hpertile = av1_lr_count_units_in_tile(unit_size, max_tile_w); - const int vpertile = av1_lr_count_units_in_tile(unit_size, max_tile_h); - - rsi->units_per_tile = hpertile * vpertile; - rsi->horz_units_per_tile = hpertile; - rsi->vert_units_per_tile = vpertile; - - const int ntiles = 1; - const int nunits = ntiles * rsi->units_per_tile; - - aom_free(rsi->unit_info); - CHECK_MEM_ERROR(cm, rsi->unit_info, - (RestorationUnitInfo *)aom_memalign( - 16, sizeof(*rsi->unit_info) * nunits)); -} - -void av1_free_restoration_struct(RestorationInfo *rst_info) { - aom_free(rst_info->unit_info); - rst_info->unit_info = NULL; -} - -#if 0 -// Pair of values for each sgrproj parameter: -// Index 0 corresponds to r[0], e[0] -// Index 1 corresponds to r[1], e[1] -int sgrproj_mtable[SGRPROJ_PARAMS][2]; - -static void GenSgrprojVtable() { - for (int i = 0; i < SGRPROJ_PARAMS; ++i) { - const sgr_params_type *const params = &sgr_params[i]; - for (int j = 0; j < 2; ++j) { - const int e = params->e[j]; - const int r = params->r[j]; - if (r == 0) { // filter is disabled - sgrproj_mtable[i][j] = -1; // mark invalid - } else { // filter is enabled - const int n = (2 * r + 1) * (2 * r + 1); - const int n2e = n * n * e; - assert(n2e != 0); - sgrproj_mtable[i][j] = (((1 << SGRPROJ_MTABLE_BITS) + n2e / 2) / n2e); - } - } - } -} -#endif - -void av1_loop_restoration_precal() { -#if 0 - GenSgrprojVtable(); -#endif -} - -static void extend_frame_lowbd(uint8_t *data, int width, int height, int stride, - int border_horz, int border_vert) { - uint8_t *data_p; - int i; - for (i = 0; i < height; ++i) { - data_p = data + i * stride; - memset(data_p - border_horz, data_p[0], border_horz); - memset(data_p + width, data_p[width - 1], border_horz); - } - data_p = data - border_horz; - for (i = -border_vert; i < 0; ++i) { - memcpy(data_p + i * stride, data_p, width + 2 * border_horz); - } - for (i = height; i < height + border_vert; ++i) { - 
memcpy(data_p + i * stride, data_p + (height - 1) * stride, - width + 2 * border_horz); - } -} - -static void extend_frame_highbd(uint16_t *data, int width, int height, - int stride, int border_horz, int border_vert) { - uint16_t *data_p; - int i, j; - for (i = 0; i < height; ++i) { - data_p = data + i * stride; - for (j = -border_horz; j < 0; ++j) data_p[j] = data_p[0]; - for (j = width; j < width + border_horz; ++j) data_p[j] = data_p[width - 1]; - } - data_p = data - border_horz; - for (i = -border_vert; i < 0; ++i) { - memcpy(data_p + i * stride, data_p, - (width + 2 * border_horz) * sizeof(uint16_t)); - } - for (i = height; i < height + border_vert; ++i) { - memcpy(data_p + i * stride, data_p + (height - 1) * stride, - (width + 2 * border_horz) * sizeof(uint16_t)); - } -} - -void extend_frame(uint8_t *data, int width, int height, int stride, - int border_horz, int border_vert, int highbd) { - if (highbd) - extend_frame_highbd(CONVERT_TO_SHORTPTR(data), width, height, stride, - border_horz, border_vert); - else - extend_frame_lowbd(data, width, height, stride, border_horz, border_vert); -} - -static void copy_tile_lowbd(int width, int height, const uint8_t *src, - int src_stride, uint8_t *dst, int dst_stride) { - for (int i = 0; i < height; ++i) - memcpy(dst + i * dst_stride, src + i * src_stride, width); -} - -static void copy_tile_highbd(int width, int height, const uint16_t *src, - int src_stride, uint16_t *dst, int dst_stride) { - for (int i = 0; i < height; ++i) - memcpy(dst + i * dst_stride, src + i * src_stride, width * sizeof(*dst)); -} - -static void copy_tile(int width, int height, const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int highbd) { - if (highbd) - copy_tile_highbd(width, height, CONVERT_TO_SHORTPTR(src), src_stride, - CONVERT_TO_SHORTPTR(dst), dst_stride); - else - copy_tile_lowbd(width, height, src, src_stride, dst, dst_stride); -} - -#define REAL_PTR(hbd, d) ((hbd) ? 
(uint8_t *)CONVERT_TO_SHORTPTR(d) : (d)) - -// With striped loop restoration, the filtering for each 64-pixel stripe gets -// most of its input from the output of CDEF (stored in data8), but we need to -// fill out a border of 3 pixels above/below the stripe according to the -// following -// rules: -// -// * At a frame boundary, we copy the outermost row of CDEF pixels three times. -// This extension is done by a call to extend_frame() at the start of the loop -// restoration process, so the value of copy_above/copy_below doesn't strictly -// matter. -// However, by setting *copy_above = *copy_below = 1 whenever loop filtering -// across tiles is disabled, we can allow -// {setup,restore}_processing_stripe_boundary to assume that the top/bottom -// data has always been copied, simplifying the behaviour at the left and -// right edges of tiles. -// -// * If we're at a tile boundary and loop filtering across tiles is enabled, -// then there is a logical stripe which is 64 pixels high, but which is split -// into an 8px high and a 56px high stripe so that the processing (and -// coefficient set usage) can be aligned to tiles. -// In this case, we use the 3 rows of CDEF output across the boundary for -// context; this corresponds to leaving the frame buffer as-is. -// -// * If we're at a tile boundary and loop filtering across tiles is disabled, -// then we take the outermost row of CDEF pixels *within the current tile* -// and copy it three times. Thus we behave exactly as if the tile were a full -// frame. -// -// * Otherwise, we're at a stripe boundary within a tile. In that case, we -// take 2 rows of deblocked pixels and extend them to 3 rows of context. -// -// The distinction between the latter two cases is handled by the -// av1_loop_restoration_save_boundary_lines() function, so here we just need -// to decide if we're overwriting the above/below boundary pixels or not. 
-static void get_stripe_boundary_info(const RestorationTileLimits *limits, - const AV1PixelRect *tile_rect, int ss_y, - int *copy_above, int *copy_below) { - *copy_above = 1; - *copy_below = 1; - - const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y; - const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y; - - const int first_stripe_in_tile = (limits->v_start == tile_rect->top); - const int this_stripe_height = - full_stripe_height - (first_stripe_in_tile ? runit_offset : 0); - const int last_stripe_in_tile = - (limits->v_start + this_stripe_height >= tile_rect->bottom); - - if (first_stripe_in_tile) *copy_above = 0; - if (last_stripe_in_tile) *copy_below = 0; -} - -// Overwrite the border pixels around a processing stripe so that the conditions -// listed above get_stripe_boundary_info() are preserved. -// We save the pixels which get overwritten into a temporary buffer, so that -// they can be restored by restore_processing_stripe_boundary() after we've -// processed the stripe. -// -// limits gives the rectangular limits of the remaining stripes for the current -// restoration unit. rsb is the stored stripe boundaries (taken from either -// deblock or CDEF output as necessary). -// -// tile_rect is the limits of the current tile and tile_stripe0 is the index of -// the first stripe in this tile (needed to convert the tile-relative stripe -// index we get from limits into something we can look up in rsb). -static void setup_processing_stripe_boundary( - const RestorationTileLimits *limits, const RestorationStripeBoundaries *rsb, - int rsb_row, int use_highbd, int h, uint8_t *data8, int data_stride, - RestorationLineBuffers *rlbs, int copy_above, int copy_below, int opt) { - // Offsets within the line buffers. The buffer logically starts at column - // -RESTORATION_EXTRA_HORZ so the 1st column (at x0 - RESTORATION_EXTRA_HORZ) - // has column x0 in the buffer. 
- const int buf_stride = rsb->stripe_boundary_stride; - const int buf_x0_off = limits->h_start; - const int line_width = - (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ; - const int line_size = line_width << use_highbd; - - const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ; - - // Replace RESTORATION_BORDER pixels above the top of the stripe - // We expand RESTORATION_CTX_VERT=2 lines from rsb->stripe_boundary_above - // to fill RESTORATION_BORDER=3 lines of above pixels. This is done by - // duplicating the topmost of the 2 lines (see the AOMMAX call when - // calculating src_row, which gets the values 0, 0, 1 for i = -3, -2, -1). - // - // Special case: If we're at the top of a tile, which isn't on the topmost - // tile row, and we're allowed to loop filter across tiles, then we have a - // logical 64-pixel-high stripe which has been split into an 8-pixel high - // stripe and a 56-pixel high stripe (the current one). So, in this case, - // we want to leave the boundary alone! - if (!opt) { - if (copy_above) { - uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride; - - for (int i = -RESTORATION_BORDER; i < 0; ++i) { - const int buf_row = rsb_row + AOMMAX(i + RESTORATION_CTX_VERT, 0); - const int buf_off = buf_x0_off + buf_row * buf_stride; - const uint8_t *buf = - rsb->stripe_boundary_above + (buf_off << use_highbd); - uint8_t *dst8 = data8_tl + i * data_stride; - // Save old pixels, then replace with data from stripe_boundary_above - memcpy(rlbs->tmp_save_above[i + RESTORATION_BORDER], - REAL_PTR(use_highbd, dst8), line_size); - memcpy(REAL_PTR(use_highbd, dst8), buf, line_size); - } - } - - // Replace RESTORATION_BORDER pixels below the bottom of the stripe. - // The second buffer row is repeated, so src_row gets the values 0, 1, 1 - // for i = 0, 1, 2. 
- if (copy_below) { - const int stripe_end = limits->v_start + h; - uint8_t *data8_bl = data8 + data_x0 + stripe_end * data_stride; - - for (int i = 0; i < RESTORATION_BORDER; ++i) { - const int buf_row = rsb_row + AOMMIN(i, RESTORATION_CTX_VERT - 1); - const int buf_off = buf_x0_off + buf_row * buf_stride; - const uint8_t *src = - rsb->stripe_boundary_below + (buf_off << use_highbd); - - uint8_t *dst8 = data8_bl + i * data_stride; - // Save old pixels, then replace with data from stripe_boundary_below - memcpy(rlbs->tmp_save_below[i], REAL_PTR(use_highbd, dst8), line_size); - memcpy(REAL_PTR(use_highbd, dst8), src, line_size); - } - } - } else { - if (copy_above) { - uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride; - - // Only save and overwrite i=-RESTORATION_BORDER line. - uint8_t *dst8 = data8_tl + (-RESTORATION_BORDER) * data_stride; - // Save old pixels, then replace with data from stripe_boundary_above - memcpy(rlbs->tmp_save_above[0], REAL_PTR(use_highbd, dst8), line_size); - memcpy(REAL_PTR(use_highbd, dst8), - REAL_PTR(use_highbd, - data8_tl + (-RESTORATION_BORDER + 1) * data_stride), - line_size); - } - - if (copy_below) { - const int stripe_end = limits->v_start + h; - uint8_t *data8_bl = data8 + data_x0 + stripe_end * data_stride; - - // Only save and overwrite i=2 line. - uint8_t *dst8 = data8_bl + 2 * data_stride; - // Save old pixels, then replace with data from stripe_boundary_below - memcpy(rlbs->tmp_save_below[2], REAL_PTR(use_highbd, dst8), line_size); - memcpy(REAL_PTR(use_highbd, dst8), - REAL_PTR(use_highbd, data8_bl + (2 - 1) * data_stride), line_size); - } - } -} - -// This function restores the boundary lines modified by -// setup_processing_stripe_boundary. -// -// Note: We need to be careful when handling the corners of the processing -// unit, because (eg.) the top-left corner is considered to be part of -// both the left and top borders. 
This means that, depending on the -// loop_filter_across_tiles_enabled flag, the corner pixels might get -// overwritten twice, once as part of the "top" border and once as part -// of the "left" border (or similar for other corners). -// -// Everything works out fine as long as we make sure to reverse the order -// when restoring, ie. we need to restore the left/right borders followed -// by the top/bottom borders. -static void restore_processing_stripe_boundary( - const RestorationTileLimits *limits, const RestorationLineBuffers *rlbs, - int use_highbd, int h, uint8_t *data8, int data_stride, int copy_above, - int copy_below, int opt) { - const int line_width = - (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ; - const int line_size = line_width << use_highbd; - - const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ; - - if (!opt) { - if (copy_above) { - uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride; - for (int i = -RESTORATION_BORDER; i < 0; ++i) { - uint8_t *dst8 = data8_tl + i * data_stride; - memcpy(REAL_PTR(use_highbd, dst8), - rlbs->tmp_save_above[i + RESTORATION_BORDER], line_size); - } - } - - if (copy_below) { - const int stripe_bottom = limits->v_start + h; - uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride; - - for (int i = 0; i < RESTORATION_BORDER; ++i) { - if (stripe_bottom + i >= limits->v_end + RESTORATION_BORDER) break; - - uint8_t *dst8 = data8_bl + i * data_stride; - memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[i], line_size); - } - } - } else { - if (copy_above) { - uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride; - - // Only restore i=-RESTORATION_BORDER line. 
- uint8_t *dst8 = data8_tl + (-RESTORATION_BORDER) * data_stride; - memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_above[0], line_size); - } - - if (copy_below) { - const int stripe_bottom = limits->v_start + h; - uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride; - - // Only restore i=2 line. - if (stripe_bottom + 2 < limits->v_end + RESTORATION_BORDER) { - uint8_t *dst8 = data8_bl + 2 * data_stride; - memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[2], line_size); - } - } - } -} - -static void wiener_filter_stripe(const RestorationUnitInfo *rui, - int stripe_width, int stripe_height, - int procunit_width, const uint8_t *src, - int src_stride, uint8_t *dst, int dst_stride, - int32_t *tmpbuf, int bit_depth) { - (void)tmpbuf; - (void)bit_depth; - assert(bit_depth == 8); - const ConvolveParams conv_params = get_conv_params_wiener(8); - - for (int j = 0; j < stripe_width; j += procunit_width) { - int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15); - const uint8_t *src_p = src + j; - uint8_t *dst_p = dst + j; - av1_wiener_convolve_add_src( - src_p, src_stride, dst_p, dst_stride, rui->wiener_info.hfilter, 16, - rui->wiener_info.vfilter, 16, w, stripe_height, &conv_params); - } -} - -/* Calculate windowed sums (if sqr=0) or sums of squares (if sqr=1) - over the input. The window is of size (2r + 1)x(2r + 1), and we - specialize to r = 1, 2, 3. A default function is used for r > 3. - - Each loop follows the same format: We keep a window's worth of input - in individual variables and select data out of that as appropriate. -*/ -static void boxsum1(int32_t *src, int width, int height, int src_stride, - int sqr, int32_t *dst, int dst_stride) { - int i, j, a, b, c; - assert(width > 2 * SGRPROJ_BORDER_HORZ); - assert(height > 2 * SGRPROJ_BORDER_VERT); - - // Vertical sum over 3-pixel regions, from src into dst. 
- if (!sqr) { - for (j = 0; j < width; ++j) { - a = src[j]; - b = src[src_stride + j]; - c = src[2 * src_stride + j]; - - dst[j] = a + b; - for (i = 1; i < height - 2; ++i) { - // Loop invariant: At the start of each iteration, - // a = src[(i - 1) * src_stride + j] - // b = src[(i ) * src_stride + j] - // c = src[(i + 1) * src_stride + j] - dst[i * dst_stride + j] = a + b + c; - a = b; - b = c; - c = src[(i + 2) * src_stride + j]; - } - dst[i * dst_stride + j] = a + b + c; - dst[(i + 1) * dst_stride + j] = b + c; - } - } else { - for (j = 0; j < width; ++j) { - a = src[j] * src[j]; - b = src[src_stride + j] * src[src_stride + j]; - c = src[2 * src_stride + j] * src[2 * src_stride + j]; - - dst[j] = a + b; - for (i = 1; i < height - 2; ++i) { - dst[i * dst_stride + j] = a + b + c; - a = b; - b = c; - c = src[(i + 2) * src_stride + j] * src[(i + 2) * src_stride + j]; - } - dst[i * dst_stride + j] = a + b + c; - dst[(i + 1) * dst_stride + j] = b + c; - } - } - - // Horizontal sum over 3-pixel regions of dst - for (i = 0; i < height; ++i) { - a = dst[i * dst_stride]; - b = dst[i * dst_stride + 1]; - c = dst[i * dst_stride + 2]; - - dst[i * dst_stride] = a + b; - for (j = 1; j < width - 2; ++j) { - // Loop invariant: At the start of each iteration, - // a = src[i * src_stride + (j - 1)] - // b = src[i * src_stride + (j )] - // c = src[i * src_stride + (j + 1)] - dst[i * dst_stride + j] = a + b + c; - a = b; - b = c; - c = dst[i * dst_stride + (j + 2)]; - } - dst[i * dst_stride + j] = a + b + c; - dst[i * dst_stride + (j + 1)] = b + c; - } -} - -static void boxsum2(int32_t *src, int width, int height, int src_stride, - int sqr, int32_t *dst, int dst_stride) { - int i, j, a, b, c, d, e; - assert(width > 2 * SGRPROJ_BORDER_HORZ); - assert(height > 2 * SGRPROJ_BORDER_VERT); - - // Vertical sum over 5-pixel regions, from src into dst. 
- if (!sqr) { - for (j = 0; j < width; ++j) { - a = src[j]; - b = src[src_stride + j]; - c = src[2 * src_stride + j]; - d = src[3 * src_stride + j]; - e = src[4 * src_stride + j]; - - dst[j] = a + b + c; - dst[dst_stride + j] = a + b + c + d; - for (i = 2; i < height - 3; ++i) { - // Loop invariant: At the start of each iteration, - // a = src[(i - 2) * src_stride + j] - // b = src[(i - 1) * src_stride + j] - // c = src[(i ) * src_stride + j] - // d = src[(i + 1) * src_stride + j] - // e = src[(i + 2) * src_stride + j] - dst[i * dst_stride + j] = a + b + c + d + e; - a = b; - b = c; - c = d; - d = e; - e = src[(i + 3) * src_stride + j]; - } - dst[i * dst_stride + j] = a + b + c + d + e; - dst[(i + 1) * dst_stride + j] = b + c + d + e; - dst[(i + 2) * dst_stride + j] = c + d + e; - } - } else { - for (j = 0; j < width; ++j) { - a = src[j] * src[j]; - b = src[src_stride + j] * src[src_stride + j]; - c = src[2 * src_stride + j] * src[2 * src_stride + j]; - d = src[3 * src_stride + j] * src[3 * src_stride + j]; - e = src[4 * src_stride + j] * src[4 * src_stride + j]; - - dst[j] = a + b + c; - dst[dst_stride + j] = a + b + c + d; - for (i = 2; i < height - 3; ++i) { - dst[i * dst_stride + j] = a + b + c + d + e; - a = b; - b = c; - c = d; - d = e; - e = src[(i + 3) * src_stride + j] * src[(i + 3) * src_stride + j]; - } - dst[i * dst_stride + j] = a + b + c + d + e; - dst[(i + 1) * dst_stride + j] = b + c + d + e; - dst[(i + 2) * dst_stride + j] = c + d + e; - } - } - - // Horizontal sum over 5-pixel regions of dst - for (i = 0; i < height; ++i) { - a = dst[i * dst_stride]; - b = dst[i * dst_stride + 1]; - c = dst[i * dst_stride + 2]; - d = dst[i * dst_stride + 3]; - e = dst[i * dst_stride + 4]; - - dst[i * dst_stride] = a + b + c; - dst[i * dst_stride + 1] = a + b + c + d; - for (j = 2; j < width - 3; ++j) { - // Loop invariant: At the start of each iteration, - // a = src[i * src_stride + (j - 2)] - // b = src[i * src_stride + (j - 1)] - // c = src[i * src_stride + (j 
)] - // d = src[i * src_stride + (j + 1)] - // e = src[i * src_stride + (j + 2)] - dst[i * dst_stride + j] = a + b + c + d + e; - a = b; - b = c; - c = d; - d = e; - e = dst[i * dst_stride + (j + 3)]; - } - dst[i * dst_stride + j] = a + b + c + d + e; - dst[i * dst_stride + (j + 1)] = b + c + d + e; - dst[i * dst_stride + (j + 2)] = c + d + e; - } -} - -static void boxsum(int32_t *src, int width, int height, int src_stride, int r, - int sqr, int32_t *dst, int dst_stride) { - if (r == 1) - boxsum1(src, width, height, src_stride, sqr, dst, dst_stride); - else if (r == 2) - boxsum2(src, width, height, src_stride, sqr, dst, dst_stride); - else - assert(0 && "Invalid value of r in self-guided filter"); -} - -void decode_xq(const int *xqd, int *xq, const sgr_params_type *params) { - if (params->r[0] == 0) { - xq[0] = 0; - xq[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[1]; - } else if (params->r[1] == 0) { - xq[0] = xqd[0]; - xq[1] = 0; - } else { - xq[0] = xqd[0]; - xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1]; - } -} - -const int32_t x_by_xplus1[256] = { - // Special case: Map 0 -> 1 (corresponding to a value of 1/256) - // instead of 0. 
See comments in selfguided_restoration_internal() for why - 1, 128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239, - 240, 241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247, - 248, 248, 248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250, - 250, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252, - 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253, - 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, - 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 254, 254, 254, - 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 256, -}; - -const int32_t one_by_x[MAX_NELEM] = { - 4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315, - 293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164, -}; - -static void calculate_intermediate_result(int32_t *dgd, int width, int height, - int dgd_stride, int bit_depth, - int sgr_params_idx, int radius_idx, - int pass, int32_t *A, int32_t *B) { - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - const int r = params->r[radius_idx]; - const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ; - const int height_ext = height + 2 * SGRPROJ_BORDER_VERT; - // Adjusting the stride of A 
and B here appears to avoid bad cache effects, - // leading to a significant speed improvement. - // We also align the stride to a multiple of 16 bytes, for consistency - // with the SIMD version of this function. - int buf_stride = ((width_ext + 3) & ~3) + 16; - const int step = pass == 0 ? 1 : 2; - int i, j; - - assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r"); - assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 && - "Need SGRPROJ_BORDER_* >= r+1"); - - boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ, - width_ext, height_ext, dgd_stride, r, 0, B, buf_stride); - boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ, - width_ext, height_ext, dgd_stride, r, 1, A, buf_stride); - A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - // Calculate the eventual A[] and B[] arrays. Include a 1-pixel border - ie, - // for a 64x64 processing unit, we calculate 66x66 pixels of A[] and B[]. - for (i = -1; i < height + 1; i += step) { - for (j = -1; j < width + 1; ++j) { - const int k = i * buf_stride + j; - const int n = (2 * r + 1) * (2 * r + 1); - - // a < 2^16 * n < 2^22 regardless of bit depth - uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8)); - // b < 2^8 * n < 2^14 regardless of bit depth - uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8); - - // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28, - // and p itself satisfies p < 2^14 * n^2 < 2^26. - // This bound on p is due to: - // https://en.wikipedia.org/wiki/Popoviciu's_inequality_on_variances - // - // Note: Sometimes, in high bit depth, we can end up with a*n < b*b. - // This is an artefact of rounding, and can only happen if all pixels - // are (almost) identical, so in this case we saturate to p=0. - uint32_t p = (a * n < b * b) ? 
0 : a * n - b * b; - - const uint32_t s = params->s[radius_idx]; - - // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32 - // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12 - // (this holds even after accounting for the rounding in s) - const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS); - - // Note: We have to be quite careful about the value of A[k]. - // This is used as a blend factor between individual pixel values and the - // local mean. So it logically has a range of [0, 256], including both - // endpoints. - // - // This is a pain for hardware, as we'd like something which can be stored - // in exactly 8 bits. - // Further, in the calculation of B[k] below, if z == 0 and r == 2, - // then A[k] "should be" 0. But then we can end up setting B[k] to a value - // slightly above 2^(8 + bit depth), due to rounding in the value of - // one_by_x[25-1]. - // - // Thus we saturate so that, when z == 0, A[k] is set to 1 instead of 0. - // This fixes the above issues (256 - A[k] fits in a uint8, and we can't - // overflow), without significantly affecting the final result: z == 0 - // implies that the image is essentially "flat", so the local mean and - // individual pixel values are very similar. - // - // Note that saturating on the other side, ie. requring A[k] <= 255, - // would be a bad idea, as that corresponds to the case where the image - // is very variable, when we want to preserve the local pixel value as - // much as possible. - A[k] = x_by_xplus1[AOMMIN(z, 255)]; // in range [1, 256] - - // SGRPROJ_SGR - A[k] < 2^8 (from above), B[k] < 2^(bit_depth) * n, - // one_by_x[n - 1] = round(2^12 / n) - // => the product here is < 2^(20 + bit_depth) <= 2^32, - // and B[k] is set to a value < 2^(8 + bit depth) - // This holds even with the rounding in one_by_x and in the overall - // result, as long as SGRPROJ_SGR - A[k] is strictly less than 2^8. 
- B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) * - (uint32_t)B[k] * - (uint32_t)one_by_x[n - 1], - SGRPROJ_RECIP_BITS); - } - } -} - -static void selfguided_restoration_fast_internal( - int32_t *dgd, int width, int height, int dgd_stride, int32_t *dst, - int dst_stride, int bit_depth, int sgr_params_idx, int radius_idx) { - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - const int r = params->r[radius_idx]; - const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ; - // Adjusting the stride of A and B here appears to avoid bad cache effects, - // leading to a significant speed improvement. - // We also align the stride to a multiple of 16 bytes, for consistency - // with the SIMD version of this function. - int buf_stride = ((width_ext + 3) & ~3) + 16; - int32_t A_[RESTORATION_PROC_UNIT_PELS]; - int32_t B_[RESTORATION_PROC_UNIT_PELS]; - int32_t *A = A_; - int32_t *B = B_; - int i, j; - calculate_intermediate_result(dgd, width, height, dgd_stride, bit_depth, - sgr_params_idx, radius_idx, 1, A, B); - A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - - // Use the A[] and B[] arrays to calculate the filtered image - (void)r; - assert(r == 2); - for (i = 0; i < height; ++i) { - if (!(i & 1)) { // even row - for (j = 0; j < width; ++j) { - const int k = i * buf_stride + j; - const int l = i * dgd_stride + j; - const int m = i * dst_stride + j; - const int nb = 5; - const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 + - (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] + - A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) * - 5; - const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 + - (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] + - B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) * - 5; - const int32_t v = a * dgd[l] + b; - dst[m] = - ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); - } - } else { // odd row - for (j = 0; j < 
width; ++j) { - const int k = i * buf_stride + j; - const int l = i * dgd_stride + j; - const int m = i * dst_stride + j; - const int nb = 4; - const int32_t a = A[k] * 6 + (A[k - 1] + A[k + 1]) * 5; - const int32_t b = B[k] * 6 + (B[k - 1] + B[k + 1]) * 5; - const int32_t v = a * dgd[l] + b; - dst[m] = - ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); - } - } - } -} - -static void selfguided_restoration_internal(int32_t *dgd, int width, int height, - int dgd_stride, int32_t *dst, - int dst_stride, int bit_depth, - int sgr_params_idx, - int radius_idx) { - const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ; - // Adjusting the stride of A and B here appears to avoid bad cache effects, - // leading to a significant speed improvement. - // We also align the stride to a multiple of 16 bytes, for consistency - // with the SIMD version of this function. - int buf_stride = ((width_ext + 3) & ~3) + 16; - int32_t A_[RESTORATION_PROC_UNIT_PELS]; - int32_t B_[RESTORATION_PROC_UNIT_PELS]; - int32_t *A = A_; - int32_t *B = B_; - int i, j; - calculate_intermediate_result(dgd, width, height, dgd_stride, bit_depth, - sgr_params_idx, radius_idx, 0, A, B); - A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; - - // Use the A[] and B[] arrays to calculate the filtered image - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - const int k = i * buf_stride + j; - const int l = i * dgd_stride + j; - const int m = i * dst_stride + j; - const int nb = 5; - const int32_t a = - (A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) * - 4 + - (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] + - A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) * - 3; - const int32_t b = - (B[k] + B[k - 1] + B[k + 1] + B[k - buf_stride] + B[k + buf_stride]) * - 4 + - (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] + - B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) * - 3; - const int32_t v = a * 
dgd[l] + b; - dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); - } - } -} - -int av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height, - int dgd_stride, int32_t *flt0, int32_t *flt1, - int flt_stride, int sgr_params_idx, - int bit_depth, int highbd) { - int32_t dgd32_[RESTORATION_PROC_UNIT_PELS]; - const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ; - int32_t *dgd32 = - dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ; - - if (highbd) { - const uint16_t *dgd16 = CONVERT_TO_SHORTPTR(dgd8); - for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) { - for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) { - dgd32[i * dgd32_stride + j] = dgd16[i * dgd_stride + j]; - } - } - } else { - for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) { - for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) { - dgd32[i * dgd32_stride + j] = dgd8[i * dgd_stride + j]; - } - } - } - - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - // If params->r == 0 we skip the corresponding filter. We only allow one of - // the radii to be 0, as having both equal to 0 would be equivalent to - // skipping SGR entirely. 
- assert(!(params->r[0] == 0 && params->r[1] == 0)); - - if (params->r[0] > 0) - selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride, - flt0, flt_stride, bit_depth, - sgr_params_idx, 0); - if (params->r[1] > 0) - selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1, - flt_stride, bit_depth, sgr_params_idx, 1); - return 0; -} - -void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height, - int stride, int eps, const int *xqd, - uint8_t *dst8, int dst_stride, - int32_t *tmpbuf, int bit_depth, - int highbd) { - int32_t *flt0 = tmpbuf; - int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX; - assert(width * height <= RESTORATION_UNITPELS_MAX); - - const int ret = av1_selfguided_restoration_c( - dat8, width, height, stride, flt0, flt1, width, eps, bit_depth, highbd); - (void)ret; - assert(!ret); - const sgr_params_type *const params = &sgr_params[eps]; - int xq[2]; - decode_xq(xqd, xq, params); - for (int i = 0; i < height; ++i) { - for (int j = 0; j < width; ++j) { - const int k = i * width + j; - uint8_t *dst8ij = dst8 + i * dst_stride + j; - const uint8_t *dat8ij = dat8 + i * stride + j; - - const uint16_t pre_u = highbd ? *CONVERT_TO_SHORTPTR(dat8ij) : *dat8ij; - const int32_t u = (int32_t)pre_u << SGRPROJ_RST_BITS; - int32_t v = u << SGRPROJ_PRJ_BITS; - // If params->r == 0 then we skipped the filtering in - // av1_selfguided_restoration_c, i.e. 
flt[k] == u - if (params->r[0] > 0) v += xq[0] * (flt0[k] - u); - if (params->r[1] > 0) v += xq[1] * (flt1[k] - u); - const int16_t w = - (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - - const uint16_t out = clip_pixel_highbd(w, bit_depth); - if (highbd) - *CONVERT_TO_SHORTPTR(dst8ij) = out; - else - *dst8ij = (uint8_t)out; - } - } -} - -static void sgrproj_filter_stripe(const RestorationUnitInfo *rui, - int stripe_width, int stripe_height, - int procunit_width, const uint8_t *src, - int src_stride, uint8_t *dst, int dst_stride, - int32_t *tmpbuf, int bit_depth) { - (void)bit_depth; - assert(bit_depth == 8); - - for (int j = 0; j < stripe_width; j += procunit_width) { - int w = AOMMIN(procunit_width, stripe_width - j); - apply_selfguided_restoration(src + j, w, stripe_height, src_stride, - rui->sgrproj_info.ep, rui->sgrproj_info.xqd, - dst + j, dst_stride, tmpbuf, bit_depth, 0); - } -} - -static void wiener_filter_stripe_highbd(const RestorationUnitInfo *rui, - int stripe_width, int stripe_height, - int procunit_width, const uint8_t *src8, - int src_stride, uint8_t *dst8, - int dst_stride, int32_t *tmpbuf, - int bit_depth) { - (void)tmpbuf; - const ConvolveParams conv_params = get_conv_params_wiener(bit_depth); - - for (int j = 0; j < stripe_width; j += procunit_width) { - int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15); - const uint8_t *src8_p = src8 + j; - uint8_t *dst8_p = dst8 + j; - av1_highbd_wiener_convolve_add_src(src8_p, src_stride, dst8_p, dst_stride, - rui->wiener_info.hfilter, 16, - rui->wiener_info.vfilter, 16, w, - stripe_height, &conv_params, bit_depth); - } -} - -static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui, - int stripe_width, int stripe_height, - int procunit_width, - const uint8_t *src8, int src_stride, - uint8_t *dst8, int dst_stride, - int32_t *tmpbuf, int bit_depth) { - for (int j = 0; j < stripe_width; j += procunit_width) { - int w = AOMMIN(procunit_width, stripe_width - j); - 
apply_selfguided_restoration(src8 + j, w, stripe_height, src_stride, - rui->sgrproj_info.ep, rui->sgrproj_info.xqd, - dst8 + j, dst_stride, tmpbuf, bit_depth, 1); - } -} - -typedef void (*stripe_filter_fun)(const RestorationUnitInfo *rui, - int stripe_width, int stripe_height, - int procunit_width, const uint8_t *src, - int src_stride, uint8_t *dst, int dst_stride, - int32_t *tmpbuf, int bit_depth); - -#define NUM_STRIPE_FILTERS 4 - -static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = { - wiener_filter_stripe, sgrproj_filter_stripe, wiener_filter_stripe_highbd, - sgrproj_filter_stripe_highbd -}; - -// Filter one restoration unit -void av1_loop_restoration_filter_unit( - const RestorationTileLimits *limits, const RestorationUnitInfo *rui, - const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs, - const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y, - int highbd, int bit_depth, uint8_t *data8, int stride, uint8_t *dst8, - int dst_stride, int32_t *tmpbuf, int optimized_lr) { - RestorationType unit_rtype = rui->restoration_type; - - int unit_h = limits->v_end - limits->v_start; - int unit_w = limits->h_end - limits->h_start; - uint8_t *data8_tl = data8 + limits->v_start * stride + limits->h_start; - uint8_t *dst8_tl = dst8 + limits->v_start * dst_stride + limits->h_start; - - if (unit_rtype == RESTORE_NONE) { - copy_tile(unit_w, unit_h, data8_tl, stride, dst8_tl, dst_stride, highbd); - return; - } - - const int filter_idx = 2 * highbd + (unit_rtype == RESTORE_SGRPROJ); - assert(filter_idx < NUM_STRIPE_FILTERS); - const stripe_filter_fun stripe_filter = stripe_filters[filter_idx]; - - const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x; - - // Convolve the whole tile one stripe at a time - RestorationTileLimits remaining_stripes = *limits; - int i = 0; - while (i < unit_h) { - int copy_above, copy_below; - remaining_stripes.v_start = limits->v_start + i; - - get_stripe_boundary_info(&remaining_stripes, tile_rect, 
ss_y, ©_above, - ©_below); - - const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y; - const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y; - - // Work out where this stripe's boundaries are within - // rsb->stripe_boundary_{above,below} - const int tile_stripe = - (remaining_stripes.v_start - tile_rect->top + runit_offset) / - full_stripe_height; - const int frame_stripe = tile_stripe0 + tile_stripe; - const int rsb_row = RESTORATION_CTX_VERT * frame_stripe; - - // Calculate this stripe's height, based on two rules: - // * The topmost stripe in each tile is 8 luma pixels shorter than usual. - // * We can't extend past the end of the current restoration unit - const int nominal_stripe_height = - full_stripe_height - ((tile_stripe == 0) ? runit_offset : 0); - const int h = AOMMIN(nominal_stripe_height, - remaining_stripes.v_end - remaining_stripes.v_start); - - setup_processing_stripe_boundary(&remaining_stripes, rsb, rsb_row, highbd, - h, data8, stride, rlbs, copy_above, - copy_below, optimized_lr); - - stripe_filter(rui, unit_w, h, procunit_width, data8_tl + i * stride, stride, - dst8_tl + i * dst_stride, dst_stride, tmpbuf, bit_depth); - - restore_processing_stripe_boundary(&remaining_stripes, rlbs, highbd, h, - data8, stride, copy_above, copy_below, - optimized_lr); - - i += h; - } -} - -static void filter_frame_on_tile(int tile_row, int tile_col, void *priv, - AV1_COMMON *cm) { - (void)tile_col; - FilterFrameCtxt *ctxt = (FilterFrameCtxt *)priv; - ctxt->tile_stripe0 = (tile_row == 0) ? 
0 : cm->rst_end_stripe[tile_row - 1]; -} - -static void filter_frame_on_unit(const RestorationTileLimits *limits, - const AV1PixelRect *tile_rect, - int rest_unit_idx, void *priv, int32_t *tmpbuf, - RestorationLineBuffers *rlbs) { - FilterFrameCtxt *ctxt = (FilterFrameCtxt *)priv; - const RestorationInfo *rsi = ctxt->rsi; - - av1_loop_restoration_filter_unit( - limits, &rsi->unit_info[rest_unit_idx], &rsi->boundaries, rlbs, tile_rect, - ctxt->tile_stripe0, ctxt->ss_x, ctxt->ss_y, ctxt->highbd, ctxt->bit_depth, - ctxt->data8, ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, tmpbuf, - rsi->optimized_lr); -} - -void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt, - YV12_BUFFER_CONFIG *frame, - AV1_COMMON *cm, int optimized_lr, - int num_planes) { - const SequenceHeader *const seq_params = &cm->seq_params; - const int bit_depth = seq_params->bit_depth; - const int highbd = seq_params->use_highbitdepth; - lr_ctxt->dst = &cm->rst_frame; - - const int frame_width = frame->crop_widths[0]; - const int frame_height = frame->crop_heights[0]; - if (aom_realloc_frame_buffer( - lr_ctxt->dst, frame_width, frame_height, seq_params->subsampling_x, - seq_params->subsampling_y, highbd, AOM_BORDER_IN_PIXELS, - cm->byte_alignment, NULL, NULL, NULL) < 0) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate restoration dst buffer"); - - lr_ctxt->on_rest_unit = filter_frame_on_unit; - lr_ctxt->frame = frame; - for (int plane = 0; plane < num_planes; ++plane) { - RestorationInfo *rsi = &cm->rst_info[plane]; - RestorationType rtype = rsi->frame_restoration_type; - rsi->optimized_lr = optimized_lr; - - if (rtype == RESTORE_NONE) { - continue; - } - - const int is_uv = plane > 0; - const int plane_width = frame->crop_widths[is_uv]; - const int plane_height = frame->crop_heights[is_uv]; - FilterFrameCtxt *lr_plane_ctxt = &lr_ctxt->ctxt[plane]; - - extend_frame(frame->buffers[plane], plane_width, plane_height, - frame->strides[is_uv], RESTORATION_BORDER, 
RESTORATION_BORDER, - highbd); - - lr_plane_ctxt->rsi = rsi; - lr_plane_ctxt->ss_x = is_uv && seq_params->subsampling_x; - lr_plane_ctxt->ss_y = is_uv && seq_params->subsampling_y; - lr_plane_ctxt->highbd = highbd; - lr_plane_ctxt->bit_depth = bit_depth; - lr_plane_ctxt->data8 = frame->buffers[plane]; - lr_plane_ctxt->dst8 = lr_ctxt->dst->buffers[plane]; - lr_plane_ctxt->data_stride = frame->strides[is_uv]; - lr_plane_ctxt->dst_stride = lr_ctxt->dst->strides[is_uv]; - lr_plane_ctxt->tile_rect = av1_whole_frame_rect(cm, is_uv); - filter_frame_on_tile(LR_TILE_ROW, LR_TILE_COL, lr_plane_ctxt, cm); - } -} - -void av1_loop_restoration_copy_planes(AV1LrStruct *loop_rest_ctxt, - AV1_COMMON *cm, int num_planes) { - typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend, - int vstart, int vend); - static const copy_fun copy_funs[3] = { - aom_yv12_partial_copy_y, aom_yv12_partial_copy_u, aom_yv12_partial_copy_v - }; - - for (int plane = 0; plane < num_planes; ++plane) { - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; - AV1PixelRect tile_rect = loop_rest_ctxt->ctxt[plane].tile_rect; - copy_funs[plane](loop_rest_ctxt->dst, loop_rest_ctxt->frame, tile_rect.left, - tile_rect.right, tile_rect.top, tile_rect.bottom); - } -} - -static void foreach_rest_unit_in_planes(AV1LrStruct *lr_ctxt, AV1_COMMON *cm, - int num_planes) { - FilterFrameCtxt *ctxt = lr_ctxt->ctxt; - - for (int plane = 0; plane < num_planes; ++plane) { - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) { - continue; - } - - av1_foreach_rest_unit_in_plane(cm, plane, lr_ctxt->on_rest_unit, - &ctxt[plane], &ctxt[plane].tile_rect, - cm->rst_tmpbuf, cm->rlbs); - } -} - -void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame, - AV1_COMMON *cm, int optimized_lr, - void *lr_ctxt) { - assert(!cm->all_lossless); - const int num_planes = av1_num_planes(cm); - - AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt; - - 
av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm, - optimized_lr, num_planes); - - foreach_rest_unit_in_planes(loop_rest_ctxt, cm, num_planes); - - av1_loop_restoration_copy_planes(loop_rest_ctxt, cm, num_planes); -} - -void av1_foreach_rest_unit_in_row( - RestorationTileLimits *limits, const AV1PixelRect *tile_rect, - rest_unit_visitor_t on_rest_unit, int row_number, int unit_size, - int unit_idx0, int hunits_per_tile, int vunits_per_tile, int plane, - void *priv, int32_t *tmpbuf, RestorationLineBuffers *rlbs, - sync_read_fn_t on_sync_read, sync_write_fn_t on_sync_write, - struct AV1LrSyncData *const lr_sync) { - const int tile_w = tile_rect->right - tile_rect->left; - const int ext_size = unit_size * 3 / 2; - int x0 = 0, j = 0; - while (x0 < tile_w) { - int remaining_w = tile_w - x0; - int w = (remaining_w < ext_size) ? remaining_w : unit_size; - - limits->h_start = tile_rect->left + x0; - limits->h_end = tile_rect->left + x0 + w; - assert(limits->h_end <= tile_rect->right); - - const int unit_idx = unit_idx0 + row_number * hunits_per_tile + j; - - // No sync for even numbered rows - // For odd numbered rows, Loop Restoration of current block requires the LR - // of top-right and bottom-right blocks to be completed - - // top-right sync - on_sync_read(lr_sync, row_number, j, plane); - if ((row_number + 1) < vunits_per_tile) - // bottom-right sync - on_sync_read(lr_sync, row_number + 2, j, plane); - - on_rest_unit(limits, tile_rect, unit_idx, priv, tmpbuf, rlbs); - - on_sync_write(lr_sync, row_number, j, hunits_per_tile, plane); - - x0 += w; - ++j; - } -} - -void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane) { - (void)lr_sync; - (void)r; - (void)c; - (void)plane; -} - -void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c, - const int sb_cols, int plane) { - (void)lr_sync; - (void)r; - (void)c; - (void)sb_cols; - (void)plane; -} - -static void foreach_rest_unit_in_tile( - const AV1PixelRect *tile_rect, int tile_row, 
int tile_col, int tile_cols, - int hunits_per_tile, int vunits_per_tile, int units_per_tile, int unit_size, - int ss_y, int plane, rest_unit_visitor_t on_rest_unit, void *priv, - int32_t *tmpbuf, RestorationLineBuffers *rlbs) { - const int tile_h = tile_rect->bottom - tile_rect->top; - const int ext_size = unit_size * 3 / 2; - - const int tile_idx = tile_col + tile_row * tile_cols; - const int unit_idx0 = tile_idx * units_per_tile; - - int y0 = 0, i = 0; - while (y0 < tile_h) { - int remaining_h = tile_h - y0; - int h = (remaining_h < ext_size) ? remaining_h : unit_size; - - RestorationTileLimits limits; - limits.v_start = tile_rect->top + y0; - limits.v_end = tile_rect->top + y0 + h; - assert(limits.v_end <= tile_rect->bottom); - // Offset the tile upwards to align with the restoration processing stripe - const int voffset = RESTORATION_UNIT_OFFSET >> ss_y; - limits.v_start = AOMMAX(tile_rect->top, limits.v_start - voffset); - if (limits.v_end < tile_rect->bottom) limits.v_end -= voffset; - - av1_foreach_rest_unit_in_row( - &limits, tile_rect, on_rest_unit, i, unit_size, unit_idx0, - hunits_per_tile, vunits_per_tile, plane, priv, tmpbuf, rlbs, - av1_lr_sync_read_dummy, av1_lr_sync_write_dummy, NULL); - - y0 += h; - ++i; - } -} - -void av1_foreach_rest_unit_in_plane(const struct AV1Common *cm, int plane, - rest_unit_visitor_t on_rest_unit, - void *priv, AV1PixelRect *tile_rect, - int32_t *tmpbuf, - RestorationLineBuffers *rlbs) { - const int is_uv = plane > 0; - const int ss_y = is_uv && cm->seq_params.subsampling_y; - - const RestorationInfo *rsi = &cm->rst_info[plane]; - - foreach_rest_unit_in_tile(tile_rect, LR_TILE_ROW, LR_TILE_COL, LR_TILE_COLS, - rsi->horz_units_per_tile, rsi->vert_units_per_tile, - rsi->units_per_tile, rsi->restoration_unit_size, - ss_y, plane, on_rest_unit, priv, tmpbuf, rlbs); -} - -int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane, - int mi_row, int mi_col, BLOCK_SIZE bsize, - int *rcol0, int *rcol1, int *rrow0, 
- int *rrow1) { - assert(rcol0 && rcol1 && rrow0 && rrow1); - - if (bsize != cm->seq_params.sb_size) return 0; - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) return 0; - - assert(!cm->all_lossless); - - const int is_uv = plane > 0; - - const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv); - const int tile_w = tile_rect.right - tile_rect.left; - const int tile_h = tile_rect.bottom - tile_rect.top; - - const int mi_top = 0; - const int mi_left = 0; - - // Compute the mi-unit corners of the superblock relative to the top-left of - // the tile - const int mi_rel_row0 = mi_row - mi_top; - const int mi_rel_col0 = mi_col - mi_left; - const int mi_rel_row1 = mi_rel_row0 + mi_size_high[bsize]; - const int mi_rel_col1 = mi_rel_col0 + mi_size_wide[bsize]; - - const RestorationInfo *rsi = &cm->rst_info[plane]; - const int size = rsi->restoration_unit_size; - - // Calculate the number of restoration units in this tile (which might be - // strictly less than rsi->horz_units_per_tile and rsi->vert_units_per_tile) - const int horz_units = av1_lr_count_units_in_tile(size, tile_w); - const int vert_units = av1_lr_count_units_in_tile(size, tile_h); - - // The size of an MI-unit on this plane of the image - const int ss_x = is_uv && cm->seq_params.subsampling_x; - const int ss_y = is_uv && cm->seq_params.subsampling_y; - const int mi_size_x = MI_SIZE >> ss_x; - const int mi_size_y = MI_SIZE >> ss_y; - - // Write m for the relative mi column or row, D for the superres denominator - // and N for the superres numerator. If u is the upscaled pixel offset then - // we can write the downscaled pixel offset in two ways as: - // - // MI_SIZE * m = N / D u - // - // from which we get u = D * MI_SIZE * m / N - const int mi_to_num_x = av1_superres_scaled(cm) - ? mi_size_x * cm->superres_scale_denominator - : mi_size_x; - const int mi_to_num_y = mi_size_y; - const int denom_x = av1_superres_scaled(cm) ? 
size * SCALE_NUMERATOR : size; - const int denom_y = size; - - const int rnd_x = denom_x - 1; - const int rnd_y = denom_y - 1; - - // rcol0/rrow0 should be the first column/row of restoration units (relative - // to the top-left of the tile) that doesn't start left/below of - // mi_col/mi_row. For this calculation, we need to round up the division (if - // the sb starts at runit column 10.1, the first matching runit has column - // index 11) - *rcol0 = (mi_rel_col0 * mi_to_num_x + rnd_x) / denom_x; - *rrow0 = (mi_rel_row0 * mi_to_num_y + rnd_y) / denom_y; - - // rel_col1/rel_row1 is the equivalent calculation, but for the superblock - // below-right. If we're at the bottom or right of the tile, this restoration - // unit might not exist, in which case we'll clamp accordingly. - *rcol1 = AOMMIN((mi_rel_col1 * mi_to_num_x + rnd_x) / denom_x, horz_units); - *rrow1 = AOMMIN((mi_rel_row1 * mi_to_num_y + rnd_y) / denom_y, vert_units); - - return *rcol0 < *rcol1 && *rrow0 < *rrow1; -} - -// Extend to left and right -static void extend_lines(uint8_t *buf, int width, int height, int stride, - int extend, int use_highbitdepth) { - for (int i = 0; i < height; ++i) { - if (use_highbitdepth) { - uint16_t *buf16 = (uint16_t *)buf; - aom_memset16(buf16 - extend, buf16[0], extend); - aom_memset16(buf16 + width, buf16[width - 1], extend); - } else { - memset(buf - extend, buf[0], extend); - memset(buf + width, buf[width - 1], extend); - } - buf += stride; - } -} - -static void save_deblock_boundary_lines( - const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm, int plane, int row, - int stripe, int use_highbd, int is_above, - RestorationStripeBoundaries *boundaries) { - const int is_uv = plane > 0; - const uint8_t *src_buf = REAL_PTR(use_highbd, frame->buffers[plane]); - const int src_stride = frame->strides[is_uv] << use_highbd; - const uint8_t *src_rows = src_buf + row * src_stride; - - uint8_t *bdry_buf = is_above ? 
boundaries->stripe_boundary_above - : boundaries->stripe_boundary_below; - uint8_t *bdry_start = bdry_buf + (RESTORATION_EXTRA_HORZ << use_highbd); - const int bdry_stride = boundaries->stripe_boundary_stride << use_highbd; - uint8_t *bdry_rows = bdry_start + RESTORATION_CTX_VERT * stripe * bdry_stride; - - // There is a rare case in which a processing stripe can end 1px above the - // crop border. In this case, we do want to use deblocked pixels from below - // the stripe (hence why we ended up in this function), but instead of - // fetching 2 "below" rows we need to fetch one and duplicate it. - // This is equivalent to clamping the sample locations against the crop border - const int lines_to_save = - AOMMIN(RESTORATION_CTX_VERT, frame->crop_heights[is_uv] - row); - assert(lines_to_save == 1 || lines_to_save == 2); - - int upscaled_width; - int line_bytes; - if (av1_superres_scaled(cm)) { - const int ss_x = is_uv && cm->seq_params.subsampling_x; - upscaled_width = (cm->superres_upscaled_width + ss_x) >> ss_x; - line_bytes = upscaled_width << use_highbd; - if (use_highbd) - av1_upscale_normative_rows( - cm, CONVERT_TO_BYTEPTR(src_rows), frame->strides[is_uv], - CONVERT_TO_BYTEPTR(bdry_rows), boundaries->stripe_boundary_stride, - plane, lines_to_save); - else - av1_upscale_normative_rows(cm, src_rows, frame->strides[is_uv], bdry_rows, - boundaries->stripe_boundary_stride, plane, - lines_to_save); - } else { - upscaled_width = frame->crop_widths[is_uv]; - line_bytes = upscaled_width << use_highbd; - for (int i = 0; i < lines_to_save; i++) { - memcpy(bdry_rows + i * bdry_stride, src_rows + i * src_stride, - line_bytes); - } - } - // If we only saved one line, then copy it into the second line buffer - if (lines_to_save == 1) - memcpy(bdry_rows + bdry_stride, bdry_rows, line_bytes); - - extend_lines(bdry_rows, upscaled_width, RESTORATION_CTX_VERT, bdry_stride, - RESTORATION_EXTRA_HORZ, use_highbd); -} - -static void save_cdef_boundary_lines(const YV12_BUFFER_CONFIG 
*frame, - const AV1_COMMON *cm, int plane, int row, - int stripe, int use_highbd, int is_above, - RestorationStripeBoundaries *boundaries) { - const int is_uv = plane > 0; - const uint8_t *src_buf = REAL_PTR(use_highbd, frame->buffers[plane]); - const int src_stride = frame->strides[is_uv] << use_highbd; - const uint8_t *src_rows = src_buf + row * src_stride; - - uint8_t *bdry_buf = is_above ? boundaries->stripe_boundary_above - : boundaries->stripe_boundary_below; - uint8_t *bdry_start = bdry_buf + (RESTORATION_EXTRA_HORZ << use_highbd); - const int bdry_stride = boundaries->stripe_boundary_stride << use_highbd; - uint8_t *bdry_rows = bdry_start + RESTORATION_CTX_VERT * stripe * bdry_stride; - const int src_width = frame->crop_widths[is_uv]; - - // At the point where this function is called, we've already applied - // superres. So we don't need to extend the lines here, we can just - // pull directly from the topmost row of the upscaled frame. - const int ss_x = is_uv && cm->seq_params.subsampling_x; - const int upscaled_width = av1_superres_scaled(cm) - ? (cm->superres_upscaled_width + ss_x) >> ss_x - : src_width; - const int line_bytes = upscaled_width << use_highbd; - for (int i = 0; i < RESTORATION_CTX_VERT; i++) { - // Copy the line at 'row' into both context lines. This is because - // we want to (effectively) extend the outermost row of CDEF data - // from this tile to produce a border, rather than using deblocked - // pixels from the tile above/below. 
- memcpy(bdry_rows + i * bdry_stride, src_rows, line_bytes); - } - extend_lines(bdry_rows, upscaled_width, RESTORATION_CTX_VERT, bdry_stride, - RESTORATION_EXTRA_HORZ, use_highbd); -} - -static void save_tile_row_boundary_lines(const YV12_BUFFER_CONFIG *frame, - int use_highbd, int plane, - AV1_COMMON *cm, int after_cdef) { - const int is_uv = plane > 0; - const int ss_y = is_uv && cm->seq_params.subsampling_y; - const int stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y; - const int stripe_off = RESTORATION_UNIT_OFFSET >> ss_y; - - // Get the tile rectangle, with height rounded up to the next multiple of 8 - // luma pixels (only relevant for the bottom tile of the frame) - const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv); - const int stripe0 = 0; - - RestorationStripeBoundaries *boundaries = &cm->rst_info[plane].boundaries; - - const int plane_height = ROUND_POWER_OF_TWO(cm->height, ss_y); - - int tile_stripe; - for (tile_stripe = 0;; ++tile_stripe) { - const int rel_y0 = AOMMAX(0, tile_stripe * stripe_height - stripe_off); - const int y0 = tile_rect.top + rel_y0; - if (y0 >= tile_rect.bottom) break; - - const int rel_y1 = (tile_stripe + 1) * stripe_height - stripe_off; - const int y1 = AOMMIN(tile_rect.top + rel_y1, tile_rect.bottom); - - const int frame_stripe = stripe0 + tile_stripe; - - // In this case, we should only use CDEF pixels at the top - // and bottom of the frame as a whole; internal tile boundaries - // can use deblocked pixels from adjacent tiles for context. - const int use_deblock_above = (frame_stripe > 0); - const int use_deblock_below = (y1 < plane_height); - - if (!after_cdef) { - // Save deblocked context where needed. 
- if (use_deblock_above) { - save_deblock_boundary_lines(frame, cm, plane, y0 - RESTORATION_CTX_VERT, - frame_stripe, use_highbd, 1, boundaries); - } - if (use_deblock_below) { - save_deblock_boundary_lines(frame, cm, plane, y1, frame_stripe, - use_highbd, 0, boundaries); - } - } else { - // Save CDEF context where needed. Note that we need to save the CDEF - // context for a particular boundary iff we *didn't* save deblocked - // context for that boundary. - // - // In addition, we need to save copies of the outermost line within - // the tile, rather than using data from outside the tile. - if (!use_deblock_above) { - save_cdef_boundary_lines(frame, cm, plane, y0, frame_stripe, use_highbd, - 1, boundaries); - } - if (!use_deblock_below) { - save_cdef_boundary_lines(frame, cm, plane, y1 - 1, frame_stripe, - use_highbd, 0, boundaries); - } - } - } -} - -// For each RESTORATION_PROC_UNIT_SIZE pixel high stripe, save 4 scan -// lines to be used as boundary in the loop restoration process. The -// lines are saved in rst_internal.stripe_boundary_lines -void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame, - AV1_COMMON *cm, int after_cdef) { - const int num_planes = av1_num_planes(cm); - const int use_highbd = cm->seq_params.use_highbitdepth; - for (int p = 0; p < num_planes; ++p) { - save_tile_row_boundary_lines(frame, use_highbd, p, cm, after_cdef); - } -} diff --git a/third_party/aom/av1/common/restoration.h b/third_party/aom/av1/common/restoration.h deleted file mode 100644 index d834f9270..000000000 --- a/third_party/aom/av1/common/restoration.h +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_RESTORATION_H_ -#define AOM_AV1_COMMON_RESTORATION_H_ - -#include "aom_ports/mem.h" -#include "config/aom_config.h" - -#include "av1/common/blockd.h" -#include "av1/common/enums.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define CLIP(x, lo, hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x)) -#define RINT(x) ((x) < 0 ? (int)((x)-0.5) : (int)((x) + 0.5)) - -#define RESTORATION_PROC_UNIT_SIZE 64 - -// Filter tile grid offset upwards compared to the superblock grid -#define RESTORATION_UNIT_OFFSET 8 - -#define SGRPROJ_BORDER_VERT 3 // Vertical border used for Sgr -#define SGRPROJ_BORDER_HORZ 3 // Horizontal border used for Sgr - -#define WIENER_BORDER_VERT 2 // Vertical border used for Wiener -#define WIENER_HALFWIN 3 -#define WIENER_BORDER_HORZ (WIENER_HALFWIN) // Horizontal border for Wiener - -// RESTORATION_BORDER_VERT determines line buffer requirement for LR. -// Should be set at the max of SGRPROJ_BORDER_VERT and WIENER_BORDER_VERT. -// Note the line buffer needed is twice the value of this macro. -#if SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT -#define RESTORATION_BORDER_VERT (SGRPROJ_BORDER_VERT) -#else -#define RESTORATION_BORDER_VERT (WIENER_BORDER_VERT) -#endif // SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT - -#if SGRPROJ_BORDER_HORZ >= WIENER_BORDER_HORZ -#define RESTORATION_BORDER_HORZ (SGRPROJ_BORDER_HORZ) -#else -#define RESTORATION_BORDER_HORZ (WIENER_BORDER_HORZ) -#endif // SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT - -// How many border pixels do we need for each processing unit? -#define RESTORATION_BORDER 3 - -// How many rows of deblocked pixels do we save above/below each processing -// stripe? 
-#define RESTORATION_CTX_VERT 2 - -// Additional pixels to the left and right in above/below buffers -// It is RESTORATION_BORDER_HORZ rounded up to get nicer buffer alignment -#define RESTORATION_EXTRA_HORZ 4 - -// Pad up to 20 more (may be much less is needed) -#define RESTORATION_PADDING 20 -#define RESTORATION_PROC_UNIT_PELS \ - ((RESTORATION_PROC_UNIT_SIZE + RESTORATION_BORDER_HORZ * 2 + \ - RESTORATION_PADDING) * \ - (RESTORATION_PROC_UNIT_SIZE + RESTORATION_BORDER_VERT * 2 + \ - RESTORATION_PADDING)) - -#define RESTORATION_UNITSIZE_MAX 256 -#define RESTORATION_UNITPELS_HORZ_MAX \ - (RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16) -#define RESTORATION_UNITPELS_VERT_MAX \ - ((RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT + \ - RESTORATION_UNIT_OFFSET)) -#define RESTORATION_UNITPELS_MAX \ - (RESTORATION_UNITPELS_HORZ_MAX * RESTORATION_UNITPELS_VERT_MAX) - -// Two 32-bit buffers needed for the restored versions from two filters -// TODO(debargha, rupert): Refactor to not need the large tilesize to be stored -// on the decoder side. 
-#define SGRPROJ_TMPBUF_SIZE (RESTORATION_UNITPELS_MAX * 2 * sizeof(int32_t)) - -#define SGRPROJ_EXTBUF_SIZE (0) -#define SGRPROJ_PARAMS_BITS 4 -#define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS) - -// Precision bits for projection -#define SGRPROJ_PRJ_BITS 7 -// Restoration precision bits generated higher than source before projection -#define SGRPROJ_RST_BITS 4 -// Internal precision bits for core selfguided_restoration -#define SGRPROJ_SGR_BITS 8 -#define SGRPROJ_SGR (1 << SGRPROJ_SGR_BITS) - -#define SGRPROJ_PRJ_MIN0 (-(1 << SGRPROJ_PRJ_BITS) * 3 / 4) -#define SGRPROJ_PRJ_MAX0 (SGRPROJ_PRJ_MIN0 + (1 << SGRPROJ_PRJ_BITS) - 1) -#define SGRPROJ_PRJ_MIN1 (-(1 << SGRPROJ_PRJ_BITS) / 4) -#define SGRPROJ_PRJ_MAX1 (SGRPROJ_PRJ_MIN1 + (1 << SGRPROJ_PRJ_BITS) - 1) - -#define SGRPROJ_PRJ_SUBEXP_K 4 - -#define SGRPROJ_BITS (SGRPROJ_PRJ_BITS * 2 + SGRPROJ_PARAMS_BITS) - -#define MAX_RADIUS 2 // Only 1, 2, 3 allowed -#define MAX_NELEM ((2 * MAX_RADIUS + 1) * (2 * MAX_RADIUS + 1)) -#define SGRPROJ_MTABLE_BITS 20 -#define SGRPROJ_RECIP_BITS 12 - -#define WIENER_HALFWIN1 (WIENER_HALFWIN + 1) -#define WIENER_WIN (2 * WIENER_HALFWIN + 1) -#define WIENER_WIN2 ((WIENER_WIN) * (WIENER_WIN)) -#define WIENER_TMPBUF_SIZE (0) -#define WIENER_EXTBUF_SIZE (0) - -// If WIENER_WIN_CHROMA == WIENER_WIN - 2, that implies 5x5 filters are used for -// chroma. To use 7x7 for chroma set WIENER_WIN_CHROMA to WIENER_WIN. 
-#define WIENER_WIN_CHROMA (WIENER_WIN - 2) -#define WIENER_WIN2_CHROMA ((WIENER_WIN_CHROMA) * (WIENER_WIN_CHROMA)) - -#define WIENER_FILT_PREC_BITS 7 -#define WIENER_FILT_STEP (1 << WIENER_FILT_PREC_BITS) - -// Central values for the taps -#define WIENER_FILT_TAP0_MIDV (3) -#define WIENER_FILT_TAP1_MIDV (-7) -#define WIENER_FILT_TAP2_MIDV (15) -#define WIENER_FILT_TAP3_MIDV \ - (WIENER_FILT_STEP - 2 * (WIENER_FILT_TAP0_MIDV + WIENER_FILT_TAP1_MIDV + \ - WIENER_FILT_TAP2_MIDV)) - -#define WIENER_FILT_TAP0_BITS 4 -#define WIENER_FILT_TAP1_BITS 5 -#define WIENER_FILT_TAP2_BITS 6 - -#define WIENER_FILT_BITS \ - ((WIENER_FILT_TAP0_BITS + WIENER_FILT_TAP1_BITS + WIENER_FILT_TAP2_BITS) * 2) - -#define WIENER_FILT_TAP0_MINV \ - (WIENER_FILT_TAP0_MIDV - (1 << WIENER_FILT_TAP0_BITS) / 2) -#define WIENER_FILT_TAP1_MINV \ - (WIENER_FILT_TAP1_MIDV - (1 << WIENER_FILT_TAP1_BITS) / 2) -#define WIENER_FILT_TAP2_MINV \ - (WIENER_FILT_TAP2_MIDV - (1 << WIENER_FILT_TAP2_BITS) / 2) - -#define WIENER_FILT_TAP0_MAXV \ - (WIENER_FILT_TAP0_MIDV - 1 + (1 << WIENER_FILT_TAP0_BITS) / 2) -#define WIENER_FILT_TAP1_MAXV \ - (WIENER_FILT_TAP1_MIDV - 1 + (1 << WIENER_FILT_TAP1_BITS) / 2) -#define WIENER_FILT_TAP2_MAXV \ - (WIENER_FILT_TAP2_MIDV - 1 + (1 << WIENER_FILT_TAP2_BITS) / 2) - -#define WIENER_FILT_TAP0_SUBEXP_K 1 -#define WIENER_FILT_TAP1_SUBEXP_K 2 -#define WIENER_FILT_TAP2_SUBEXP_K 3 - -// Max of SGRPROJ_TMPBUF_SIZE, DOMAINTXFMRF_TMPBUF_SIZE, WIENER_TMPBUF_SIZE -#define RESTORATION_TMPBUF_SIZE (SGRPROJ_TMPBUF_SIZE) - -// Max of SGRPROJ_EXTBUF_SIZE, WIENER_EXTBUF_SIZE -#define RESTORATION_EXTBUF_SIZE (WIENER_EXTBUF_SIZE) - -// Check the assumptions of the existing code -#if SUBPEL_TAPS != WIENER_WIN + 1 -#error "Wiener filter currently only works if SUBPEL_TAPS == WIENER_WIN + 1" -#endif -#if WIENER_FILT_PREC_BITS != 7 -#error "Wiener filter currently only works if WIENER_FILT_PREC_BITS == 7" -#endif - -#define LR_TILE_ROW 0 -#define LR_TILE_COL 0 -#define LR_TILE_COLS 1 - -typedef 
struct { - int r[2]; // radii - int s[2]; // sgr parameters for r[0] and r[1], based on GenSgrprojVtable() -} sgr_params_type; - -typedef struct { - RestorationType restoration_type; - WienerInfo wiener_info; - SgrprojInfo sgrproj_info; -} RestorationUnitInfo; - -// A restoration line buffer needs space for two lines plus a horizontal filter -// margin of RESTORATION_EXTRA_HORZ on each side. -#define RESTORATION_LINEBUFFER_WIDTH \ - (RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_EXTRA_HORZ) - -// Similarly, the column buffers (used when we're at a vertical tile edge -// that we can't filter across) need space for one processing unit's worth -// of pixels, plus the top/bottom border width -#define RESTORATION_COLBUFFER_HEIGHT \ - (RESTORATION_PROC_UNIT_SIZE + 2 * RESTORATION_BORDER) - -typedef struct { - // Temporary buffers to save/restore 3 lines above/below the restoration - // stripe. - uint16_t tmp_save_above[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH]; - uint16_t tmp_save_below[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH]; -} RestorationLineBuffers; - -typedef struct { - uint8_t *stripe_boundary_above; - uint8_t *stripe_boundary_below; - int stripe_boundary_stride; - int stripe_boundary_size; -} RestorationStripeBoundaries; - -typedef struct { - RestorationType frame_restoration_type; - int restoration_unit_size; - - // Fields below here are allocated and initialised by - // av1_alloc_restoration_struct. (horz_)units_per_tile give the number of - // restoration units in (one row of) the largest tile in the frame. The data - // in unit_info is laid out with units_per_tile entries for each tile, which - // have stride horz_units_per_tile. - // - // Even if there are tiles of different sizes, the data in unit_info is laid - // out as if all tiles are of full size. 
- int units_per_tile; - int vert_units_per_tile, horz_units_per_tile; - RestorationUnitInfo *unit_info; - RestorationStripeBoundaries boundaries; - int optimized_lr; -} RestorationInfo; - -static INLINE void set_default_sgrproj(SgrprojInfo *sgrproj_info) { - sgrproj_info->xqd[0] = (SGRPROJ_PRJ_MIN0 + SGRPROJ_PRJ_MAX0) / 2; - sgrproj_info->xqd[1] = (SGRPROJ_PRJ_MIN1 + SGRPROJ_PRJ_MAX1) / 2; -} - -static INLINE void set_default_wiener(WienerInfo *wiener_info) { - wiener_info->vfilter[0] = wiener_info->hfilter[0] = WIENER_FILT_TAP0_MIDV; - wiener_info->vfilter[1] = wiener_info->hfilter[1] = WIENER_FILT_TAP1_MIDV; - wiener_info->vfilter[2] = wiener_info->hfilter[2] = WIENER_FILT_TAP2_MIDV; - wiener_info->vfilter[WIENER_HALFWIN] = wiener_info->hfilter[WIENER_HALFWIN] = - -2 * - (WIENER_FILT_TAP2_MIDV + WIENER_FILT_TAP1_MIDV + WIENER_FILT_TAP0_MIDV); - wiener_info->vfilter[4] = wiener_info->hfilter[4] = WIENER_FILT_TAP2_MIDV; - wiener_info->vfilter[5] = wiener_info->hfilter[5] = WIENER_FILT_TAP1_MIDV; - wiener_info->vfilter[6] = wiener_info->hfilter[6] = WIENER_FILT_TAP0_MIDV; -} - -typedef struct { - int h_start, h_end, v_start, v_end; -} RestorationTileLimits; - -typedef void (*rest_unit_visitor_t)(const RestorationTileLimits *limits, - const AV1PixelRect *tile_rect, - int rest_unit_idx, void *priv, - int32_t *tmpbuf, - RestorationLineBuffers *rlbs); - -typedef struct FilterFrameCtxt { - const RestorationInfo *rsi; - int tile_stripe0; - int ss_x, ss_y; - int highbd, bit_depth; - uint8_t *data8, *dst8; - int data_stride, dst_stride; - AV1PixelRect tile_rect; -} FilterFrameCtxt; - -typedef struct AV1LrStruct { - rest_unit_visitor_t on_rest_unit; - FilterFrameCtxt ctxt[MAX_MB_PLANE]; - YV12_BUFFER_CONFIG *frame; - YV12_BUFFER_CONFIG *dst; -} AV1LrStruct; - -extern const sgr_params_type sgr_params[SGRPROJ_PARAMS]; -extern int sgrproj_mtable[SGRPROJ_PARAMS][2]; -extern const int32_t x_by_xplus1[256]; -extern const int32_t one_by_x[MAX_NELEM]; - -void 
av1_alloc_restoration_struct(struct AV1Common *cm, RestorationInfo *rsi, - int is_uv); -void av1_free_restoration_struct(RestorationInfo *rst_info); - -void extend_frame(uint8_t *data, int width, int height, int stride, - int border_horz, int border_vert, int highbd); -void decode_xq(const int *xqd, int *xq, const sgr_params_type *params); - -// Filter a single loop restoration unit. -// -// limits is the limits of the unit. rui gives the mode to use for this unit -// and its coefficients. If striped loop restoration is enabled, rsb contains -// deblocked pixels to use for stripe boundaries; rlbs is just some space to -// use as a scratch buffer. tile_rect gives the limits of the tile containing -// this unit. tile_stripe0 is the index of the first stripe in this tile. -// -// ss_x and ss_y are flags which should be 1 if this is a plane with -// horizontal/vertical subsampling, respectively. highbd is a flag which should -// be 1 in high bit depth mode, in which case bit_depth is the bit depth. -// -// data8 is the frame data (pointing at the top-left corner of the frame, not -// the restoration unit) and stride is its stride. dst8 is the buffer where the -// results will be written and has stride dst_stride. Like data8, dst8 should -// point at the top-left corner of the frame. -// -// Finally tmpbuf is a scratch buffer used by the sgrproj filter which should -// be at least SGRPROJ_TMPBUF_SIZE big. 
-void av1_loop_restoration_filter_unit( - const RestorationTileLimits *limits, const RestorationUnitInfo *rui, - const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs, - const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y, - int highbd, int bit_depth, uint8_t *data8, int stride, uint8_t *dst8, - int dst_stride, int32_t *tmpbuf, int optimized_lr); - -void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame, - struct AV1Common *cm, int optimized_lr, - void *lr_ctxt); -void av1_loop_restoration_precal(); - -typedef void (*rest_tile_start_visitor_t)(int tile_row, int tile_col, - void *priv); -struct AV1LrSyncData; - -typedef void (*sync_read_fn_t)(void *const lr_sync, int r, int c, int plane); - -typedef void (*sync_write_fn_t)(void *const lr_sync, int r, int c, - const int sb_cols, int plane); - -// Call on_rest_unit for each loop restoration unit in the plane. -void av1_foreach_rest_unit_in_plane(const struct AV1Common *cm, int plane, - rest_unit_visitor_t on_rest_unit, - void *priv, AV1PixelRect *tile_rect, - int32_t *tmpbuf, - RestorationLineBuffers *rlbs); - -// Return 1 iff the block at mi_row, mi_col with size bsize is a -// top-level superblock containing the top-left corner of at least one -// loop restoration unit. -// -// If the block is a top-level superblock, the function writes to -// *rcol0, *rcol1, *rrow0, *rrow1. The rectangle of restoration unit -// indices given by [*rcol0, *rcol1) x [*rrow0, *rrow1) are relative -// to the current tile, whose starting index is returned as -// *tile_tl_idx. 
-int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane, - int mi_row, int mi_col, BLOCK_SIZE bsize, - int *rcol0, int *rcol1, int *rrow0, - int *rrow1); - -void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame, - struct AV1Common *cm, - int after_cdef); -void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt, - YV12_BUFFER_CONFIG *frame, - struct AV1Common *cm, - int optimized_lr, int num_planes); -void av1_loop_restoration_copy_planes(AV1LrStruct *loop_rest_ctxt, - struct AV1Common *cm, int num_planes); -void av1_foreach_rest_unit_in_row( - RestorationTileLimits *limits, const AV1PixelRect *tile_rect, - rest_unit_visitor_t on_rest_unit, int row_number, int unit_size, - int unit_idx0, int hunits_per_tile, int vunits_per_tile, int plane, - void *priv, int32_t *tmpbuf, RestorationLineBuffers *rlbs, - sync_read_fn_t on_sync_read, sync_write_fn_t on_sync_write, - struct AV1LrSyncData *const lr_sync); -AV1PixelRect av1_whole_frame_rect(const struct AV1Common *cm, int is_uv); -int av1_lr_count_units_in_tile(int unit_size, int tile_size); -void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane); -void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c, - const int sb_cols, int plane); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_RESTORATION_H_ diff --git a/third_party/aom/av1/common/scale.c b/third_party/aom/av1/common/scale.c deleted file mode 100644 index c525fe229..000000000 --- a/third_party/aom/av1/common/scale.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "av1/common/filter.h" -#include "av1/common/scale.h" -#include "aom_dsp/aom_filter.h" - -// Note: Expect val to be in q4 precision -static INLINE int scaled_x(int val, const struct scale_factors *sf) { - const int off = - (sf->x_scale_fp - (1 << REF_SCALE_SHIFT)) * (1 << (SUBPEL_BITS - 1)); - const int64_t tval = (int64_t)val * sf->x_scale_fp + off; - return (int)ROUND_POWER_OF_TWO_SIGNED_64(tval, - REF_SCALE_SHIFT - SCALE_EXTRA_BITS); -} - -// Note: Expect val to be in q4 precision -static INLINE int scaled_y(int val, const struct scale_factors *sf) { - const int off = - (sf->y_scale_fp - (1 << REF_SCALE_SHIFT)) * (1 << (SUBPEL_BITS - 1)); - const int64_t tval = (int64_t)val * sf->y_scale_fp + off; - return (int)ROUND_POWER_OF_TWO_SIGNED_64(tval, - REF_SCALE_SHIFT - SCALE_EXTRA_BITS); -} - -// Note: Expect val to be in q4 precision -static int unscaled_value(int val, const struct scale_factors *sf) { - (void)sf; - return val << SCALE_EXTRA_BITS; -} - -static int get_fixed_point_scale_factor(int other_size, int this_size) { - // Calculate scaling factor once for each reference frame - // and use fixed point scaling factors in decoding and encoding routines. - // Hardware implementations can calculate scale factor in device driver - // and use multiplication and shifting on hardware instead of division. - return ((other_size << REF_SCALE_SHIFT) + this_size / 2) / this_size; -} - -// Given the fixed point scale, calculate coarse point scale. -static int fixed_point_scale_to_coarse_point_scale(int scale_fp) { - return ROUND_POWER_OF_TWO(scale_fp, REF_SCALE_SHIFT - SCALE_SUBPEL_BITS); -} - -// Note: x and y are integer precision, mvq4 is q4 precision. 
-MV32 av1_scale_mv(const MV *mvq4, int x, int y, - const struct scale_factors *sf) { - const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf); - const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf); - const MV32 res = { scaled_y((y << SUBPEL_BITS) + mvq4->row, sf) - y_off_q4, - scaled_x((x << SUBPEL_BITS) + mvq4->col, sf) - x_off_q4 }; - return res; -} - -void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, - int other_h, int this_w, int this_h) { - if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { - sf->x_scale_fp = REF_INVALID_SCALE; - sf->y_scale_fp = REF_INVALID_SCALE; - return; - } - - sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w); - sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h); - - sf->x_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->x_scale_fp); - sf->y_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->y_scale_fp); - - if (av1_is_scaled(sf)) { - sf->scale_value_x = scaled_x; - sf->scale_value_y = scaled_y; - } else { - sf->scale_value_x = unscaled_value; - sf->scale_value_y = unscaled_value; - } - - // AV1 convolve functions - // Special case convolve functions should produce the same result as - // av1_convolve_2d. - // subpel_x_q4 == 0 && subpel_y_q4 == 0 - sf->convolve[0][0][0] = av1_convolve_2d_copy_sr; - // subpel_x_q4 == 0 - sf->convolve[0][1][0] = av1_convolve_y_sr; - // subpel_y_q4 == 0 - sf->convolve[1][0][0] = av1_convolve_x_sr; - // subpel_x_q4 != 0 && subpel_y_q4 != 0 - sf->convolve[1][1][0] = av1_convolve_2d_sr; - // subpel_x_q4 == 0 && subpel_y_q4 == 0 - sf->convolve[0][0][1] = av1_jnt_convolve_2d_copy; - // subpel_x_q4 == 0 - sf->convolve[0][1][1] = av1_jnt_convolve_y; - // subpel_y_q4 == 0 - sf->convolve[1][0][1] = av1_jnt_convolve_x; - // subpel_x_q4 != 0 && subpel_y_q4 != 0 - sf->convolve[1][1][1] = av1_jnt_convolve_2d; - // AV1 High BD convolve functions - // Special case convolve functions should produce the same result as - // av1_highbd_convolve_2d. 
- // subpel_x_q4 == 0 && subpel_y_q4 == 0 - sf->highbd_convolve[0][0][0] = av1_highbd_convolve_2d_copy_sr; - // subpel_x_q4 == 0 - sf->highbd_convolve[0][1][0] = av1_highbd_convolve_y_sr; - // subpel_y_q4 == 0 - sf->highbd_convolve[1][0][0] = av1_highbd_convolve_x_sr; - // subpel_x_q4 != 0 && subpel_y_q4 != 0 - sf->highbd_convolve[1][1][0] = av1_highbd_convolve_2d_sr; - // subpel_x_q4 == 0 && subpel_y_q4 == 0 - sf->highbd_convolve[0][0][1] = av1_highbd_jnt_convolve_2d_copy; - // subpel_x_q4 == 0 - sf->highbd_convolve[0][1][1] = av1_highbd_jnt_convolve_y; - // subpel_y_q4 == 0 - sf->highbd_convolve[1][0][1] = av1_highbd_jnt_convolve_x; - // subpel_x_q4 != 0 && subpel_y_q4 != 0 - sf->highbd_convolve[1][1][1] = av1_highbd_jnt_convolve_2d; -} diff --git a/third_party/aom/av1/common/scale.h b/third_party/aom/av1/common/scale.h deleted file mode 100644 index 748e958c3..000000000 --- a/third_party/aom/av1/common/scale.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_SCALE_H_ -#define AOM_AV1_COMMON_SCALE_H_ - -#include "av1/common/convolve.h" -#include "av1/common/mv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define SCALE_NUMERATOR 8 - -#define REF_SCALE_SHIFT 14 -#define REF_NO_SCALE (1 << REF_SCALE_SHIFT) -#define REF_INVALID_SCALE -1 - -struct scale_factors { - int x_scale_fp; // horizontal fixed point scale factor - int y_scale_fp; // vertical fixed point scale factor - int x_step_q4; - int y_step_q4; - - int (*scale_value_x)(int val, const struct scale_factors *sf); - int (*scale_value_y)(int val, const struct scale_factors *sf); - - // convolve_fn_ptr[subpel_x != 0][subpel_y != 0][is_compound] - aom_convolve_fn_t convolve[2][2][2]; - aom_highbd_convolve_fn_t highbd_convolve[2][2][2]; -}; - -MV32 av1_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); - -void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, - int other_h, int this_w, int this_h); - -static INLINE int av1_is_valid_scale(const struct scale_factors *sf) { - return sf->x_scale_fp != REF_INVALID_SCALE && - sf->y_scale_fp != REF_INVALID_SCALE; -} - -static INLINE int av1_is_scaled(const struct scale_factors *sf) { - return av1_is_valid_scale(sf) && - (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE); -} - -static INLINE int valid_ref_frame_size(int ref_width, int ref_height, - int this_width, int this_height) { - return 2 * this_width >= ref_width && 2 * this_height >= ref_height && - this_width <= 16 * ref_width && this_height <= 16 * ref_height; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_SCALE_H_ diff --git a/third_party/aom/av1/common/scan.c b/third_party/aom/av1/common/scan.c deleted file mode 100644 index 31a787b53..000000000 --- a/third_party/aom/av1/common/scan.c +++ /dev/null @@ -1,3735 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <assert.h> - -#include "av1/common/common_data.h" -#include "av1/common/scan.h" - -DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { - 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4[16]) = { - 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x4[16]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = { - 0, 1, 4, 2, 5, 8, 3, 6, 9, 12, 7, 10, 13, 16, 11, 14, - 17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 27, 30, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = { - 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29, - 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x8[32]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4[32]) = { - 0, 8, 1, 16, 9, 2, 24, 17, 10, 3, 25, 18, 11, 4, 26, 19, - 12, 5, 27, 20, 13, 6, 28, 21, 14, 7, 29, 22, 15, 30, 23, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4[32]) = { - 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27, - 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, 
mrow_scan_8x4[32]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_4x16[64]) = { - 0, 1, 4, 2, 5, 8, 3, 6, 9, 12, 7, 10, 13, 16, 11, 14, - 17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 32, 27, 30, - 33, 36, 31, 34, 37, 40, 35, 38, 41, 44, 39, 42, 45, 48, 43, 46, - 49, 52, 47, 50, 53, 56, 51, 54, 57, 60, 55, 58, 61, 59, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_16x4[64]) = { - 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 49, 34, 19, 4, 50, 35, - 20, 5, 51, 36, 21, 6, 52, 37, 22, 7, 53, 38, 23, 8, 54, 39, - 24, 9, 55, 40, 25, 10, 56, 41, 26, 11, 57, 42, 27, 12, 58, 43, - 28, 13, 59, 44, 29, 14, 60, 45, 30, 15, 61, 46, 31, 62, 47, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x16[64]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x4[64]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x16[64]) = { - 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, - 1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61, - 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, - 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x4[64]) = { - 0, 16, 32, 48, 1, 17, 33, 49, 2, 18, 34, 50, 3, 19, 35, 51, - 4, 20, 36, 52, 5, 21, 37, 53, 6, 22, 38, 54, 7, 23, 39, 55, - 8, 24, 40, 56, 
9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59, - 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_8x32[256]) = { - 0, 1, 8, 2, 9, 16, 3, 10, 17, 24, 4, 11, 18, 25, 32, - 5, 12, 19, 26, 33, 40, 6, 13, 20, 27, 34, 41, 48, 7, 14, - 21, 28, 35, 42, 49, 56, 15, 22, 29, 36, 43, 50, 57, 64, 23, - 30, 37, 44, 51, 58, 65, 72, 31, 38, 45, 52, 59, 66, 73, 80, - 39, 46, 53, 60, 67, 74, 81, 88, 47, 54, 61, 68, 75, 82, 89, - 96, 55, 62, 69, 76, 83, 90, 97, 104, 63, 70, 77, 84, 91, 98, - 105, 112, 71, 78, 85, 92, 99, 106, 113, 120, 79, 86, 93, 100, 107, - 114, 121, 128, 87, 94, 101, 108, 115, 122, 129, 136, 95, 102, 109, 116, - 123, 130, 137, 144, 103, 110, 117, 124, 131, 138, 145, 152, 111, 118, 125, - 132, 139, 146, 153, 160, 119, 126, 133, 140, 147, 154, 161, 168, 127, 134, - 141, 148, 155, 162, 169, 176, 135, 142, 149, 156, 163, 170, 177, 184, 143, - 150, 157, 164, 171, 178, 185, 192, 151, 158, 165, 172, 179, 186, 193, 200, - 159, 166, 173, 180, 187, 194, 201, 208, 167, 174, 181, 188, 195, 202, 209, - 216, 175, 182, 189, 196, 203, 210, 217, 224, 183, 190, 197, 204, 211, 218, - 225, 232, 191, 198, 205, 212, 219, 226, 233, 240, 199, 206, 213, 220, 227, - 234, 241, 248, 207, 214, 221, 228, 235, 242, 249, 215, 222, 229, 236, 243, - 250, 223, 230, 237, 244, 251, 231, 238, 245, 252, 239, 246, 253, 247, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_32x8[256]) = { - 0, 32, 1, 64, 33, 2, 96, 65, 34, 3, 128, 97, 66, 35, 4, - 160, 129, 98, 67, 36, 5, 192, 161, 130, 99, 68, 37, 6, 224, 193, - 162, 131, 100, 69, 38, 7, 225, 194, 163, 132, 101, 70, 39, 8, 226, - 195, 164, 133, 102, 71, 40, 9, 227, 196, 165, 134, 103, 72, 41, 10, - 228, 197, 166, 135, 104, 73, 42, 11, 229, 198, 167, 136, 105, 74, 43, - 12, 230, 199, 168, 137, 106, 75, 44, 13, 231, 200, 169, 138, 107, 76, - 45, 14, 232, 201, 170, 139, 108, 77, 46, 15, 233, 202, 171, 140, 109, - 78, 47, 16, 234, 203, 172, 141, 110, 79, 48, 
17, 235, 204, 173, 142, - 111, 80, 49, 18, 236, 205, 174, 143, 112, 81, 50, 19, 237, 206, 175, - 144, 113, 82, 51, 20, 238, 207, 176, 145, 114, 83, 52, 21, 239, 208, - 177, 146, 115, 84, 53, 22, 240, 209, 178, 147, 116, 85, 54, 23, 241, - 210, 179, 148, 117, 86, 55, 24, 242, 211, 180, 149, 118, 87, 56, 25, - 243, 212, 181, 150, 119, 88, 57, 26, 244, 213, 182, 151, 120, 89, 58, - 27, 245, 214, 183, 152, 121, 90, 59, 28, 246, 215, 184, 153, 122, 91, - 60, 29, 247, 216, 185, 154, 123, 92, 61, 30, 248, 217, 186, 155, 124, - 93, 62, 31, 249, 218, 187, 156, 125, 94, 63, 250, 219, 188, 157, 126, - 95, 251, 220, 189, 158, 127, 252, 221, 190, 159, 253, 222, 191, 254, 223, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x32[256]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const 
int16_t, mrow_scan_32x8[256]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x32[256]) = { - 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, - 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, - 240, 248, 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, - 105, 113, 121, 129, 137, 145, 153, 161, 169, 177, 185, 193, 201, 209, 217, - 225, 233, 241, 249, 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, - 90, 98, 106, 114, 122, 130, 138, 146, 154, 162, 170, 178, 186, 194, 202, - 210, 218, 226, 234, 242, 250, 3, 11, 19, 27, 35, 43, 51, 59, 67, - 75, 83, 91, 99, 107, 115, 123, 131, 139, 147, 155, 163, 171, 179, 187, - 195, 203, 211, 219, 227, 235, 243, 251, 4, 12, 20, 28, 36, 44, 52, - 60, 68, 76, 84, 92, 100, 108, 116, 124, 132, 140, 148, 
156, 164, 172, - 180, 188, 196, 204, 212, 220, 228, 236, 244, 252, 5, 13, 21, 29, 37, - 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, 133, 141, 149, 157, - 165, 173, 181, 189, 197, 205, 213, 221, 229, 237, 245, 253, 6, 14, 22, - 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, 134, 142, - 150, 158, 166, 174, 182, 190, 198, 206, 214, 222, 230, 238, 246, 254, 7, - 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127, - 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239, 247, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x8[256]) = { - 0, 32, 64, 96, 128, 160, 192, 224, 1, 33, 65, 97, 129, 161, 193, 225, - 2, 34, 66, 98, 130, 162, 194, 226, 3, 35, 67, 99, 131, 163, 195, 227, - 4, 36, 68, 100, 132, 164, 196, 228, 5, 37, 69, 101, 133, 165, 197, 229, - 6, 38, 70, 102, 134, 166, 198, 230, 7, 39, 71, 103, 135, 167, 199, 231, - 8, 40, 72, 104, 136, 168, 200, 232, 9, 41, 73, 105, 137, 169, 201, 233, - 10, 42, 74, 106, 138, 170, 202, 234, 11, 43, 75, 107, 139, 171, 203, 235, - 12, 44, 76, 108, 140, 172, 204, 236, 13, 45, 77, 109, 141, 173, 205, 237, - 14, 46, 78, 110, 142, 174, 206, 238, 15, 47, 79, 111, 143, 175, 207, 239, - 16, 48, 80, 112, 144, 176, 208, 240, 17, 49, 81, 113, 145, 177, 209, 241, - 18, 50, 82, 114, 146, 178, 210, 242, 19, 51, 83, 115, 147, 179, 211, 243, - 20, 52, 84, 116, 148, 180, 212, 244, 21, 53, 85, 117, 149, 181, 213, 245, - 22, 54, 86, 118, 150, 182, 214, 246, 23, 55, 87, 119, 151, 183, 215, 247, - 24, 56, 88, 120, 152, 184, 216, 248, 25, 57, 89, 121, 153, 185, 217, 249, - 26, 58, 90, 122, 154, 186, 218, 250, 27, 59, 91, 123, 155, 187, 219, 251, - 28, 60, 92, 124, 156, 188, 220, 252, 29, 61, 93, 125, 157, 189, 221, 253, - 30, 62, 94, 126, 158, 190, 222, 254, 31, 63, 95, 127, 159, 191, 223, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = { - 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, - 
35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x8[64]) = { - 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, - 2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59, - 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61, - 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x8[64]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_8x16[128]) = { - 0, 1, 8, 2, 9, 16, 3, 10, 17, 24, 4, 11, 18, 25, 32, - 5, 12, 19, 26, 33, 40, 6, 13, 20, 27, 34, 41, 48, 7, 14, - 21, 28, 35, 42, 49, 56, 15, 22, 29, 36, 43, 50, 57, 64, 23, - 30, 37, 44, 51, 58, 65, 72, 31, 38, 45, 52, 59, 66, 73, 80, - 39, 46, 53, 60, 67, 74, 81, 88, 47, 54, 61, 68, 75, 82, 89, - 96, 55, 62, 69, 76, 83, 90, 97, 104, 63, 70, 77, 84, 91, 98, - 105, 112, 71, 78, 85, 92, 99, 106, 113, 120, 79, 86, 93, 100, 107, - 114, 121, 87, 94, 101, 108, 115, 122, 95, 102, 109, 116, 123, 103, 110, - 117, 124, 111, 118, 125, 119, 126, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_16x8[128]) = { - 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 49, 34, 19, 4, 80, - 65, 50, 35, 20, 5, 96, 81, 66, 51, 36, 21, 6, 112, 97, 82, 67, - 52, 37, 22, 7, 113, 98, 83, 68, 53, 38, 23, 8, 114, 99, 84, 69, - 54, 39, 24, 9, 115, 100, 85, 70, 55, 40, 25, 10, 116, 101, 86, 71, - 56, 41, 26, 11, 117, 102, 87, 72, 57, 42, 27, 12, 118, 103, 88, 73, - 58, 43, 28, 13, 119, 104, 89, 74, 59, 44, 29, 14, 120, 105, 90, 75, - 60, 45, 30, 15, 121, 106, 91, 76, 61, 46, 31, 122, 107, 92, 77, 62, - 47, 123, 108, 93, 78, 63, 124, 109, 94, 79, 
125, 110, 95, 126, 111, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x16[128]) = { - 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, - 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121, - 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122, - 3, 11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99, 107, 115, 123, - 4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124, - 5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, - 6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, - 7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x8[128]) = { - 0, 16, 32, 48, 64, 80, 96, 112, 1, 17, 33, 49, 65, 81, 97, 113, - 2, 18, 34, 50, 66, 82, 98, 114, 3, 19, 35, 51, 67, 83, 99, 115, - 4, 20, 36, 52, 68, 84, 100, 116, 5, 21, 37, 53, 69, 85, 101, 117, - 6, 22, 38, 54, 70, 86, 102, 118, 7, 23, 39, 55, 71, 87, 103, 119, - 8, 24, 40, 56, 72, 88, 104, 120, 9, 25, 41, 57, 73, 89, 105, 121, - 10, 26, 42, 58, 74, 90, 106, 122, 11, 27, 43, 59, 75, 91, 107, 123, - 12, 28, 44, 60, 76, 92, 108, 124, 13, 29, 45, 61, 77, 93, 109, 125, - 14, 30, 46, 62, 78, 94, 110, 126, 15, 31, 47, 63, 79, 95, 111, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x16[128]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x8[128]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_16x32[512]) = { - 0, 1, 16, 2, 17, 32, 3, 18, 33, 48, 4, 19, 34, 49, 64, - 5, 20, 35, 50, 65, 80, 6, 21, 36, 51, 66, 81, 96, 7, 22, - 37, 52, 67, 82, 97, 112, 8, 23, 38, 53, 68, 83, 98, 113, 128, - 9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 10, 25, 40, 55, 70, - 85, 100, 115, 130, 145, 160, 11, 26, 41, 56, 71, 86, 101, 116, 131, - 146, 161, 176, 12, 27, 42, 57, 72, 87, 102, 117, 132, 147, 162, 177, - 192, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208, - 14, 29, 44, 59, 74, 89, 104, 119, 134, 149, 164, 179, 194, 209, 224, - 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225, - 240, 31, 46, 61, 76, 91, 106, 121, 136, 151, 166, 181, 196, 211, 226, - 241, 256, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, - 242, 257, 272, 63, 78, 93, 108, 123, 138, 153, 168, 183, 198, 213, 228, - 243, 258, 273, 288, 79, 94, 109, 124, 139, 154, 169, 184, 199, 214, 229, - 244, 259, 274, 289, 304, 95, 110, 125, 140, 155, 170, 185, 200, 215, 230, - 245, 260, 275, 290, 305, 320, 111, 126, 141, 156, 171, 186, 201, 216, 231, - 246, 261, 276, 291, 306, 321, 336, 127, 142, 157, 172, 187, 202, 217, 232, - 247, 262, 277, 292, 307, 322, 337, 352, 143, 158, 173, 188, 203, 218, 233, - 248, 263, 278, 293, 308, 323, 338, 353, 368, 159, 174, 189, 204, 219, 234, - 249, 264, 279, 294, 309, 324, 339, 354, 369, 384, 175, 190, 205, 220, 235, - 250, 265, 280, 295, 310, 325, 340, 355, 370, 385, 
400, 191, 206, 221, 236, - 251, 266, 281, 296, 311, 326, 341, 356, 371, 386, 401, 416, 207, 222, 237, - 252, 267, 282, 297, 312, 327, 342, 357, 372, 387, 402, 417, 432, 223, 238, - 253, 268, 283, 298, 313, 328, 343, 358, 373, 388, 403, 418, 433, 448, 239, - 254, 269, 284, 299, 314, 329, 344, 359, 374, 389, 404, 419, 434, 449, 464, - 255, 270, 285, 300, 315, 330, 345, 360, 375, 390, 405, 420, 435, 450, 465, - 480, 271, 286, 301, 316, 331, 346, 361, 376, 391, 406, 421, 436, 451, 466, - 481, 496, 287, 302, 317, 332, 347, 362, 377, 392, 407, 422, 437, 452, 467, - 482, 497, 303, 318, 333, 348, 363, 378, 393, 408, 423, 438, 453, 468, 483, - 498, 319, 334, 349, 364, 379, 394, 409, 424, 439, 454, 469, 484, 499, 335, - 350, 365, 380, 395, 410, 425, 440, 455, 470, 485, 500, 351, 366, 381, 396, - 411, 426, 441, 456, 471, 486, 501, 367, 382, 397, 412, 427, 442, 457, 472, - 487, 502, 383, 398, 413, 428, 443, 458, 473, 488, 503, 399, 414, 429, 444, - 459, 474, 489, 504, 415, 430, 445, 460, 475, 490, 505, 431, 446, 461, 476, - 491, 506, 447, 462, 477, 492, 507, 463, 478, 493, 508, 479, 494, 509, 495, - 510, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_32x16[512]) = { - 0, 32, 1, 64, 33, 2, 96, 65, 34, 3, 128, 97, 66, 35, 4, - 160, 129, 98, 67, 36, 5, 192, 161, 130, 99, 68, 37, 6, 224, 193, - 162, 131, 100, 69, 38, 7, 256, 225, 194, 163, 132, 101, 70, 39, 8, - 288, 257, 226, 195, 164, 133, 102, 71, 40, 9, 320, 289, 258, 227, 196, - 165, 134, 103, 72, 41, 10, 352, 321, 290, 259, 228, 197, 166, 135, 104, - 73, 42, 11, 384, 353, 322, 291, 260, 229, 198, 167, 136, 105, 74, 43, - 12, 416, 385, 354, 323, 292, 261, 230, 199, 168, 137, 106, 75, 44, 13, - 448, 417, 386, 355, 324, 293, 262, 231, 200, 169, 138, 107, 76, 45, 14, - 480, 449, 418, 387, 356, 325, 294, 263, 232, 201, 170, 139, 108, 77, 46, - 15, 481, 450, 419, 388, 357, 326, 295, 264, 233, 202, 171, 140, 109, 78, - 47, 16, 482, 451, 420, 389, 358, 327, 296, 265, 234, 203, 172, 141, 110, - 79, 48, 17, 483, 
452, 421, 390, 359, 328, 297, 266, 235, 204, 173, 142, - 111, 80, 49, 18, 484, 453, 422, 391, 360, 329, 298, 267, 236, 205, 174, - 143, 112, 81, 50, 19, 485, 454, 423, 392, 361, 330, 299, 268, 237, 206, - 175, 144, 113, 82, 51, 20, 486, 455, 424, 393, 362, 331, 300, 269, 238, - 207, 176, 145, 114, 83, 52, 21, 487, 456, 425, 394, 363, 332, 301, 270, - 239, 208, 177, 146, 115, 84, 53, 22, 488, 457, 426, 395, 364, 333, 302, - 271, 240, 209, 178, 147, 116, 85, 54, 23, 489, 458, 427, 396, 365, 334, - 303, 272, 241, 210, 179, 148, 117, 86, 55, 24, 490, 459, 428, 397, 366, - 335, 304, 273, 242, 211, 180, 149, 118, 87, 56, 25, 491, 460, 429, 398, - 367, 336, 305, 274, 243, 212, 181, 150, 119, 88, 57, 26, 492, 461, 430, - 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, 89, 58, 27, 493, 462, - 431, 400, 369, 338, 307, 276, 245, 214, 183, 152, 121, 90, 59, 28, 494, - 463, 432, 401, 370, 339, 308, 277, 246, 215, 184, 153, 122, 91, 60, 29, - 495, 464, 433, 402, 371, 340, 309, 278, 247, 216, 185, 154, 123, 92, 61, - 30, 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, 93, - 62, 31, 497, 466, 435, 404, 373, 342, 311, 280, 249, 218, 187, 156, 125, - 94, 63, 498, 467, 436, 405, 374, 343, 312, 281, 250, 219, 188, 157, 126, - 95, 499, 468, 437, 406, 375, 344, 313, 282, 251, 220, 189, 158, 127, 500, - 469, 438, 407, 376, 345, 314, 283, 252, 221, 190, 159, 501, 470, 439, 408, - 377, 346, 315, 284, 253, 222, 191, 502, 471, 440, 409, 378, 347, 316, 285, - 254, 223, 503, 472, 441, 410, 379, 348, 317, 286, 255, 504, 473, 442, 411, - 380, 349, 318, 287, 505, 474, 443, 412, 381, 350, 319, 506, 475, 444, 413, - 382, 351, 507, 476, 445, 414, 383, 508, 477, 446, 415, 509, 478, 447, 510, - 479, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x32[512]) = { - 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, - 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, - 480, 496, 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 
177, 193, - 209, 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 401, 417, 433, - 449, 465, 481, 497, 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, - 178, 194, 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 402, - 418, 434, 450, 466, 482, 498, 3, 19, 35, 51, 67, 83, 99, 115, 131, - 147, 163, 179, 195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371, - 387, 403, 419, 435, 451, 467, 483, 499, 4, 20, 36, 52, 68, 84, 100, - 116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340, - 356, 372, 388, 404, 420, 436, 452, 468, 484, 500, 5, 21, 37, 53, 69, - 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309, - 325, 341, 357, 373, 389, 405, 421, 437, 453, 469, 485, 501, 6, 22, 38, - 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278, - 294, 310, 326, 342, 358, 374, 390, 406, 422, 438, 454, 470, 486, 502, 7, - 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, - 263, 279, 295, 311, 327, 343, 359, 375, 391, 407, 423, 439, 455, 471, 487, - 503, 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, - 232, 248, 264, 280, 296, 312, 328, 344, 360, 376, 392, 408, 424, 440, 456, - 472, 488, 504, 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, - 201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361, 377, 393, 409, 425, - 441, 457, 473, 489, 505, 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, - 170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378, 394, - 410, 426, 442, 458, 474, 490, 506, 11, 27, 43, 59, 75, 91, 107, 123, - 139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363, - 379, 395, 411, 427, 443, 459, 475, 491, 507, 12, 28, 44, 60, 76, 92, - 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284, 300, 316, 332, - 348, 364, 380, 396, 412, 428, 444, 460, 476, 492, 508, 13, 29, 45, 61, - 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269, 285, 301, - 317, 333, 349, 365, 381, 397, 413, 429, 445, 461, 477, 493, 509, 
14, 30, - 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, 270, - 286, 302, 318, 334, 350, 366, 382, 398, 414, 430, 446, 462, 478, 494, 510, - 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, - 255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 415, 431, 447, 463, 479, - 495, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x16[512]) = { - 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480, - 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481, - 2, 34, 66, 98, 130, 162, 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, - 3, 35, 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483, - 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, 452, 484, - 5, 37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, 453, 485, - 6, 38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, - 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, 455, 487, - 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, 456, 488, - 9, 41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, - 10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490, - 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, 427, 459, 491, - 12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, 428, 460, 492, - 13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, - 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, 462, 494, - 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, 431, 463, 495, - 16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, - 17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497, - 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, 434, 466, 498, - 19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, 435, 467, 499, - 20, 52, 84, 116, 148, 180, 212, 244, 
276, 308, 340, 372, 404, 436, 468, 500, - 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, 469, 501, - 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, 438, 470, 502, - 23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, - 24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504, - 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, 441, 473, 505, - 26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, 442, 474, 506, - 27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, - 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, 476, 508, - 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, 445, 477, 509, - 30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, - 31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x32[512]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 
219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, - 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, - 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, - 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, - 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, - 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, - 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, - 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, - 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, - 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, - 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, - 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, - 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, - 510, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x16[512]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 
103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, - 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, - 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, - 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, - 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, - 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, - 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, - 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, - 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, - 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, - 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, - 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 
492, 493, 494, - 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, - 510, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = { - 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4, - 5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22, - 37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8, - 9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100, - 85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131, - 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27, - 12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208, - 224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14, - 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225, - 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46, - 31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242, - 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94, - 109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185, - 170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231, - 246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203, - 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235, - 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, - 255 -}; - -DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16[256]) = { - 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, - 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241, - 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242, - 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243, - 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244, - 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, - 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, - 7, 23, 39, 55, 
71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, - 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248, - 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249, - 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250, - 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251, - 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, - 13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, - 14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, - 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x16[256]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, 
mcol_scan_32x32[1024]) = { - 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, - 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, - 896, 928, 960, 992, 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, - 321, 353, 385, 417, 449, 481, 513, 545, 577, 609, 641, 673, 705, 737, - 769, 801, 833, 865, 897, 929, 961, 993, 2, 34, 66, 98, 130, 162, - 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, 514, 546, 578, 610, - 642, 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 3, 35, - 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483, - 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, 867, 899, 931, - 963, 995, 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, - 388, 420, 452, 484, 516, 548, 580, 612, 644, 676, 708, 740, 772, 804, - 836, 868, 900, 932, 964, 996, 5, 37, 69, 101, 133, 165, 197, 229, - 261, 293, 325, 357, 389, 421, 453, 485, 517, 549, 581, 613, 645, 677, - 709, 741, 773, 805, 837, 869, 901, 933, 965, 997, 6, 38, 70, 102, - 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, 518, 550, - 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902, 934, 966, 998, - 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, - 455, 487, 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, 871, - 903, 935, 967, 999, 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, - 328, 360, 392, 424, 456, 488, 520, 552, 584, 616, 648, 680, 712, 744, - 776, 808, 840, 872, 904, 936, 968, 1000, 9, 41, 73, 105, 137, 169, - 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, 521, 553, 585, 617, - 649, 681, 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 10, 42, - 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490, - 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, 842, 874, 906, 938, - 970, 1002, 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, - 395, 427, 459, 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811, - 843, 875, 907, 939, 971, 1003, 12, 44, 76, 108, 140, 172, 204, 236, - 
268, 300, 332, 364, 396, 428, 460, 492, 524, 556, 588, 620, 652, 684, - 716, 748, 780, 812, 844, 876, 908, 940, 972, 1004, 13, 45, 77, 109, - 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, 525, 557, - 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941, 973, 1005, - 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, - 462, 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, 814, 846, 878, - 910, 942, 974, 1006, 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, - 335, 367, 399, 431, 463, 495, 527, 559, 591, 623, 655, 687, 719, 751, - 783, 815, 847, 879, 911, 943, 975, 1007, 16, 48, 80, 112, 144, 176, - 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, 528, 560, 592, 624, - 656, 688, 720, 752, 784, 816, 848, 880, 912, 944, 976, 1008, 17, 49, - 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497, - 529, 561, 593, 625, 657, 689, 721, 753, 785, 817, 849, 881, 913, 945, - 977, 1009, 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, - 402, 434, 466, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818, - 850, 882, 914, 946, 978, 1010, 19, 51, 83, 115, 147, 179, 211, 243, - 275, 307, 339, 371, 403, 435, 467, 499, 531, 563, 595, 627, 659, 691, - 723, 755, 787, 819, 851, 883, 915, 947, 979, 1011, 20, 52, 84, 116, - 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500, 532, 564, - 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980, 1012, - 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, - 469, 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, 821, 853, 885, - 917, 949, 981, 1013, 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, - 342, 374, 406, 438, 470, 502, 534, 566, 598, 630, 662, 694, 726, 758, - 790, 822, 854, 886, 918, 950, 982, 1014, 23, 55, 87, 119, 151, 183, - 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, 535, 567, 599, 631, - 663, 695, 727, 759, 791, 823, 855, 887, 919, 951, 983, 1015, 24, 56, - 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504, - 536, 
568, 600, 632, 664, 696, 728, 760, 792, 824, 856, 888, 920, 952, - 984, 1016, 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, - 409, 441, 473, 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825, - 857, 889, 921, 953, 985, 1017, 26, 58, 90, 122, 154, 186, 218, 250, - 282, 314, 346, 378, 410, 442, 474, 506, 538, 570, 602, 634, 666, 698, - 730, 762, 794, 826, 858, 890, 922, 954, 986, 1018, 27, 59, 91, 123, - 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, 539, 571, - 603, 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019, - 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, - 476, 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, 828, 860, 892, - 924, 956, 988, 1020, 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, - 349, 381, 413, 445, 477, 509, 541, 573, 605, 637, 669, 701, 733, 765, - 797, 829, 861, 893, 925, 957, 989, 1021, 30, 62, 94, 126, 158, 190, - 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, 542, 574, 606, 638, - 670, 702, 734, 766, 798, 830, 862, 894, 926, 958, 990, 1022, 31, 63, - 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511, - 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863, 895, 927, 959, - 991, 1023, -}; - -DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x32[1024]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, - 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, - 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, - 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, - 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, - 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, 157, 
158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, - 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, - 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, - 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, - 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, - 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, - 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, - 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, - 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, - 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, - 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, - 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, - 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, - 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, - 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, - 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, - 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, - 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, - 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, - 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, - 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, - 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, - 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, - 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, - 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, - 
546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, - 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, - 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, - 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, - 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, - 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, - 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, - 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, - 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, - 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, - 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, - 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, - 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, - 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, - 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, - 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, - 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, - 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, - 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, - 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, - 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, - 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, - 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, - 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, - 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, - 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, - 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, - 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, - 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, - 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 
934, 935, - 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, - 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, - 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, - 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, - 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, - 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, - 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { - 0, 1, 32, 64, 33, 2, 3, 34, 65, 96, 128, 97, 66, - 35, 4, 5, 36, 67, 98, 129, 160, 192, 161, 130, 99, 68, - 37, 6, 7, 38, 69, 100, 131, 162, 193, 224, 256, 225, 194, - 163, 132, 101, 70, 39, 8, 9, 40, 71, 102, 133, 164, 195, - 226, 257, 288, 320, 289, 258, 227, 196, 165, 134, 103, 72, 41, - 10, 11, 42, 73, 104, 135, 166, 197, 228, 259, 290, 321, 352, - 384, 353, 322, 291, 260, 229, 198, 167, 136, 105, 74, 43, 12, - 13, 44, 75, 106, 137, 168, 199, 230, 261, 292, 323, 354, 385, - 416, 448, 417, 386, 355, 324, 293, 262, 231, 200, 169, 138, 107, - 76, 45, 14, 15, 46, 77, 108, 139, 170, 201, 232, 263, 294, - 325, 356, 387, 418, 449, 480, 512, 481, 450, 419, 388, 357, 326, - 295, 264, 233, 202, 171, 140, 109, 78, 47, 16, 17, 48, 79, - 110, 141, 172, 203, 234, 265, 296, 327, 358, 389, 420, 451, 482, - 513, 544, 576, 545, 514, 483, 452, 421, 390, 359, 328, 297, 266, - 235, 204, 173, 142, 111, 80, 49, 18, 19, 50, 81, 112, 143, - 174, 205, 236, 267, 298, 329, 360, 391, 422, 453, 484, 515, 546, - 577, 608, 640, 609, 578, 547, 516, 485, 454, 423, 392, 361, 330, - 299, 268, 237, 206, 175, 144, 113, 82, 51, 20, 21, 52, 83, - 114, 145, 176, 207, 238, 269, 300, 331, 362, 393, 424, 455, 486, - 517, 548, 579, 610, 641, 672, 704, 673, 642, 611, 580, 549, 518, - 487, 456, 425, 394, 363, 332, 301, 270, 239, 208, 177, 146, 115, - 84, 53, 22, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302, - 333, 364, 395, 426, 457, 488, 519, 550, 
581, 612, 643, 674, 705, - 736, 768, 737, 706, 675, 644, 613, 582, 551, 520, 489, 458, 427, - 396, 365, 334, 303, 272, 241, 210, 179, 148, 117, 86, 55, 24, - 25, 56, 87, 118, 149, 180, 211, 242, 273, 304, 335, 366, 397, - 428, 459, 490, 521, 552, 583, 614, 645, 676, 707, 738, 769, 800, - 832, 801, 770, 739, 708, 677, 646, 615, 584, 553, 522, 491, 460, - 429, 398, 367, 336, 305, 274, 243, 212, 181, 150, 119, 88, 57, - 26, 27, 58, 89, 120, 151, 182, 213, 244, 275, 306, 337, 368, - 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771, - 802, 833, 864, 896, 865, 834, 803, 772, 741, 710, 679, 648, 617, - 586, 555, 524, 493, 462, 431, 400, 369, 338, 307, 276, 245, 214, - 183, 152, 121, 90, 59, 28, 29, 60, 91, 122, 153, 184, 215, - 246, 277, 308, 339, 370, 401, 432, 463, 494, 525, 556, 587, 618, - 649, 680, 711, 742, 773, 804, 835, 866, 897, 928, 960, 929, 898, - 867, 836, 805, 774, 743, 712, 681, 650, 619, 588, 557, 526, 495, - 464, 433, 402, 371, 340, 309, 278, 247, 216, 185, 154, 123, 92, - 61, 30, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341, - 372, 403, 434, 465, 496, 527, 558, 589, 620, 651, 682, 713, 744, - 775, 806, 837, 868, 899, 930, 961, 992, 993, 962, 931, 900, 869, - 838, 807, 776, 745, 714, 683, 652, 621, 590, 559, 528, 497, 466, - 435, 404, 373, 342, 311, 280, 249, 218, 187, 156, 125, 94, 63, - 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436, 467, - 498, 529, 560, 591, 622, 653, 684, 715, 746, 777, 808, 839, 870, - 901, 932, 963, 994, 995, 964, 933, 902, 871, 840, 809, 778, 747, - 716, 685, 654, 623, 592, 561, 530, 499, 468, 437, 406, 375, 344, - 313, 282, 251, 220, 189, 158, 127, 159, 190, 221, 252, 283, 314, - 345, 376, 407, 438, 469, 500, 531, 562, 593, 624, 655, 686, 717, - 748, 779, 810, 841, 872, 903, 934, 965, 996, 997, 966, 935, 904, - 873, 842, 811, 780, 749, 718, 687, 656, 625, 594, 563, 532, 501, - 470, 439, 408, 377, 346, 315, 284, 253, 222, 191, 223, 254, 285, - 316, 347, 378, 409, 440, 471, 502, 533, 564, 595, 626, 
657, 688, - 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 999, 968, 937, - 906, 875, 844, 813, 782, 751, 720, 689, 658, 627, 596, 565, 534, - 503, 472, 441, 410, 379, 348, 317, 286, 255, 287, 318, 349, 380, - 411, 442, 473, 504, 535, 566, 597, 628, 659, 690, 721, 752, 783, - 814, 845, 876, 907, 938, 969, 1000, 1001, 970, 939, 908, 877, 846, - 815, 784, 753, 722, 691, 660, 629, 598, 567, 536, 505, 474, 443, - 412, 381, 350, 319, 351, 382, 413, 444, 475, 506, 537, 568, 599, - 630, 661, 692, 723, 754, 785, 816, 847, 878, 909, 940, 971, 1002, - 1003, 972, 941, 910, 879, 848, 817, 786, 755, 724, 693, 662, 631, - 600, 569, 538, 507, 476, 445, 414, 383, 415, 446, 477, 508, 539, - 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911, 942, - 973, 1004, 1005, 974, 943, 912, 881, 850, 819, 788, 757, 726, 695, - 664, 633, 602, 571, 540, 509, 478, 447, 479, 510, 541, 572, 603, - 634, 665, 696, 727, 758, 789, 820, 851, 882, 913, 944, 975, 1006, - 1007, 976, 945, 914, 883, 852, 821, 790, 759, 728, 697, 666, 635, - 604, 573, 542, 511, 543, 574, 605, 636, 667, 698, 729, 760, 791, - 822, 853, 884, 915, 946, 977, 1008, 1009, 978, 947, 916, 885, 854, - 823, 792, 761, 730, 699, 668, 637, 606, 575, 607, 638, 669, 700, - 731, 762, 793, 824, 855, 886, 917, 948, 979, 1010, 1011, 980, 949, - 918, 887, 856, 825, 794, 763, 732, 701, 670, 639, 671, 702, 733, - 764, 795, 826, 857, 888, 919, 950, 981, 1012, 1013, 982, 951, 920, - 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, - 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, - 799, 830, 861, 892, 923, 954, 985, 1016, 1017, 986, 955, 924, 893, - 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, - 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023 -}; - -// Neighborhood 2-tuples for various scans and blocksizes, -// in {top, left} order for each position in corresponding scan order. 
-DECLARE_ALIGNED(16, static const int16_t, - default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 4, 4, 1, 4, 1, 1, 2, 2, 2, 5, 5, - 8, 8, 8, 9, 12, 6, 9, 3, 6, 7, 10, 10, 13, 11, 14, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 4, 8, 8, 0, 0, 1, 4, 5, 8, 9, 12, 1, - 1, 2, 5, 6, 9, 10, 13, 2, 2, 3, 6, 7, 10, 11, 14, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4, - 4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 4, 1, 1, 4, 4, 2, 5, 5, 8, 6, - 9, 2, 2, 8, 8, 3, 6, 9, 12, 7, 10, 10, 13, 12, 12, 13, 16, - 11, 14, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21, - 24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 4, 8, 8, 12, 12, 16, 16, 20, 20, 24, 24, 0, - 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 1, 1, - 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 2, 2, 3, - 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4, - 4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 12, 12, - 13, 16, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21, - 24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 8, 1, 1, 8, 8, 2, 9, 9, 16, 10, - 17, 2, 2, 16, 16, 3, 10, 17, 24, 11, 18, 18, 25, 3, 3, 4, 11, - 19, 26, 12, 19, 4, 4, 20, 27, 5, 12, 13, 20, 21, 28, 5, 5, 6, - 13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 
30, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 16, 16, 0, 0, 1, 8, 9, 16, 17, 24, 1, - 1, 2, 9, 10, 17, 18, 25, 2, 2, 3, 10, 11, 18, 19, 26, 3, 3, - 4, 11, 12, 19, 20, 27, 4, 4, 5, 12, 13, 20, 21, 28, 5, 5, 6, - 13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 30, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 0, - 0, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13, 7, 14, 8, 8, - 9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17, - 24, 18, 25, 19, 26, 20, 27, 21, 28, 22, 29, 23, 30, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_4x16_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 4, 1, 1, 4, 4, 2, 5, 5, 8, 6, 9, 2, - 2, 8, 8, 3, 6, 9, 12, 7, 10, 10, 13, 12, 12, 13, 16, 11, 14, 14, 17, - 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21, 24, 22, 25, 23, 26, 24, - 24, 25, 28, 26, 29, 27, 30, 28, 28, 29, 32, 30, 33, 31, 34, 32, 32, 33, 36, - 34, 37, 35, 38, 36, 36, 37, 40, 38, 41, 39, 42, 40, 40, 41, 44, 42, 45, 43, - 46, 44, 44, 45, 48, 46, 49, 47, 50, 48, 48, 49, 52, 50, 53, 51, 54, 52, 52, - 53, 56, 54, 57, 55, 58, 56, 56, 57, 60, 58, 61, 59, 62, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_16x4_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 16, 1, 1, 16, 16, 2, 17, 17, 32, 18, 33, 2, - 2, 32, 32, 3, 18, 33, 48, 19, 34, 34, 49, 3, 3, 4, 19, 35, 50, 20, 35, - 4, 4, 36, 51, 5, 20, 21, 36, 37, 52, 5, 5, 6, 21, 22, 37, 38, 53, 6, - 6, 7, 22, 23, 38, 39, 54, 7, 7, 8, 23, 24, 39, 40, 55, 8, 8, 9, 24, - 25, 40, 41, 56, 9, 9, 10, 25, 26, 41, 42, 57, 10, 10, 11, 26, 27, 42, 43, - 58, 11, 11, 12, 27, 28, 43, 44, 59, 12, 12, 13, 28, 29, 44, 45, 60, 13, 13, - 14, 29, 30, 45, 46, 61, 14, 14, 15, 30, 31, 46, 47, 62, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_4x16_neighbors[65 * MAX_NEIGHBORS]) = { 
- 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4, 4, 5, - 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 12, 12, 13, 16, 14, 17, - 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21, 24, 22, 25, 23, 26, 24, - 24, 25, 28, 26, 29, 27, 30, 28, 28, 29, 32, 30, 33, 31, 34, 32, 32, 33, 36, - 34, 37, 35, 38, 36, 36, 37, 40, 38, 41, 39, 42, 40, 40, 41, 44, 42, 45, 43, - 46, 44, 44, 45, 48, 46, 49, 47, 50, 48, 48, 49, 52, 50, 53, 51, 54, 52, 52, - 53, 56, 54, 57, 55, 58, 56, 56, 57, 60, 58, 61, 59, 62, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_16x4_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, - 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 0, 0, 1, 16, 2, 17, - 3, 18, 4, 19, 5, 20, 6, 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, - 27, 13, 28, 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21, 36, - 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43, 29, 44, 30, 45, 31, - 46, 32, 32, 33, 48, 34, 49, 35, 50, 36, 51, 37, 52, 38, 53, 39, 54, 40, 55, - 41, 56, 42, 57, 43, 58, 44, 59, 45, 60, 46, 61, 47, 62, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_4x16_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 4, 8, 8, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28, 32, - 32, 36, 36, 40, 40, 44, 44, 48, 48, 52, 52, 56, 56, 0, 0, 1, 4, 5, 8, - 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29, 32, 33, 36, 37, 40, 41, 44, 45, - 48, 49, 52, 53, 56, 57, 60, 1, 1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, - 22, 25, 26, 29, 30, 33, 34, 37, 38, 41, 42, 45, 46, 49, 50, 53, 54, 57, 58, - 61, 2, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31, 34, - 35, 38, 39, 42, 43, 46, 47, 50, 51, 54, 55, 58, 59, 62, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_16x4_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 1, 16, 17, 32, 33, 48, 1, 1, 2, - 17, 18, 33, 34, 49, 2, 2, 3, 18, 19, 34, 35, 50, 3, 3, 4, 19, 20, 35, - 36, 51, 4, 4, 5, 20, 21, 36, 37, 52, 5, 5, 6, 
21, 22, 37, 38, 53, 6, - 6, 7, 22, 23, 38, 39, 54, 7, 7, 8, 23, 24, 39, 40, 55, 8, 8, 9, 24, - 25, 40, 41, 56, 9, 9, 10, 25, 26, 41, 42, 57, 10, 10, 11, 26, 27, 42, 43, - 58, 11, 11, 12, 27, 28, 43, 44, 59, 12, 12, 13, 28, 29, 44, 45, 60, 13, 13, - 14, 29, 30, 45, 46, 61, 14, 14, 15, 30, 31, 46, 47, 62, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_8x32_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 1, 1, 8, 8, 8, 2, 2, 2, - 9, 9, 16, 16, 16, 3, 3, 3, 10, 10, 17, 17, 24, 24, 24, - 4, 4, 4, 11, 11, 18, 18, 25, 25, 32, 32, 32, 5, 5, 5, - 12, 12, 19, 19, 26, 26, 33, 33, 40, 40, 40, 6, 6, 6, 13, - 13, 20, 20, 27, 27, 34, 34, 41, 41, 48, 48, 48, 7, 14, 14, - 21, 21, 28, 28, 35, 35, 42, 42, 49, 49, 56, 56, 56, 15, 22, - 22, 29, 29, 36, 36, 43, 43, 50, 50, 57, 57, 64, 64, 64, 23, - 30, 30, 37, 37, 44, 44, 51, 51, 58, 58, 65, 65, 72, 72, 72, - 31, 38, 38, 45, 45, 52, 52, 59, 59, 66, 66, 73, 73, 80, 80, - 80, 39, 46, 46, 53, 53, 60, 60, 67, 67, 74, 74, 81, 81, 88, - 88, 88, 47, 54, 54, 61, 61, 68, 68, 75, 75, 82, 82, 89, 89, - 96, 96, 96, 55, 62, 62, 69, 69, 76, 76, 83, 83, 90, 90, 97, - 97, 104, 104, 104, 63, 70, 70, 77, 77, 84, 84, 91, 91, 98, 98, - 105, 105, 112, 112, 112, 71, 78, 78, 85, 85, 92, 92, 99, 99, 106, - 106, 113, 113, 120, 120, 120, 79, 86, 86, 93, 93, 100, 100, 107, 107, - 114, 114, 121, 121, 128, 128, 128, 87, 94, 94, 101, 101, 108, 108, 115, - 115, 122, 122, 129, 129, 136, 136, 136, 95, 102, 102, 109, 109, 116, 116, - 123, 123, 130, 130, 137, 137, 144, 144, 144, 103, 110, 110, 117, 117, 124, - 124, 131, 131, 138, 138, 145, 145, 152, 152, 152, 111, 118, 118, 125, 125, - 132, 132, 139, 139, 146, 146, 153, 153, 160, 160, 160, 119, 126, 126, 133, - 133, 140, 140, 147, 147, 154, 154, 161, 161, 168, 168, 168, 127, 134, 134, - 141, 141, 148, 148, 155, 155, 162, 162, 169, 169, 176, 176, 176, 135, 142, - 142, 149, 149, 156, 156, 163, 163, 170, 170, 177, 177, 184, 184, 184, 143, - 150, 150, 157, 157, 164, 164, 171, 171, 178, 178, 
185, 185, 192, 192, 192, - 151, 158, 158, 165, 165, 172, 172, 179, 179, 186, 186, 193, 193, 200, 200, - 200, 159, 166, 166, 173, 173, 180, 180, 187, 187, 194, 194, 201, 201, 208, - 208, 208, 167, 174, 174, 181, 181, 188, 188, 195, 195, 202, 202, 209, 209, - 216, 216, 216, 175, 182, 182, 189, 189, 196, 196, 203, 203, 210, 210, 217, - 217, 224, 224, 224, 183, 190, 190, 197, 197, 204, 204, 211, 211, 218, 218, - 225, 225, 232, 232, 232, 191, 198, 198, 205, 205, 212, 212, 219, 219, 226, - 226, 233, 233, 240, 240, 240, 199, 206, 206, 213, 213, 220, 220, 227, 227, - 234, 234, 241, 241, 248, 207, 214, 214, 221, 221, 228, 228, 235, 235, 242, - 242, 249, 215, 222, 222, 229, 229, 236, 236, 243, 243, 250, 223, 230, 230, - 237, 237, 244, 244, 251, 231, 238, 238, 245, 245, 252, 239, 246, 246, 253, - 247, 254, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_32x8_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 1, 1, 32, 32, 32, 2, 2, 2, - 33, 33, 64, 64, 64, 3, 3, 3, 34, 34, 65, 65, 96, 96, 96, - 4, 4, 4, 35, 35, 66, 66, 97, 97, 128, 128, 128, 5, 5, 5, - 36, 36, 67, 67, 98, 98, 129, 129, 160, 160, 160, 6, 6, 6, 37, - 37, 68, 68, 99, 99, 130, 130, 161, 161, 192, 192, 192, 7, 7, 7, - 38, 38, 69, 69, 100, 100, 131, 131, 162, 162, 193, 193, 224, 8, 8, - 8, 39, 39, 70, 70, 101, 101, 132, 132, 163, 163, 194, 194, 225, 9, - 9, 9, 40, 40, 71, 71, 102, 102, 133, 133, 164, 164, 195, 195, 226, - 10, 10, 10, 41, 41, 72, 72, 103, 103, 134, 134, 165, 165, 196, 196, - 227, 11, 11, 11, 42, 42, 73, 73, 104, 104, 135, 135, 166, 166, 197, - 197, 228, 12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167, - 198, 198, 229, 13, 13, 13, 44, 44, 75, 75, 106, 106, 137, 137, 168, - 168, 199, 199, 230, 14, 14, 14, 45, 45, 76, 76, 107, 107, 138, 138, - 169, 169, 200, 200, 231, 15, 15, 15, 46, 46, 77, 77, 108, 108, 139, - 139, 170, 170, 201, 201, 232, 16, 16, 16, 47, 47, 78, 78, 109, 109, - 140, 140, 171, 171, 202, 202, 233, 17, 17, 17, 48, 48, 79, 79, 110, - 110, 141, 141, 
172, 172, 203, 203, 234, 18, 18, 18, 49, 49, 80, 80, - 111, 111, 142, 142, 173, 173, 204, 204, 235, 19, 19, 19, 50, 50, 81, - 81, 112, 112, 143, 143, 174, 174, 205, 205, 236, 20, 20, 20, 51, 51, - 82, 82, 113, 113, 144, 144, 175, 175, 206, 206, 237, 21, 21, 21, 52, - 52, 83, 83, 114, 114, 145, 145, 176, 176, 207, 207, 238, 22, 22, 22, - 53, 53, 84, 84, 115, 115, 146, 146, 177, 177, 208, 208, 239, 23, 23, - 23, 54, 54, 85, 85, 116, 116, 147, 147, 178, 178, 209, 209, 240, 24, - 24, 24, 55, 55, 86, 86, 117, 117, 148, 148, 179, 179, 210, 210, 241, - 25, 25, 25, 56, 56, 87, 87, 118, 118, 149, 149, 180, 180, 211, 211, - 242, 26, 26, 26, 57, 57, 88, 88, 119, 119, 150, 150, 181, 181, 212, - 212, 243, 27, 27, 27, 58, 58, 89, 89, 120, 120, 151, 151, 182, 182, - 213, 213, 244, 28, 28, 28, 59, 59, 90, 90, 121, 121, 152, 152, 183, - 183, 214, 214, 245, 29, 29, 29, 60, 60, 91, 91, 122, 122, 153, 153, - 184, 184, 215, 215, 246, 30, 30, 30, 61, 61, 92, 92, 123, 123, 154, - 154, 185, 185, 216, 216, 247, 31, 62, 62, 93, 93, 124, 124, 155, 155, - 186, 186, 217, 217, 248, 63, 94, 94, 125, 125, 156, 156, 187, 187, 218, - 218, 249, 95, 126, 126, 157, 157, 188, 188, 219, 219, 250, 127, 158, 158, - 189, 189, 220, 220, 251, 159, 190, 190, 221, 221, 252, 191, 222, 222, 253, - 223, 254, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_8x32_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 0, 0, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13, - 7, 14, 8, 8, 9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14, - 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20, 27, 21, 28, - 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29, - 36, 30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43, - 37, 44, 38, 45, 39, 46, 40, 40, 41, 48, 42, 49, 43, 50, 44, - 51, 45, 52, 46, 53, 47, 54, 48, 48, 49, 56, 50, 57, 51, 58, - 52, 59, 53, 60, 54, 61, 55, 62, 56, 56, 57, 64, 58, 65, 59, - 66, 60, 67, 61, 68, 62, 69, 63, 70, 64, 64, 65, 72, 66, 73, - 67, 74, 68, 75, 69, 
76, 70, 77, 71, 78, 72, 72, 73, 80, 74, - 81, 75, 82, 76, 83, 77, 84, 78, 85, 79, 86, 80, 80, 81, 88, - 82, 89, 83, 90, 84, 91, 85, 92, 86, 93, 87, 94, 88, 88, 89, - 96, 90, 97, 91, 98, 92, 99, 93, 100, 94, 101, 95, 102, 96, 96, - 97, 104, 98, 105, 99, 106, 100, 107, 101, 108, 102, 109, 103, 110, 104, - 104, 105, 112, 106, 113, 107, 114, 108, 115, 109, 116, 110, 117, 111, 118, - 112, 112, 113, 120, 114, 121, 115, 122, 116, 123, 117, 124, 118, 125, 119, - 126, 120, 120, 121, 128, 122, 129, 123, 130, 124, 131, 125, 132, 126, 133, - 127, 134, 128, 128, 129, 136, 130, 137, 131, 138, 132, 139, 133, 140, 134, - 141, 135, 142, 136, 136, 137, 144, 138, 145, 139, 146, 140, 147, 141, 148, - 142, 149, 143, 150, 144, 144, 145, 152, 146, 153, 147, 154, 148, 155, 149, - 156, 150, 157, 151, 158, 152, 152, 153, 160, 154, 161, 155, 162, 156, 163, - 157, 164, 158, 165, 159, 166, 160, 160, 161, 168, 162, 169, 163, 170, 164, - 171, 165, 172, 166, 173, 167, 174, 168, 168, 169, 176, 170, 177, 171, 178, - 172, 179, 173, 180, 174, 181, 175, 182, 176, 176, 177, 184, 178, 185, 179, - 186, 180, 187, 181, 188, 182, 189, 183, 190, 184, 184, 185, 192, 186, 193, - 187, 194, 188, 195, 189, 196, 190, 197, 191, 198, 192, 192, 193, 200, 194, - 201, 195, 202, 196, 203, 197, 204, 198, 205, 199, 206, 200, 200, 201, 208, - 202, 209, 203, 210, 204, 211, 205, 212, 206, 213, 207, 214, 208, 208, 209, - 216, 210, 217, 211, 218, 212, 219, 213, 220, 214, 221, 215, 222, 216, 216, - 217, 224, 218, 225, 219, 226, 220, 227, 221, 228, 222, 229, 223, 230, 224, - 224, 225, 232, 226, 233, 227, 234, 228, 235, 229, 236, 230, 237, 231, 238, - 232, 232, 233, 240, 234, 241, 235, 242, 236, 243, 237, 244, 238, 245, 239, - 246, 240, 240, 241, 248, 242, 249, 243, 250, 244, 251, 245, 252, 246, 253, - 247, 254, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_32x8_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, - 14, 14, 
15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, - 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, - 29, 29, 30, 30, 0, 0, 1, 32, 2, 33, 3, 34, 4, 35, 5, - 36, 6, 37, 7, 38, 8, 39, 9, 40, 10, 41, 11, 42, 12, 43, - 13, 44, 14, 45, 15, 46, 16, 47, 17, 48, 18, 49, 19, 50, 20, - 51, 21, 52, 22, 53, 23, 54, 24, 55, 25, 56, 26, 57, 27, 58, - 28, 59, 29, 60, 30, 61, 31, 62, 32, 32, 33, 64, 34, 65, 35, - 66, 36, 67, 37, 68, 38, 69, 39, 70, 40, 71, 41, 72, 42, 73, - 43, 74, 44, 75, 45, 76, 46, 77, 47, 78, 48, 79, 49, 80, 50, - 81, 51, 82, 52, 83, 53, 84, 54, 85, 55, 86, 56, 87, 57, 88, - 58, 89, 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 64, 64, 65, - 96, 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, 102, 72, 103, - 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, 109, 79, 110, 80, - 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117, 87, 118, - 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124, 94, 125, 95, - 126, 96, 96, 97, 128, 98, 129, 99, 130, 100, 131, 101, 132, 102, 133, - 103, 134, 104, 135, 105, 136, 106, 137, 107, 138, 108, 139, 109, 140, 110, - 141, 111, 142, 112, 143, 113, 144, 114, 145, 115, 146, 116, 147, 117, 148, - 118, 149, 119, 150, 120, 151, 121, 152, 122, 153, 123, 154, 124, 155, 125, - 156, 126, 157, 127, 158, 128, 128, 129, 160, 130, 161, 131, 162, 132, 163, - 133, 164, 134, 165, 135, 166, 136, 167, 137, 168, 138, 169, 139, 170, 140, - 171, 141, 172, 142, 173, 143, 174, 144, 175, 145, 176, 146, 177, 147, 178, - 148, 179, 149, 180, 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155, - 186, 156, 187, 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193, - 163, 194, 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170, - 201, 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208, - 178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215, 185, - 216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222, 192, 192, - 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229, 199, 
230, 200, - 231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236, 206, 237, 207, 238, - 208, 239, 209, 240, 210, 241, 211, 242, 212, 243, 213, 244, 214, 245, 215, - 246, 216, 247, 217, 248, 218, 249, 219, 250, 220, 251, 221, 252, 222, 253, - 223, 254, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_8x32_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, - 48, 56, 56, 64, 64, 72, 72, 80, 80, 88, 88, 96, 96, 104, 104, - 112, 112, 120, 120, 128, 128, 136, 136, 144, 144, 152, 152, 160, 160, 168, - 168, 176, 176, 184, 184, 192, 192, 200, 200, 208, 208, 216, 216, 224, 224, - 232, 232, 240, 240, 0, 0, 1, 8, 9, 16, 17, 24, 25, 32, 33, - 40, 41, 48, 49, 56, 57, 64, 65, 72, 73, 80, 81, 88, 89, 96, - 97, 104, 105, 112, 113, 120, 121, 128, 129, 136, 137, 144, 145, 152, 153, - 160, 161, 168, 169, 176, 177, 184, 185, 192, 193, 200, 201, 208, 209, 216, - 217, 224, 225, 232, 233, 240, 241, 248, 1, 1, 2, 9, 10, 17, 18, - 25, 26, 33, 34, 41, 42, 49, 50, 57, 58, 65, 66, 73, 74, 81, - 82, 89, 90, 97, 98, 105, 106, 113, 114, 121, 122, 129, 130, 137, 138, - 145, 146, 153, 154, 161, 162, 169, 170, 177, 178, 185, 186, 193, 194, 201, - 202, 209, 210, 217, 218, 225, 226, 233, 234, 241, 242, 249, 2, 2, 3, - 10, 11, 18, 19, 26, 27, 34, 35, 42, 43, 50, 51, 58, 59, 66, - 67, 74, 75, 82, 83, 90, 91, 98, 99, 106, 107, 114, 115, 122, 123, - 130, 131, 138, 139, 146, 147, 154, 155, 162, 163, 170, 171, 178, 179, 186, - 187, 194, 195, 202, 203, 210, 211, 218, 219, 226, 227, 234, 235, 242, 243, - 250, 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43, 44, 51, - 52, 59, 60, 67, 68, 75, 76, 83, 84, 91, 92, 99, 100, 107, 108, - 115, 116, 123, 124, 131, 132, 139, 140, 147, 148, 155, 156, 163, 164, 171, - 172, 179, 180, 187, 188, 195, 196, 203, 204, 211, 212, 219, 220, 227, 228, - 235, 236, 243, 244, 251, 4, 4, 5, 12, 13, 20, 21, 28, 29, 36, - 37, 44, 45, 52, 53, 60, 61, 68, 69, 76, 77, 84, 85, 92, 93, - 100, 101, 108, 109, 116, 117, 124, 125, 132, 133, 
140, 141, 148, 149, 156, - 157, 164, 165, 172, 173, 180, 181, 188, 189, 196, 197, 204, 205, 212, 213, - 220, 221, 228, 229, 236, 237, 244, 245, 252, 5, 5, 6, 13, 14, 21, - 22, 29, 30, 37, 38, 45, 46, 53, 54, 61, 62, 69, 70, 77, 78, - 85, 86, 93, 94, 101, 102, 109, 110, 117, 118, 125, 126, 133, 134, 141, - 142, 149, 150, 157, 158, 165, 166, 173, 174, 181, 182, 189, 190, 197, 198, - 205, 206, 213, 214, 221, 222, 229, 230, 237, 238, 245, 246, 253, 6, 6, - 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, 63, - 70, 71, 78, 79, 86, 87, 94, 95, 102, 103, 110, 111, 118, 119, 126, - 127, 134, 135, 142, 143, 150, 151, 158, 159, 166, 167, 174, 175, 182, 183, - 190, 191, 198, 199, 206, 207, 214, 215, 222, 223, 230, 231, 238, 239, 246, - 247, 254, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_32x8_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160, 192, 192, - 0, 0, 1, 32, 33, 64, 65, 96, 97, 128, 129, 160, 161, 192, 193, 224, - 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130, 161, 162, 193, 194, 225, - 2, 2, 3, 34, 35, 66, 67, 98, 99, 130, 131, 162, 163, 194, 195, 226, - 3, 3, 4, 35, 36, 67, 68, 99, 100, 131, 132, 163, 164, 195, 196, 227, - 4, 4, 5, 36, 37, 68, 69, 100, 101, 132, 133, 164, 165, 196, 197, 228, - 5, 5, 6, 37, 38, 69, 70, 101, 102, 133, 134, 165, 166, 197, 198, 229, - 6, 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198, 199, 230, - 7, 7, 8, 39, 40, 71, 72, 103, 104, 135, 136, 167, 168, 199, 200, 231, - 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168, 169, 200, 201, 232, - 9, 9, 10, 41, 42, 73, 74, 105, 106, 137, 138, 169, 170, 201, 202, 233, - 10, 10, 11, 42, 43, 74, 75, 106, 107, 138, 139, 170, 171, 202, 203, 234, - 11, 11, 12, 43, 44, 75, 76, 107, 108, 139, 140, 171, 172, 203, 204, 235, - 12, 12, 13, 44, 45, 76, 77, 108, 109, 140, 141, 172, 173, 204, 205, 236, - 13, 13, 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205, 206, 237, - 14, 14, 15, 46, 47, 78, 79, 110, 111, 142, 143, 174, 175, 
206, 207, 238, - 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175, 176, 207, 208, 239, - 16, 16, 17, 48, 49, 80, 81, 112, 113, 144, 145, 176, 177, 208, 209, 240, - 17, 17, 18, 49, 50, 81, 82, 113, 114, 145, 146, 177, 178, 209, 210, 241, - 18, 18, 19, 50, 51, 82, 83, 114, 115, 146, 147, 178, 179, 210, 211, 242, - 19, 19, 20, 51, 52, 83, 84, 115, 116, 147, 148, 179, 180, 211, 212, 243, - 20, 20, 21, 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212, 213, 244, - 21, 21, 22, 53, 54, 85, 86, 117, 118, 149, 150, 181, 182, 213, 214, 245, - 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182, 183, 214, 215, 246, - 23, 23, 24, 55, 56, 87, 88, 119, 120, 151, 152, 183, 184, 215, 216, 247, - 24, 24, 25, 56, 57, 88, 89, 120, 121, 152, 153, 184, 185, 216, 217, 248, - 25, 25, 26, 57, 58, 89, 90, 121, 122, 153, 154, 185, 186, 217, 218, 249, - 26, 26, 27, 58, 59, 90, 91, 122, 123, 154, 155, 186, 187, 218, 219, 250, - 27, 27, 28, 59, 60, 91, 92, 123, 124, 155, 156, 187, 188, 219, 220, 251, - 28, 28, 29, 60, 61, 92, 93, 124, 125, 156, 157, 188, 189, 220, 221, 252, - 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189, 190, 221, 222, 253, - 30, 30, 31, 62, 63, 94, 95, 126, 127, 158, 159, 190, 191, 222, 223, 254, - 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48, 0, 0, 1, - 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, 1, 1, 2, 9, 10, 17, - 18, 25, 26, 33, 34, 41, 42, 49, 50, 57, 2, 2, 3, 10, 11, 18, 19, 26, 27, - 34, 35, 42, 43, 50, 51, 58, 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43, - 44, 51, 52, 59, 4, 4, 5, 12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53, - 60, 5, 5, 6, 13, 14, 21, 22, 29, 30, 37, 38, 45, 46, 53, 54, 61, 6, 6, - 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 0, 0, 1, - 8, 2, 9, 3, 
10, 4, 11, 5, 12, 6, 13, 7, 14, 8, 8, 9, 16, 10, 17, - 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20, - 27, 21, 28, 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29, 36, - 30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43, 37, 44, 38, 45, 39, - 46, 40, 40, 41, 48, 42, 49, 43, 50, 44, 51, 45, 52, 46, 53, 47, 54, 48, 48, - 49, 56, 50, 57, 51, 58, 52, 59, 53, 60, 54, 61, 55, 62, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 2, 2, 2, 9, 9, 16, 16, - 16, 24, 24, 17, 24, 10, 17, 3, 10, 3, 3, 4, 4, 4, 11, 11, 18, 18, 25, - 25, 32, 32, 32, 40, 40, 33, 40, 26, 33, 19, 26, 12, 19, 5, 12, 5, 5, 6, - 6, 6, 13, 13, 20, 20, 27, 27, 34, 34, 41, 41, 48, 48, 48, 49, 56, 42, 49, - 35, 42, 28, 35, 21, 28, 14, 21, 7, 14, 15, 22, 22, 29, 29, 36, 36, 43, 43, - 50, 50, 57, 51, 58, 44, 51, 37, 44, 30, 37, 23, 30, 31, 38, 38, 45, 45, 52, - 52, 59, 53, 60, 46, 53, 39, 46, 47, 54, 54, 61, 55, 62, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 1, 1, 8, 8, 8, 2, 2, 2, - 9, 9, 16, 16, 16, 3, 3, 3, 10, 10, 17, 17, 24, 24, 24, - 4, 4, 4, 11, 11, 18, 18, 25, 25, 32, 32, 32, 5, 5, 5, - 12, 12, 19, 19, 26, 26, 33, 33, 40, 40, 40, 6, 6, 6, 13, - 13, 20, 20, 27, 27, 34, 34, 41, 41, 48, 48, 48, 7, 14, 14, - 21, 21, 28, 28, 35, 35, 42, 42, 49, 49, 56, 56, 56, 15, 22, - 22, 29, 29, 36, 36, 43, 43, 50, 50, 57, 57, 64, 64, 64, 23, - 30, 30, 37, 37, 44, 44, 51, 51, 58, 58, 65, 65, 72, 72, 72, - 31, 38, 38, 45, 45, 52, 52, 59, 59, 66, 66, 73, 73, 80, 80, - 80, 39, 46, 46, 53, 53, 60, 60, 67, 67, 74, 74, 81, 81, 88, - 88, 88, 47, 54, 54, 61, 61, 68, 68, 75, 75, 82, 82, 89, 89, - 96, 96, 96, 55, 62, 62, 69, 69, 76, 76, 83, 83, 90, 90, 97, - 97, 104, 104, 104, 63, 70, 70, 77, 77, 84, 84, 91, 91, 98, 98, - 105, 105, 112, 112, 112, 71, 78, 78, 85, 85, 92, 92, 99, 99, 106, - 106, 113, 
113, 120, 79, 86, 86, 93, 93, 100, 100, 107, 107, 114, 114, - 121, 87, 94, 94, 101, 101, 108, 108, 115, 115, 122, 95, 102, 102, 109, - 109, 116, 116, 123, 103, 110, 110, 117, 117, 124, 111, 118, 118, 125, 119, - 126, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 1, 1, 16, 16, 16, 2, 2, 2, - 17, 17, 32, 32, 32, 3, 3, 3, 18, 18, 33, 33, 48, 48, 48, - 4, 4, 4, 19, 19, 34, 34, 49, 49, 64, 64, 64, 5, 5, 5, - 20, 20, 35, 35, 50, 50, 65, 65, 80, 80, 80, 6, 6, 6, 21, - 21, 36, 36, 51, 51, 66, 66, 81, 81, 96, 96, 96, 7, 7, 7, - 22, 22, 37, 37, 52, 52, 67, 67, 82, 82, 97, 97, 112, 8, 8, - 8, 23, 23, 38, 38, 53, 53, 68, 68, 83, 83, 98, 98, 113, 9, - 9, 9, 24, 24, 39, 39, 54, 54, 69, 69, 84, 84, 99, 99, 114, - 10, 10, 10, 25, 25, 40, 40, 55, 55, 70, 70, 85, 85, 100, 100, - 115, 11, 11, 11, 26, 26, 41, 41, 56, 56, 71, 71, 86, 86, 101, - 101, 116, 12, 12, 12, 27, 27, 42, 42, 57, 57, 72, 72, 87, 87, - 102, 102, 117, 13, 13, 13, 28, 28, 43, 43, 58, 58, 73, 73, 88, - 88, 103, 103, 118, 14, 14, 14, 29, 29, 44, 44, 59, 59, 74, 74, - 89, 89, 104, 104, 119, 15, 30, 30, 45, 45, 60, 60, 75, 75, 90, - 90, 105, 105, 120, 31, 46, 46, 61, 61, 76, 76, 91, 91, 106, 106, - 121, 47, 62, 62, 77, 77, 92, 92, 107, 107, 122, 63, 78, 78, 93, - 93, 108, 108, 123, 79, 94, 94, 109, 109, 124, 95, 110, 110, 125, 111, - 126, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48, - 56, 56, 64, 64, 72, 72, 80, 80, 88, 88, 96, 96, 104, 104, 112, 112, - 0, 0, 1, 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, - 57, 64, 65, 72, 73, 80, 81, 88, 89, 96, 97, 104, 105, 112, 113, 120, - 1, 1, 2, 9, 10, 17, 18, 25, 26, 33, 34, 41, 42, 49, 50, 57, - 58, 65, 66, 73, 74, 81, 82, 89, 90, 97, 98, 105, 106, 113, 114, 121, - 2, 2, 3, 10, 11, 18, 19, 26, 27, 34, 35, 42, 43, 50, 51, 58, - 59, 66, 67, 74, 75, 82, 83, 90, 91, 98, 
99, 106, 107, 114, 115, 122, - 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43, 44, 51, 52, 59, - 60, 67, 68, 75, 76, 83, 84, 91, 92, 99, 100, 107, 108, 115, 116, 123, - 4, 4, 5, 12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53, 60, - 61, 68, 69, 76, 77, 84, 85, 92, 93, 100, 101, 108, 109, 116, 117, 124, - 5, 5, 6, 13, 14, 21, 22, 29, 30, 37, 38, 45, 46, 53, 54, 61, - 62, 69, 70, 77, 78, 85, 86, 93, 94, 101, 102, 109, 110, 117, 118, 125, - 6, 6, 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, - 63, 70, 71, 78, 79, 86, 87, 94, 95, 102, 103, 110, 111, 118, 119, 126, - 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96, 96, - 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65, 80, 81, 96, 97, 112, - 1, 1, 2, 17, 18, 33, 34, 49, 50, 65, 66, 81, 82, 97, 98, 113, - 2, 2, 3, 18, 19, 34, 35, 50, 51, 66, 67, 82, 83, 98, 99, 114, - 3, 3, 4, 19, 20, 35, 36, 51, 52, 67, 68, 83, 84, 99, 100, 115, - 4, 4, 5, 20, 21, 36, 37, 52, 53, 68, 69, 84, 85, 100, 101, 116, - 5, 5, 6, 21, 22, 37, 38, 53, 54, 69, 70, 85, 86, 101, 102, 117, - 6, 6, 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118, - 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88, 103, 104, 119, - 8, 8, 9, 24, 25, 40, 41, 56, 57, 72, 73, 88, 89, 104, 105, 120, - 9, 9, 10, 25, 26, 41, 42, 57, 58, 73, 74, 89, 90, 105, 106, 121, - 10, 10, 11, 26, 27, 42, 43, 58, 59, 74, 75, 90, 91, 106, 107, 122, - 11, 11, 12, 27, 28, 43, 44, 59, 60, 75, 76, 91, 92, 107, 108, 123, - 12, 12, 13, 28, 29, 44, 45, 60, 61, 76, 77, 92, 93, 108, 109, 124, - 13, 13, 14, 29, 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125, - 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111, 126, - 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 0, 0, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13, - 7, 14, 8, 8, 9, 16, 10, 17, 11, 18, 12, 19, 13, 
20, 14, - 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20, 27, 21, 28, - 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29, - 36, 30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43, - 37, 44, 38, 45, 39, 46, 40, 40, 41, 48, 42, 49, 43, 50, 44, - 51, 45, 52, 46, 53, 47, 54, 48, 48, 49, 56, 50, 57, 51, 58, - 52, 59, 53, 60, 54, 61, 55, 62, 56, 56, 57, 64, 58, 65, 59, - 66, 60, 67, 61, 68, 62, 69, 63, 70, 64, 64, 65, 72, 66, 73, - 67, 74, 68, 75, 69, 76, 70, 77, 71, 78, 72, 72, 73, 80, 74, - 81, 75, 82, 76, 83, 77, 84, 78, 85, 79, 86, 80, 80, 81, 88, - 82, 89, 83, 90, 84, 91, 85, 92, 86, 93, 87, 94, 88, 88, 89, - 96, 90, 97, 91, 98, 92, 99, 93, 100, 94, 101, 95, 102, 96, 96, - 97, 104, 98, 105, 99, 106, 100, 107, 101, 108, 102, 109, 103, 110, 104, - 104, 105, 112, 106, 113, 107, 114, 108, 115, 109, 116, 110, 117, 111, 118, - 112, 112, 113, 120, 114, 121, 115, 122, 116, 123, 117, 124, 118, 125, 119, - 126, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, - 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6, - 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28, - 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21, - 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43, - 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36, - 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58, - 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51, - 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73, - 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, 64, 64, 65, 80, 66, - 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88, - 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81, - 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103, - 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96, - 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 
102, 117, 103, 118, - 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111, - 126, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 1, 1, 16, 16, 16, 2, 2, 2, - 17, 17, 32, 32, 32, 3, 3, 3, 18, 18, 33, 33, 48, 48, 48, - 4, 4, 4, 19, 19, 34, 34, 49, 49, 64, 64, 64, 5, 5, 5, - 20, 20, 35, 35, 50, 50, 65, 65, 80, 80, 80, 6, 6, 6, 21, - 21, 36, 36, 51, 51, 66, 66, 81, 81, 96, 96, 96, 7, 7, 7, - 22, 22, 37, 37, 52, 52, 67, 67, 82, 82, 97, 97, 112, 112, 112, - 8, 8, 8, 23, 23, 38, 38, 53, 53, 68, 68, 83, 83, 98, 98, - 113, 113, 128, 128, 128, 9, 9, 9, 24, 24, 39, 39, 54, 54, 69, - 69, 84, 84, 99, 99, 114, 114, 129, 129, 144, 144, 144, 10, 10, 10, - 25, 25, 40, 40, 55, 55, 70, 70, 85, 85, 100, 100, 115, 115, 130, - 130, 145, 145, 160, 160, 160, 11, 11, 11, 26, 26, 41, 41, 56, 56, - 71, 71, 86, 86, 101, 101, 116, 116, 131, 131, 146, 146, 161, 161, 176, - 176, 176, 12, 12, 12, 27, 27, 42, 42, 57, 57, 72, 72, 87, 87, - 102, 102, 117, 117, 132, 132, 147, 147, 162, 162, 177, 177, 192, 192, 192, - 13, 13, 13, 28, 28, 43, 43, 58, 58, 73, 73, 88, 88, 103, 103, - 118, 118, 133, 133, 148, 148, 163, 163, 178, 178, 193, 193, 208, 208, 208, - 14, 14, 14, 29, 29, 44, 44, 59, 59, 74, 74, 89, 89, 104, 104, - 119, 119, 134, 134, 149, 149, 164, 164, 179, 179, 194, 194, 209, 209, 224, - 224, 224, 15, 30, 30, 45, 45, 60, 60, 75, 75, 90, 90, 105, 105, - 120, 120, 135, 135, 150, 150, 165, 165, 180, 180, 195, 195, 210, 210, 225, - 225, 240, 240, 240, 31, 46, 46, 61, 61, 76, 76, 91, 91, 106, 106, - 121, 121, 136, 136, 151, 151, 166, 166, 181, 181, 196, 196, 211, 211, 226, - 226, 241, 241, 256, 256, 256, 47, 62, 62, 77, 77, 92, 92, 107, 107, - 122, 122, 137, 137, 152, 152, 167, 167, 182, 182, 197, 197, 212, 212, 227, - 227, 242, 242, 257, 257, 272, 272, 272, 63, 78, 78, 93, 93, 108, 108, - 123, 123, 138, 138, 153, 153, 168, 168, 183, 183, 198, 198, 213, 213, 228, - 228, 243, 243, 258, 258, 
273, 273, 288, 288, 288, 79, 94, 94, 109, 109, - 124, 124, 139, 139, 154, 154, 169, 169, 184, 184, 199, 199, 214, 214, 229, - 229, 244, 244, 259, 259, 274, 274, 289, 289, 304, 304, 304, 95, 110, 110, - 125, 125, 140, 140, 155, 155, 170, 170, 185, 185, 200, 200, 215, 215, 230, - 230, 245, 245, 260, 260, 275, 275, 290, 290, 305, 305, 320, 320, 320, 111, - 126, 126, 141, 141, 156, 156, 171, 171, 186, 186, 201, 201, 216, 216, 231, - 231, 246, 246, 261, 261, 276, 276, 291, 291, 306, 306, 321, 321, 336, 336, - 336, 127, 142, 142, 157, 157, 172, 172, 187, 187, 202, 202, 217, 217, 232, - 232, 247, 247, 262, 262, 277, 277, 292, 292, 307, 307, 322, 322, 337, 337, - 352, 352, 352, 143, 158, 158, 173, 173, 188, 188, 203, 203, 218, 218, 233, - 233, 248, 248, 263, 263, 278, 278, 293, 293, 308, 308, 323, 323, 338, 338, - 353, 353, 368, 368, 368, 159, 174, 174, 189, 189, 204, 204, 219, 219, 234, - 234, 249, 249, 264, 264, 279, 279, 294, 294, 309, 309, 324, 324, 339, 339, - 354, 354, 369, 369, 384, 384, 384, 175, 190, 190, 205, 205, 220, 220, 235, - 235, 250, 250, 265, 265, 280, 280, 295, 295, 310, 310, 325, 325, 340, 340, - 355, 355, 370, 370, 385, 385, 400, 400, 400, 191, 206, 206, 221, 221, 236, - 236, 251, 251, 266, 266, 281, 281, 296, 296, 311, 311, 326, 326, 341, 341, - 356, 356, 371, 371, 386, 386, 401, 401, 416, 416, 416, 207, 222, 222, 237, - 237, 252, 252, 267, 267, 282, 282, 297, 297, 312, 312, 327, 327, 342, 342, - 357, 357, 372, 372, 387, 387, 402, 402, 417, 417, 432, 432, 432, 223, 238, - 238, 253, 253, 268, 268, 283, 283, 298, 298, 313, 313, 328, 328, 343, 343, - 358, 358, 373, 373, 388, 388, 403, 403, 418, 418, 433, 433, 448, 448, 448, - 239, 254, 254, 269, 269, 284, 284, 299, 299, 314, 314, 329, 329, 344, 344, - 359, 359, 374, 374, 389, 389, 404, 404, 419, 419, 434, 434, 449, 449, 464, - 464, 464, 255, 270, 270, 285, 285, 300, 300, 315, 315, 330, 330, 345, 345, - 360, 360, 375, 375, 390, 390, 405, 405, 420, 420, 435, 435, 450, 450, 465, - 465, 480, 480, 480, 271, 
286, 286, 301, 301, 316, 316, 331, 331, 346, 346, - 361, 361, 376, 376, 391, 391, 406, 406, 421, 421, 436, 436, 451, 451, 466, - 466, 481, 481, 496, 287, 302, 302, 317, 317, 332, 332, 347, 347, 362, 362, - 377, 377, 392, 392, 407, 407, 422, 422, 437, 437, 452, 452, 467, 467, 482, - 482, 497, 303, 318, 318, 333, 333, 348, 348, 363, 363, 378, 378, 393, 393, - 408, 408, 423, 423, 438, 438, 453, 453, 468, 468, 483, 483, 498, 319, 334, - 334, 349, 349, 364, 364, 379, 379, 394, 394, 409, 409, 424, 424, 439, 439, - 454, 454, 469, 469, 484, 484, 499, 335, 350, 350, 365, 365, 380, 380, 395, - 395, 410, 410, 425, 425, 440, 440, 455, 455, 470, 470, 485, 485, 500, 351, - 366, 366, 381, 381, 396, 396, 411, 411, 426, 426, 441, 441, 456, 456, 471, - 471, 486, 486, 501, 367, 382, 382, 397, 397, 412, 412, 427, 427, 442, 442, - 457, 457, 472, 472, 487, 487, 502, 383, 398, 398, 413, 413, 428, 428, 443, - 443, 458, 458, 473, 473, 488, 488, 503, 399, 414, 414, 429, 429, 444, 444, - 459, 459, 474, 474, 489, 489, 504, 415, 430, 430, 445, 445, 460, 460, 475, - 475, 490, 490, 505, 431, 446, 446, 461, 461, 476, 476, 491, 491, 506, 447, - 462, 462, 477, 477, 492, 492, 507, 463, 478, 478, 493, 493, 508, 479, 494, - 494, 509, 495, 510, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 1, 1, 32, 32, 32, 2, 2, 2, - 33, 33, 64, 64, 64, 3, 3, 3, 34, 34, 65, 65, 96, 96, 96, - 4, 4, 4, 35, 35, 66, 66, 97, 97, 128, 128, 128, 5, 5, 5, - 36, 36, 67, 67, 98, 98, 129, 129, 160, 160, 160, 6, 6, 6, 37, - 37, 68, 68, 99, 99, 130, 130, 161, 161, 192, 192, 192, 7, 7, 7, - 38, 38, 69, 69, 100, 100, 131, 131, 162, 162, 193, 193, 224, 224, 224, - 8, 8, 8, 39, 39, 70, 70, 101, 101, 132, 132, 163, 163, 194, 194, - 225, 225, 256, 256, 256, 9, 9, 9, 40, 40, 71, 71, 102, 102, 133, - 133, 164, 164, 195, 195, 226, 226, 257, 257, 288, 288, 288, 10, 10, 10, - 41, 41, 72, 72, 103, 103, 134, 134, 165, 165, 196, 196, 227, 227, 258, - 258, 
289, 289, 320, 320, 320, 11, 11, 11, 42, 42, 73, 73, 104, 104, - 135, 135, 166, 166, 197, 197, 228, 228, 259, 259, 290, 290, 321, 321, 352, - 352, 352, 12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167, - 198, 198, 229, 229, 260, 260, 291, 291, 322, 322, 353, 353, 384, 384, 384, - 13, 13, 13, 44, 44, 75, 75, 106, 106, 137, 137, 168, 168, 199, 199, - 230, 230, 261, 261, 292, 292, 323, 323, 354, 354, 385, 385, 416, 416, 416, - 14, 14, 14, 45, 45, 76, 76, 107, 107, 138, 138, 169, 169, 200, 200, - 231, 231, 262, 262, 293, 293, 324, 324, 355, 355, 386, 386, 417, 417, 448, - 448, 448, 15, 15, 15, 46, 46, 77, 77, 108, 108, 139, 139, 170, 170, - 201, 201, 232, 232, 263, 263, 294, 294, 325, 325, 356, 356, 387, 387, 418, - 418, 449, 449, 480, 16, 16, 16, 47, 47, 78, 78, 109, 109, 140, 140, - 171, 171, 202, 202, 233, 233, 264, 264, 295, 295, 326, 326, 357, 357, 388, - 388, 419, 419, 450, 450, 481, 17, 17, 17, 48, 48, 79, 79, 110, 110, - 141, 141, 172, 172, 203, 203, 234, 234, 265, 265, 296, 296, 327, 327, 358, - 358, 389, 389, 420, 420, 451, 451, 482, 18, 18, 18, 49, 49, 80, 80, - 111, 111, 142, 142, 173, 173, 204, 204, 235, 235, 266, 266, 297, 297, 328, - 328, 359, 359, 390, 390, 421, 421, 452, 452, 483, 19, 19, 19, 50, 50, - 81, 81, 112, 112, 143, 143, 174, 174, 205, 205, 236, 236, 267, 267, 298, - 298, 329, 329, 360, 360, 391, 391, 422, 422, 453, 453, 484, 20, 20, 20, - 51, 51, 82, 82, 113, 113, 144, 144, 175, 175, 206, 206, 237, 237, 268, - 268, 299, 299, 330, 330, 361, 361, 392, 392, 423, 423, 454, 454, 485, 21, - 21, 21, 52, 52, 83, 83, 114, 114, 145, 145, 176, 176, 207, 207, 238, - 238, 269, 269, 300, 300, 331, 331, 362, 362, 393, 393, 424, 424, 455, 455, - 486, 22, 22, 22, 53, 53, 84, 84, 115, 115, 146, 146, 177, 177, 208, - 208, 239, 239, 270, 270, 301, 301, 332, 332, 363, 363, 394, 394, 425, 425, - 456, 456, 487, 23, 23, 23, 54, 54, 85, 85, 116, 116, 147, 147, 178, - 178, 209, 209, 240, 240, 271, 271, 302, 302, 333, 333, 364, 364, 395, 395, - 426, 426, 457, 
457, 488, 24, 24, 24, 55, 55, 86, 86, 117, 117, 148, - 148, 179, 179, 210, 210, 241, 241, 272, 272, 303, 303, 334, 334, 365, 365, - 396, 396, 427, 427, 458, 458, 489, 25, 25, 25, 56, 56, 87, 87, 118, - 118, 149, 149, 180, 180, 211, 211, 242, 242, 273, 273, 304, 304, 335, 335, - 366, 366, 397, 397, 428, 428, 459, 459, 490, 26, 26, 26, 57, 57, 88, - 88, 119, 119, 150, 150, 181, 181, 212, 212, 243, 243, 274, 274, 305, 305, - 336, 336, 367, 367, 398, 398, 429, 429, 460, 460, 491, 27, 27, 27, 58, - 58, 89, 89, 120, 120, 151, 151, 182, 182, 213, 213, 244, 244, 275, 275, - 306, 306, 337, 337, 368, 368, 399, 399, 430, 430, 461, 461, 492, 28, 28, - 28, 59, 59, 90, 90, 121, 121, 152, 152, 183, 183, 214, 214, 245, 245, - 276, 276, 307, 307, 338, 338, 369, 369, 400, 400, 431, 431, 462, 462, 493, - 29, 29, 29, 60, 60, 91, 91, 122, 122, 153, 153, 184, 184, 215, 215, - 246, 246, 277, 277, 308, 308, 339, 339, 370, 370, 401, 401, 432, 432, 463, - 463, 494, 30, 30, 30, 61, 61, 92, 92, 123, 123, 154, 154, 185, 185, - 216, 216, 247, 247, 278, 278, 309, 309, 340, 340, 371, 371, 402, 402, 433, - 433, 464, 464, 495, 31, 62, 62, 93, 93, 124, 124, 155, 155, 186, 186, - 217, 217, 248, 248, 279, 279, 310, 310, 341, 341, 372, 372, 403, 403, 434, - 434, 465, 465, 496, 63, 94, 94, 125, 125, 156, 156, 187, 187, 218, 218, - 249, 249, 280, 280, 311, 311, 342, 342, 373, 373, 404, 404, 435, 435, 466, - 466, 497, 95, 126, 126, 157, 157, 188, 188, 219, 219, 250, 250, 281, 281, - 312, 312, 343, 343, 374, 374, 405, 405, 436, 436, 467, 467, 498, 127, 158, - 158, 189, 189, 220, 220, 251, 251, 282, 282, 313, 313, 344, 344, 375, 375, - 406, 406, 437, 437, 468, 468, 499, 159, 190, 190, 221, 221, 252, 252, 283, - 283, 314, 314, 345, 345, 376, 376, 407, 407, 438, 438, 469, 469, 500, 191, - 222, 222, 253, 253, 284, 284, 315, 315, 346, 346, 377, 377, 408, 408, 439, - 439, 470, 470, 501, 223, 254, 254, 285, 285, 316, 316, 347, 347, 378, 378, - 409, 409, 440, 440, 471, 471, 502, 255, 286, 286, 317, 317, 348, 348, 
379, - 379, 410, 410, 441, 441, 472, 472, 503, 287, 318, 318, 349, 349, 380, 380, - 411, 411, 442, 442, 473, 473, 504, 319, 350, 350, 381, 381, 412, 412, 443, - 443, 474, 474, 505, 351, 382, 382, 413, 413, 444, 444, 475, 475, 506, 383, - 414, 414, 445, 445, 476, 476, 507, 415, 446, 446, 477, 477, 508, 447, 478, - 478, 509, 479, 510, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96, - 96, 112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, 208, - 224, 224, 240, 240, 256, 256, 272, 272, 288, 288, 304, 304, 320, 320, 336, - 336, 352, 352, 368, 368, 384, 384, 400, 400, 416, 416, 432, 432, 448, 448, - 464, 464, 480, 480, 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65, - 80, 81, 96, 97, 112, 113, 128, 129, 144, 145, 160, 161, 176, 177, 192, - 193, 208, 209, 224, 225, 240, 241, 256, 257, 272, 273, 288, 289, 304, 305, - 320, 321, 336, 337, 352, 353, 368, 369, 384, 385, 400, 401, 416, 417, 432, - 433, 448, 449, 464, 465, 480, 481, 496, 1, 1, 2, 17, 18, 33, 34, - 49, 50, 65, 66, 81, 82, 97, 98, 113, 114, 129, 130, 145, 146, 161, - 162, 177, 178, 193, 194, 209, 210, 225, 226, 241, 242, 257, 258, 273, 274, - 289, 290, 305, 306, 321, 322, 337, 338, 353, 354, 369, 370, 385, 386, 401, - 402, 417, 418, 433, 434, 449, 450, 465, 466, 481, 482, 497, 2, 2, 3, - 18, 19, 34, 35, 50, 51, 66, 67, 82, 83, 98, 99, 114, 115, 130, - 131, 146, 147, 162, 163, 178, 179, 194, 195, 210, 211, 226, 227, 242, 243, - 258, 259, 274, 275, 290, 291, 306, 307, 322, 323, 338, 339, 354, 355, 370, - 371, 386, 387, 402, 403, 418, 419, 434, 435, 450, 451, 466, 467, 482, 483, - 498, 3, 3, 4, 19, 20, 35, 36, 51, 52, 67, 68, 83, 84, 99, - 100, 115, 116, 131, 132, 147, 148, 163, 164, 179, 180, 195, 196, 211, 212, - 227, 228, 243, 244, 259, 260, 275, 276, 291, 292, 307, 308, 323, 324, 339, - 340, 355, 356, 371, 372, 387, 388, 403, 404, 419, 420, 435, 436, 451, 452, - 467, 468, 483, 484, 499, 4, 
4, 5, 20, 21, 36, 37, 52, 53, 68, - 69, 84, 85, 100, 101, 116, 117, 132, 133, 148, 149, 164, 165, 180, 181, - 196, 197, 212, 213, 228, 229, 244, 245, 260, 261, 276, 277, 292, 293, 308, - 309, 324, 325, 340, 341, 356, 357, 372, 373, 388, 389, 404, 405, 420, 421, - 436, 437, 452, 453, 468, 469, 484, 485, 500, 5, 5, 6, 21, 22, 37, - 38, 53, 54, 69, 70, 85, 86, 101, 102, 117, 118, 133, 134, 149, 150, - 165, 166, 181, 182, 197, 198, 213, 214, 229, 230, 245, 246, 261, 262, 277, - 278, 293, 294, 309, 310, 325, 326, 341, 342, 357, 358, 373, 374, 389, 390, - 405, 406, 421, 422, 437, 438, 453, 454, 469, 470, 485, 486, 501, 6, 6, - 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118, 119, - 134, 135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, 230, 231, 246, - 247, 262, 263, 278, 279, 294, 295, 310, 311, 326, 327, 342, 343, 358, 359, - 374, 375, 390, 391, 406, 407, 422, 423, 438, 439, 454, 455, 470, 471, 486, - 487, 502, 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88, - 103, 104, 119, 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215, - 216, 231, 232, 247, 248, 263, 264, 279, 280, 295, 296, 311, 312, 327, 328, - 343, 344, 359, 360, 375, 376, 391, 392, 407, 408, 423, 424, 439, 440, 455, - 456, 471, 472, 487, 488, 503, 8, 8, 9, 24, 25, 40, 41, 56, 57, - 72, 73, 88, 89, 104, 105, 120, 121, 136, 137, 152, 153, 168, 169, 184, - 185, 200, 201, 216, 217, 232, 233, 248, 249, 264, 265, 280, 281, 296, 297, - 312, 313, 328, 329, 344, 345, 360, 361, 376, 377, 392, 393, 408, 409, 424, - 425, 440, 441, 456, 457, 472, 473, 488, 489, 504, 9, 9, 10, 25, 26, - 41, 42, 57, 58, 73, 74, 89, 90, 105, 106, 121, 122, 137, 138, 153, - 154, 169, 170, 185, 186, 201, 202, 217, 218, 233, 234, 249, 250, 265, 266, - 281, 282, 297, 298, 313, 314, 329, 330, 345, 346, 361, 362, 377, 378, 393, - 394, 409, 410, 425, 426, 441, 442, 457, 458, 473, 474, 489, 490, 505, 10, - 10, 11, 26, 27, 42, 43, 58, 59, 74, 75, 90, 91, 106, 107, 122, - 123, 138, 139, 154, 155, 170, 171, 186, 187, 202, 203, 
218, 219, 234, 235, - 250, 251, 266, 267, 282, 283, 298, 299, 314, 315, 330, 331, 346, 347, 362, - 363, 378, 379, 394, 395, 410, 411, 426, 427, 442, 443, 458, 459, 474, 475, - 490, 491, 506, 11, 11, 12, 27, 28, 43, 44, 59, 60, 75, 76, 91, - 92, 107, 108, 123, 124, 139, 140, 155, 156, 171, 172, 187, 188, 203, 204, - 219, 220, 235, 236, 251, 252, 267, 268, 283, 284, 299, 300, 315, 316, 331, - 332, 347, 348, 363, 364, 379, 380, 395, 396, 411, 412, 427, 428, 443, 444, - 459, 460, 475, 476, 491, 492, 507, 12, 12, 13, 28, 29, 44, 45, 60, - 61, 76, 77, 92, 93, 108, 109, 124, 125, 140, 141, 156, 157, 172, 173, - 188, 189, 204, 205, 220, 221, 236, 237, 252, 253, 268, 269, 284, 285, 300, - 301, 316, 317, 332, 333, 348, 349, 364, 365, 380, 381, 396, 397, 412, 413, - 428, 429, 444, 445, 460, 461, 476, 477, 492, 493, 508, 13, 13, 14, 29, - 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125, 126, 141, 142, - 157, 158, 173, 174, 189, 190, 205, 206, 221, 222, 237, 238, 253, 254, 269, - 270, 285, 286, 301, 302, 317, 318, 333, 334, 349, 350, 365, 366, 381, 382, - 397, 398, 413, 414, 429, 430, 445, 446, 461, 462, 477, 478, 493, 494, 509, - 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111, - 126, 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, 238, - 239, 254, 255, 270, 271, 286, 287, 302, 303, 318, 319, 334, 335, 350, 351, - 366, 367, 382, 383, 398, 399, 414, 415, 430, 431, 446, 447, 462, 463, 478, - 479, 494, 495, 510, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160, 192, - 192, 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384, 416, 416, - 448, 448, 0, 0, 1, 32, 33, 64, 65, 96, 97, 128, 129, 160, 161, - 192, 193, 224, 225, 256, 257, 288, 289, 320, 321, 352, 353, 384, 385, 416, - 417, 448, 449, 480, 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130, - 161, 162, 193, 194, 225, 226, 257, 258, 289, 290, 321, 322, 353, 354, 385, - 386, 417, 
418, 449, 450, 481, 2, 2, 3, 34, 35, 66, 67, 98, 99, - 130, 131, 162, 163, 194, 195, 226, 227, 258, 259, 290, 291, 322, 323, 354, - 355, 386, 387, 418, 419, 450, 451, 482, 3, 3, 4, 35, 36, 67, 68, - 99, 100, 131, 132, 163, 164, 195, 196, 227, 228, 259, 260, 291, 292, 323, - 324, 355, 356, 387, 388, 419, 420, 451, 452, 483, 4, 4, 5, 36, 37, - 68, 69, 100, 101, 132, 133, 164, 165, 196, 197, 228, 229, 260, 261, 292, - 293, 324, 325, 356, 357, 388, 389, 420, 421, 452, 453, 484, 5, 5, 6, - 37, 38, 69, 70, 101, 102, 133, 134, 165, 166, 197, 198, 229, 230, 261, - 262, 293, 294, 325, 326, 357, 358, 389, 390, 421, 422, 453, 454, 485, 6, - 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198, 199, 230, - 231, 262, 263, 294, 295, 326, 327, 358, 359, 390, 391, 422, 423, 454, 455, - 486, 7, 7, 8, 39, 40, 71, 72, 103, 104, 135, 136, 167, 168, 199, - 200, 231, 232, 263, 264, 295, 296, 327, 328, 359, 360, 391, 392, 423, 424, - 455, 456, 487, 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168, - 169, 200, 201, 232, 233, 264, 265, 296, 297, 328, 329, 360, 361, 392, 393, - 424, 425, 456, 457, 488, 9, 9, 10, 41, 42, 73, 74, 105, 106, 137, - 138, 169, 170, 201, 202, 233, 234, 265, 266, 297, 298, 329, 330, 361, 362, - 393, 394, 425, 426, 457, 458, 489, 10, 10, 11, 42, 43, 74, 75, 106, - 107, 138, 139, 170, 171, 202, 203, 234, 235, 266, 267, 298, 299, 330, 331, - 362, 363, 394, 395, 426, 427, 458, 459, 490, 11, 11, 12, 43, 44, 75, - 76, 107, 108, 139, 140, 171, 172, 203, 204, 235, 236, 267, 268, 299, 300, - 331, 332, 363, 364, 395, 396, 427, 428, 459, 460, 491, 12, 12, 13, 44, - 45, 76, 77, 108, 109, 140, 141, 172, 173, 204, 205, 236, 237, 268, 269, - 300, 301, 332, 333, 364, 365, 396, 397, 428, 429, 460, 461, 492, 13, 13, - 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205, 206, 237, 238, - 269, 270, 301, 302, 333, 334, 365, 366, 397, 398, 429, 430, 461, 462, 493, - 14, 14, 15, 46, 47, 78, 79, 110, 111, 142, 143, 174, 175, 206, 207, - 238, 239, 270, 271, 302, 303, 334, 335, 366, 
367, 398, 399, 430, 431, 462, - 463, 494, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175, 176, - 207, 208, 239, 240, 271, 272, 303, 304, 335, 336, 367, 368, 399, 400, 431, - 432, 463, 464, 495, 16, 16, 17, 48, 49, 80, 81, 112, 113, 144, 145, - 176, 177, 208, 209, 240, 241, 272, 273, 304, 305, 336, 337, 368, 369, 400, - 401, 432, 433, 464, 465, 496, 17, 17, 18, 49, 50, 81, 82, 113, 114, - 145, 146, 177, 178, 209, 210, 241, 242, 273, 274, 305, 306, 337, 338, 369, - 370, 401, 402, 433, 434, 465, 466, 497, 18, 18, 19, 50, 51, 82, 83, - 114, 115, 146, 147, 178, 179, 210, 211, 242, 243, 274, 275, 306, 307, 338, - 339, 370, 371, 402, 403, 434, 435, 466, 467, 498, 19, 19, 20, 51, 52, - 83, 84, 115, 116, 147, 148, 179, 180, 211, 212, 243, 244, 275, 276, 307, - 308, 339, 340, 371, 372, 403, 404, 435, 436, 467, 468, 499, 20, 20, 21, - 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212, 213, 244, 245, 276, - 277, 308, 309, 340, 341, 372, 373, 404, 405, 436, 437, 468, 469, 500, 21, - 21, 22, 53, 54, 85, 86, 117, 118, 149, 150, 181, 182, 213, 214, 245, - 246, 277, 278, 309, 310, 341, 342, 373, 374, 405, 406, 437, 438, 469, 470, - 501, 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182, 183, 214, - 215, 246, 247, 278, 279, 310, 311, 342, 343, 374, 375, 406, 407, 438, 439, - 470, 471, 502, 23, 23, 24, 55, 56, 87, 88, 119, 120, 151, 152, 183, - 184, 215, 216, 247, 248, 279, 280, 311, 312, 343, 344, 375, 376, 407, 408, - 439, 440, 471, 472, 503, 24, 24, 25, 56, 57, 88, 89, 120, 121, 152, - 153, 184, 185, 216, 217, 248, 249, 280, 281, 312, 313, 344, 345, 376, 377, - 408, 409, 440, 441, 472, 473, 504, 25, 25, 26, 57, 58, 89, 90, 121, - 122, 153, 154, 185, 186, 217, 218, 249, 250, 281, 282, 313, 314, 345, 346, - 377, 378, 409, 410, 441, 442, 473, 474, 505, 26, 26, 27, 58, 59, 90, - 91, 122, 123, 154, 155, 186, 187, 218, 219, 250, 251, 282, 283, 314, 315, - 346, 347, 378, 379, 410, 411, 442, 443, 474, 475, 506, 27, 27, 28, 59, - 60, 91, 92, 123, 124, 155, 156, 187, 188, 219, 220, 
251, 252, 283, 284, - 315, 316, 347, 348, 379, 380, 411, 412, 443, 444, 475, 476, 507, 28, 28, - 29, 60, 61, 92, 93, 124, 125, 156, 157, 188, 189, 220, 221, 252, 253, - 284, 285, 316, 317, 348, 349, 380, 381, 412, 413, 444, 445, 476, 477, 508, - 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189, 190, 221, 222, - 253, 254, 285, 286, 317, 318, 349, 350, 381, 382, 413, 414, 445, 446, 477, - 478, 509, 30, 30, 31, 62, 63, 94, 95, 126, 127, 158, 159, 190, 191, - 222, 223, 254, 255, 286, 287, 318, 319, 350, 351, 382, 383, 414, 415, 446, - 447, 478, 479, 510, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, - 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6, - 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28, - 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21, - 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43, - 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36, - 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58, - 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51, - 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73, - 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, 64, 64, 65, 80, 66, - 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88, - 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81, - 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103, - 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96, - 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, 103, 118, - 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111, - 126, 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, 133, - 119, 134, 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126, - 141, 127, 142, 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148, - 134, 149, 135, 150, 136, 151, 137, 152, 
138, 153, 139, 154, 140, 155, 141, - 156, 142, 157, 143, 158, 144, 144, 145, 160, 146, 161, 147, 162, 148, 163, - 149, 164, 150, 165, 151, 166, 152, 167, 153, 168, 154, 169, 155, 170, 156, - 171, 157, 172, 158, 173, 159, 174, 160, 160, 161, 176, 162, 177, 163, 178, - 164, 179, 165, 180, 166, 181, 167, 182, 168, 183, 169, 184, 170, 185, 171, - 186, 172, 187, 173, 188, 174, 189, 175, 190, 176, 176, 177, 192, 178, 193, - 179, 194, 180, 195, 181, 196, 182, 197, 183, 198, 184, 199, 185, 200, 186, - 201, 187, 202, 188, 203, 189, 204, 190, 205, 191, 206, 192, 192, 193, 208, - 194, 209, 195, 210, 196, 211, 197, 212, 198, 213, 199, 214, 200, 215, 201, - 216, 202, 217, 203, 218, 204, 219, 205, 220, 206, 221, 207, 222, 208, 208, - 209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, 229, 215, 230, 216, - 231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, 237, 223, 238, - 224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, 245, 231, - 246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253, - 239, 254, 240, 240, 241, 256, 242, 257, 243, 258, 244, 259, 245, 260, 246, - 261, 247, 262, 248, 263, 249, 264, 250, 265, 251, 266, 252, 267, 253, 268, - 254, 269, 255, 270, 256, 256, 257, 272, 258, 273, 259, 274, 260, 275, 261, - 276, 262, 277, 263, 278, 264, 279, 265, 280, 266, 281, 267, 282, 268, 283, - 269, 284, 270, 285, 271, 286, 272, 272, 273, 288, 274, 289, 275, 290, 276, - 291, 277, 292, 278, 293, 279, 294, 280, 295, 281, 296, 282, 297, 283, 298, - 284, 299, 285, 300, 286, 301, 287, 302, 288, 288, 289, 304, 290, 305, 291, - 306, 292, 307, 293, 308, 294, 309, 295, 310, 296, 311, 297, 312, 298, 313, - 299, 314, 300, 315, 301, 316, 302, 317, 303, 318, 304, 304, 305, 320, 306, - 321, 307, 322, 308, 323, 309, 324, 310, 325, 311, 326, 312, 327, 313, 328, - 314, 329, 315, 330, 316, 331, 317, 332, 318, 333, 319, 334, 320, 320, 321, - 336, 322, 337, 323, 338, 324, 339, 325, 340, 326, 341, 327, 342, 328, 343, - 329, 344, 330, 345, 331, 346, 332, 
347, 333, 348, 334, 349, 335, 350, 336, - 336, 337, 352, 338, 353, 339, 354, 340, 355, 341, 356, 342, 357, 343, 358, - 344, 359, 345, 360, 346, 361, 347, 362, 348, 363, 349, 364, 350, 365, 351, - 366, 352, 352, 353, 368, 354, 369, 355, 370, 356, 371, 357, 372, 358, 373, - 359, 374, 360, 375, 361, 376, 362, 377, 363, 378, 364, 379, 365, 380, 366, - 381, 367, 382, 368, 368, 369, 384, 370, 385, 371, 386, 372, 387, 373, 388, - 374, 389, 375, 390, 376, 391, 377, 392, 378, 393, 379, 394, 380, 395, 381, - 396, 382, 397, 383, 398, 384, 384, 385, 400, 386, 401, 387, 402, 388, 403, - 389, 404, 390, 405, 391, 406, 392, 407, 393, 408, 394, 409, 395, 410, 396, - 411, 397, 412, 398, 413, 399, 414, 400, 400, 401, 416, 402, 417, 403, 418, - 404, 419, 405, 420, 406, 421, 407, 422, 408, 423, 409, 424, 410, 425, 411, - 426, 412, 427, 413, 428, 414, 429, 415, 430, 416, 416, 417, 432, 418, 433, - 419, 434, 420, 435, 421, 436, 422, 437, 423, 438, 424, 439, 425, 440, 426, - 441, 427, 442, 428, 443, 429, 444, 430, 445, 431, 446, 432, 432, 433, 448, - 434, 449, 435, 450, 436, 451, 437, 452, 438, 453, 439, 454, 440, 455, 441, - 456, 442, 457, 443, 458, 444, 459, 445, 460, 446, 461, 447, 462, 448, 448, - 449, 464, 450, 465, 451, 466, 452, 467, 453, 468, 454, 469, 455, 470, 456, - 471, 457, 472, 458, 473, 459, 474, 460, 475, 461, 476, 462, 477, 463, 478, - 464, 464, 465, 480, 466, 481, 467, 482, 468, 483, 469, 484, 470, 485, 471, - 486, 472, 487, 473, 488, 474, 489, 475, 490, 476, 491, 477, 492, 478, 493, - 479, 494, 480, 480, 481, 496, 482, 497, 483, 498, 484, 499, 485, 500, 486, - 501, 487, 502, 488, 503, 489, 504, 490, 505, 491, 506, 492, 507, 493, 508, - 494, 509, 495, 510, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, - 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, - 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 
27, 27, 28, 28, - 29, 29, 30, 30, 0, 0, 1, 32, 2, 33, 3, 34, 4, 35, 5, - 36, 6, 37, 7, 38, 8, 39, 9, 40, 10, 41, 11, 42, 12, 43, - 13, 44, 14, 45, 15, 46, 16, 47, 17, 48, 18, 49, 19, 50, 20, - 51, 21, 52, 22, 53, 23, 54, 24, 55, 25, 56, 26, 57, 27, 58, - 28, 59, 29, 60, 30, 61, 31, 62, 32, 32, 33, 64, 34, 65, 35, - 66, 36, 67, 37, 68, 38, 69, 39, 70, 40, 71, 41, 72, 42, 73, - 43, 74, 44, 75, 45, 76, 46, 77, 47, 78, 48, 79, 49, 80, 50, - 81, 51, 82, 52, 83, 53, 84, 54, 85, 55, 86, 56, 87, 57, 88, - 58, 89, 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 64, 64, 65, - 96, 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, 102, 72, 103, - 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, 109, 79, 110, 80, - 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117, 87, 118, - 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124, 94, 125, 95, - 126, 96, 96, 97, 128, 98, 129, 99, 130, 100, 131, 101, 132, 102, 133, - 103, 134, 104, 135, 105, 136, 106, 137, 107, 138, 108, 139, 109, 140, 110, - 141, 111, 142, 112, 143, 113, 144, 114, 145, 115, 146, 116, 147, 117, 148, - 118, 149, 119, 150, 120, 151, 121, 152, 122, 153, 123, 154, 124, 155, 125, - 156, 126, 157, 127, 158, 128, 128, 129, 160, 130, 161, 131, 162, 132, 163, - 133, 164, 134, 165, 135, 166, 136, 167, 137, 168, 138, 169, 139, 170, 140, - 171, 141, 172, 142, 173, 143, 174, 144, 175, 145, 176, 146, 177, 147, 178, - 148, 179, 149, 180, 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155, - 186, 156, 187, 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193, - 163, 194, 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170, - 201, 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208, - 178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215, 185, - 216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222, 192, 192, - 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229, 199, 230, 200, - 231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236, 206, 237, 207, 238, - 208, 239, 
209, 240, 210, 241, 211, 242, 212, 243, 213, 244, 214, 245, 215, - 246, 216, 247, 217, 248, 218, 249, 219, 250, 220, 251, 221, 252, 222, 253, - 223, 254, 224, 224, 225, 256, 226, 257, 227, 258, 228, 259, 229, 260, 230, - 261, 231, 262, 232, 263, 233, 264, 234, 265, 235, 266, 236, 267, 237, 268, - 238, 269, 239, 270, 240, 271, 241, 272, 242, 273, 243, 274, 244, 275, 245, - 276, 246, 277, 247, 278, 248, 279, 249, 280, 250, 281, 251, 282, 252, 283, - 253, 284, 254, 285, 255, 286, 256, 256, 257, 288, 258, 289, 259, 290, 260, - 291, 261, 292, 262, 293, 263, 294, 264, 295, 265, 296, 266, 297, 267, 298, - 268, 299, 269, 300, 270, 301, 271, 302, 272, 303, 273, 304, 274, 305, 275, - 306, 276, 307, 277, 308, 278, 309, 279, 310, 280, 311, 281, 312, 282, 313, - 283, 314, 284, 315, 285, 316, 286, 317, 287, 318, 288, 288, 289, 320, 290, - 321, 291, 322, 292, 323, 293, 324, 294, 325, 295, 326, 296, 327, 297, 328, - 298, 329, 299, 330, 300, 331, 301, 332, 302, 333, 303, 334, 304, 335, 305, - 336, 306, 337, 307, 338, 308, 339, 309, 340, 310, 341, 311, 342, 312, 343, - 313, 344, 314, 345, 315, 346, 316, 347, 317, 348, 318, 349, 319, 350, 320, - 320, 321, 352, 322, 353, 323, 354, 324, 355, 325, 356, 326, 357, 327, 358, - 328, 359, 329, 360, 330, 361, 331, 362, 332, 363, 333, 364, 334, 365, 335, - 366, 336, 367, 337, 368, 338, 369, 339, 370, 340, 371, 341, 372, 342, 373, - 343, 374, 344, 375, 345, 376, 346, 377, 347, 378, 348, 379, 349, 380, 350, - 381, 351, 382, 352, 352, 353, 384, 354, 385, 355, 386, 356, 387, 357, 388, - 358, 389, 359, 390, 360, 391, 361, 392, 362, 393, 363, 394, 364, 395, 365, - 396, 366, 397, 367, 398, 368, 399, 369, 400, 370, 401, 371, 402, 372, 403, - 373, 404, 374, 405, 375, 406, 376, 407, 377, 408, 378, 409, 379, 410, 380, - 411, 381, 412, 382, 413, 383, 414, 384, 384, 385, 416, 386, 417, 387, 418, - 388, 419, 389, 420, 390, 421, 391, 422, 392, 423, 393, 424, 394, 425, 395, - 426, 396, 427, 397, 428, 398, 429, 399, 430, 400, 431, 401, 432, 402, 433, - 403, 
434, 404, 435, 405, 436, 406, 437, 407, 438, 408, 439, 409, 440, 410, - 441, 411, 442, 412, 443, 413, 444, 414, 445, 415, 446, 416, 416, 417, 448, - 418, 449, 419, 450, 420, 451, 421, 452, 422, 453, 423, 454, 424, 455, 425, - 456, 426, 457, 427, 458, 428, 459, 429, 460, 430, 461, 431, 462, 432, 463, - 433, 464, 434, 465, 435, 466, 436, 467, 437, 468, 438, 469, 439, 470, 440, - 471, 441, 472, 442, 473, 443, 474, 444, 475, 445, 476, 446, 477, 447, 478, - 448, 448, 449, 480, 450, 481, 451, 482, 452, 483, 453, 484, 454, 485, 455, - 486, 456, 487, 457, 488, 458, 489, 459, 490, 460, 491, 461, 492, 462, 493, - 463, 494, 464, 495, 465, 496, 466, 497, 467, 498, 468, 499, 469, 500, 470, - 501, 471, 502, 472, 503, 473, 504, 474, 505, 475, 506, 476, 507, 477, 508, - 478, 509, 479, 510, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96, - 96, 112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, 208, - 224, 224, 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65, 80, 81, - 96, 97, 112, 113, 128, 129, 144, 145, 160, 161, 176, 177, 192, 193, 208, - 209, 224, 225, 240, 1, 1, 2, 17, 18, 33, 34, 49, 50, 65, 66, - 81, 82, 97, 98, 113, 114, 129, 130, 145, 146, 161, 162, 177, 178, 193, - 194, 209, 210, 225, 226, 241, 2, 2, 3, 18, 19, 34, 35, 50, 51, - 66, 67, 82, 83, 98, 99, 114, 115, 130, 131, 146, 147, 162, 163, 178, - 179, 194, 195, 210, 211, 226, 227, 242, 3, 3, 4, 19, 20, 35, 36, - 51, 52, 67, 68, 83, 84, 99, 100, 115, 116, 131, 132, 147, 148, 163, - 164, 179, 180, 195, 196, 211, 212, 227, 228, 243, 4, 4, 5, 20, 21, - 36, 37, 52, 53, 68, 69, 84, 85, 100, 101, 116, 117, 132, 133, 148, - 149, 164, 165, 180, 181, 196, 197, 212, 213, 228, 229, 244, 5, 5, 6, - 21, 22, 37, 38, 53, 54, 69, 70, 85, 86, 101, 102, 117, 118, 133, - 134, 149, 150, 165, 166, 181, 182, 197, 198, 213, 214, 229, 230, 245, 6, - 6, 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118, - 119, 134, 
135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, 230, 231, - 246, 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88, 103, - 104, 119, 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215, 216, - 231, 232, 247, 8, 8, 9, 24, 25, 40, 41, 56, 57, 72, 73, 88, - 89, 104, 105, 120, 121, 136, 137, 152, 153, 168, 169, 184, 185, 200, 201, - 216, 217, 232, 233, 248, 9, 9, 10, 25, 26, 41, 42, 57, 58, 73, - 74, 89, 90, 105, 106, 121, 122, 137, 138, 153, 154, 169, 170, 185, 186, - 201, 202, 217, 218, 233, 234, 249, 10, 10, 11, 26, 27, 42, 43, 58, - 59, 74, 75, 90, 91, 106, 107, 122, 123, 138, 139, 154, 155, 170, 171, - 186, 187, 202, 203, 218, 219, 234, 235, 250, 11, 11, 12, 27, 28, 43, - 44, 59, 60, 75, 76, 91, 92, 107, 108, 123, 124, 139, 140, 155, 156, - 171, 172, 187, 188, 203, 204, 219, 220, 235, 236, 251, 12, 12, 13, 28, - 29, 44, 45, 60, 61, 76, 77, 92, 93, 108, 109, 124, 125, 140, 141, - 156, 157, 172, 173, 188, 189, 204, 205, 220, 221, 236, 237, 252, 13, 13, - 14, 29, 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125, 126, - 141, 142, 157, 158, 173, 174, 189, 190, 205, 206, 221, 222, 237, 238, 253, - 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111, - 126, 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, 238, - 239, 254, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, - 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6, - 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28, - 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21, - 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43, - 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36, - 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58, - 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51, - 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73, - 59, 74, 60, 75, 61, 76, 62, 
77, 63, 78, 64, 64, 65, 80, 66, - 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88, - 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81, - 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103, - 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96, - 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, 103, 118, - 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111, - 126, 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, 133, - 119, 134, 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126, - 141, 127, 142, 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148, - 134, 149, 135, 150, 136, 151, 137, 152, 138, 153, 139, 154, 140, 155, 141, - 156, 142, 157, 143, 158, 144, 144, 145, 160, 146, 161, 147, 162, 148, 163, - 149, 164, 150, 165, 151, 166, 152, 167, 153, 168, 154, 169, 155, 170, 156, - 171, 157, 172, 158, 173, 159, 174, 160, 160, 161, 176, 162, 177, 163, 178, - 164, 179, 165, 180, 166, 181, 167, 182, 168, 183, 169, 184, 170, 185, 171, - 186, 172, 187, 173, 188, 174, 189, 175, 190, 176, 176, 177, 192, 178, 193, - 179, 194, 180, 195, 181, 196, 182, 197, 183, 198, 184, 199, 185, 200, 186, - 201, 187, 202, 188, 203, 189, 204, 190, 205, 191, 206, 192, 192, 193, 208, - 194, 209, 195, 210, 196, 211, 197, 212, 198, 213, 199, 214, 200, 215, 201, - 216, 202, 217, 203, 218, 204, 219, 205, 220, 206, 221, 207, 222, 208, 208, - 209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, 229, 215, 230, 216, - 231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, 237, 223, 238, - 224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, 245, 231, - 246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253, - 239, 254, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 2, 2, 2, - 17, 17, 32, 32, 32, 48, 48, 33, 48, 18, 33, 3, 18, 3, 3, - 4, 4, 4, 19, 
19, 34, 34, 49, 49, 64, 64, 64, 80, 80, 65, - 80, 50, 65, 35, 50, 20, 35, 5, 20, 5, 5, 6, 6, 6, 21, - 21, 36, 36, 51, 51, 66, 66, 81, 81, 96, 96, 96, 112, 112, 97, - 112, 82, 97, 67, 82, 52, 67, 37, 52, 22, 37, 7, 22, 7, 7, - 8, 8, 8, 23, 23, 38, 38, 53, 53, 68, 68, 83, 83, 98, 98, - 113, 113, 128, 128, 128, 144, 144, 129, 144, 114, 129, 99, 114, 84, 99, - 69, 84, 54, 69, 39, 54, 24, 39, 9, 24, 9, 9, 10, 10, 10, - 25, 25, 40, 40, 55, 55, 70, 70, 85, 85, 100, 100, 115, 115, 130, - 130, 145, 145, 160, 160, 160, 176, 176, 161, 176, 146, 161, 131, 146, 116, - 131, 101, 116, 86, 101, 71, 86, 56, 71, 41, 56, 26, 41, 11, 26, - 11, 11, 12, 12, 12, 27, 27, 42, 42, 57, 57, 72, 72, 87, 87, - 102, 102, 117, 117, 132, 132, 147, 147, 162, 162, 177, 177, 192, 192, 192, - 208, 208, 193, 208, 178, 193, 163, 178, 148, 163, 133, 148, 118, 133, 103, - 118, 88, 103, 73, 88, 58, 73, 43, 58, 28, 43, 13, 28, 13, 13, - 14, 14, 14, 29, 29, 44, 44, 59, 59, 74, 74, 89, 89, 104, 104, - 119, 119, 134, 134, 149, 149, 164, 164, 179, 179, 194, 194, 209, 209, 224, - 224, 224, 225, 240, 210, 225, 195, 210, 180, 195, 165, 180, 150, 165, 135, - 150, 120, 135, 105, 120, 90, 105, 75, 90, 60, 75, 45, 60, 30, 45, - 15, 30, 31, 46, 46, 61, 61, 76, 76, 91, 91, 106, 106, 121, 121, - 136, 136, 151, 151, 166, 166, 181, 181, 196, 196, 211, 211, 226, 226, 241, - 227, 242, 212, 227, 197, 212, 182, 197, 167, 182, 152, 167, 137, 152, 122, - 137, 107, 122, 92, 107, 77, 92, 62, 77, 47, 62, 63, 78, 78, 93, - 93, 108, 108, 123, 123, 138, 138, 153, 153, 168, 168, 183, 183, 198, 198, - 213, 213, 228, 228, 243, 229, 244, 214, 229, 199, 214, 184, 199, 169, 184, - 154, 169, 139, 154, 124, 139, 109, 124, 94, 109, 79, 94, 95, 110, 110, - 125, 125, 140, 140, 155, 155, 170, 170, 185, 185, 200, 200, 215, 215, 230, - 230, 245, 231, 246, 216, 231, 201, 216, 186, 201, 171, 186, 156, 171, 141, - 156, 126, 141, 111, 126, 127, 142, 142, 157, 157, 172, 172, 187, 187, 202, - 202, 217, 217, 232, 232, 247, 233, 248, 218, 233, 203, 218, 
188, 203, 173, - 188, 158, 173, 143, 158, 159, 174, 174, 189, 189, 204, 204, 219, 219, 234, - 234, 249, 235, 250, 220, 235, 205, 220, 190, 205, 175, 190, 191, 206, 206, - 221, 221, 236, 236, 251, 237, 252, 222, 237, 207, 222, 223, 238, 238, 253, - 239, 254, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, - mcol_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160, - 192, 192, 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384, - 416, 416, 448, 448, 480, 480, 512, 512, 544, 544, 576, 576, 608, 608, - 640, 640, 672, 672, 704, 704, 736, 736, 768, 768, 800, 800, 832, 832, - 864, 864, 896, 896, 928, 928, 960, 960, 0, 0, 1, 32, 33, 64, - 65, 96, 97, 128, 129, 160, 161, 192, 193, 224, 225, 256, 257, 288, - 289, 320, 321, 352, 353, 384, 385, 416, 417, 448, 449, 480, 481, 512, - 513, 544, 545, 576, 577, 608, 609, 640, 641, 672, 673, 704, 705, 736, - 737, 768, 769, 800, 801, 832, 833, 864, 865, 896, 897, 928, 929, 960, - 961, 992, 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130, 161, - 162, 193, 194, 225, 226, 257, 258, 289, 290, 321, 322, 353, 354, 385, - 386, 417, 418, 449, 450, 481, 482, 513, 514, 545, 546, 577, 578, 609, - 610, 641, 642, 673, 674, 705, 706, 737, 738, 769, 770, 801, 802, 833, - 834, 865, 866, 897, 898, 929, 930, 961, 962, 993, 2, 2, 3, 34, - 35, 66, 67, 98, 99, 130, 131, 162, 163, 194, 195, 226, 227, 258, - 259, 290, 291, 322, 323, 354, 355, 386, 387, 418, 419, 450, 451, 482, - 483, 514, 515, 546, 547, 578, 579, 610, 611, 642, 643, 674, 675, 706, - 707, 738, 739, 770, 771, 802, 803, 834, 835, 866, 867, 898, 899, 930, - 931, 962, 963, 994, 3, 3, 4, 35, 36, 67, 68, 99, 100, 131, - 132, 163, 164, 195, 196, 227, 228, 259, 260, 291, 292, 323, 324, 355, - 356, 387, 388, 419, 420, 451, 452, 483, 484, 515, 516, 547, 548, 579, - 580, 611, 612, 643, 644, 675, 676, 707, 708, 739, 740, 771, 772, 803, - 804, 835, 836, 867, 868, 899, 900, 931, 932, 963, 964, 995, 4, 4, - 5, 36, 37, 68, 69, 100, 101, 132, 133, 
164, 165, 196, 197, 228, - 229, 260, 261, 292, 293, 324, 325, 356, 357, 388, 389, 420, 421, 452, - 453, 484, 485, 516, 517, 548, 549, 580, 581, 612, 613, 644, 645, 676, - 677, 708, 709, 740, 741, 772, 773, 804, 805, 836, 837, 868, 869, 900, - 901, 932, 933, 964, 965, 996, 5, 5, 6, 37, 38, 69, 70, 101, - 102, 133, 134, 165, 166, 197, 198, 229, 230, 261, 262, 293, 294, 325, - 326, 357, 358, 389, 390, 421, 422, 453, 454, 485, 486, 517, 518, 549, - 550, 581, 582, 613, 614, 645, 646, 677, 678, 709, 710, 741, 742, 773, - 774, 805, 806, 837, 838, 869, 870, 901, 902, 933, 934, 965, 966, 997, - 6, 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198, - 199, 230, 231, 262, 263, 294, 295, 326, 327, 358, 359, 390, 391, 422, - 423, 454, 455, 486, 487, 518, 519, 550, 551, 582, 583, 614, 615, 646, - 647, 678, 679, 710, 711, 742, 743, 774, 775, 806, 807, 838, 839, 870, - 871, 902, 903, 934, 935, 966, 967, 998, 7, 7, 8, 39, 40, 71, - 72, 103, 104, 135, 136, 167, 168, 199, 200, 231, 232, 263, 264, 295, - 296, 327, 328, 359, 360, 391, 392, 423, 424, 455, 456, 487, 488, 519, - 520, 551, 552, 583, 584, 615, 616, 647, 648, 679, 680, 711, 712, 743, - 744, 775, 776, 807, 808, 839, 840, 871, 872, 903, 904, 935, 936, 967, - 968, 999, 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168, - 169, 200, 201, 232, 233, 264, 265, 296, 297, 328, 329, 360, 361, 392, - 393, 424, 425, 456, 457, 488, 489, 520, 521, 552, 553, 584, 585, 616, - 617, 648, 649, 680, 681, 712, 713, 744, 745, 776, 777, 808, 809, 840, - 841, 872, 873, 904, 905, 936, 937, 968, 969, 1000, 9, 9, 10, 41, - 42, 73, 74, 105, 106, 137, 138, 169, 170, 201, 202, 233, 234, 265, - 266, 297, 298, 329, 330, 361, 362, 393, 394, 425, 426, 457, 458, 489, - 490, 521, 522, 553, 554, 585, 586, 617, 618, 649, 650, 681, 682, 713, - 714, 745, 746, 777, 778, 809, 810, 841, 842, 873, 874, 905, 906, 937, - 938, 969, 970, 1001, 10, 10, 11, 42, 43, 74, 75, 106, 107, 138, - 139, 170, 171, 202, 203, 234, 235, 266, 267, 298, 299, 330, 331, 362, - 363, 394, 
395, 426, 427, 458, 459, 490, 491, 522, 523, 554, 555, 586, - 587, 618, 619, 650, 651, 682, 683, 714, 715, 746, 747, 778, 779, 810, - 811, 842, 843, 874, 875, 906, 907, 938, 939, 970, 971, 1002, 11, 11, - 12, 43, 44, 75, 76, 107, 108, 139, 140, 171, 172, 203, 204, 235, - 236, 267, 268, 299, 300, 331, 332, 363, 364, 395, 396, 427, 428, 459, - 460, 491, 492, 523, 524, 555, 556, 587, 588, 619, 620, 651, 652, 683, - 684, 715, 716, 747, 748, 779, 780, 811, 812, 843, 844, 875, 876, 907, - 908, 939, 940, 971, 972, 1003, 12, 12, 13, 44, 45, 76, 77, 108, - 109, 140, 141, 172, 173, 204, 205, 236, 237, 268, 269, 300, 301, 332, - 333, 364, 365, 396, 397, 428, 429, 460, 461, 492, 493, 524, 525, 556, - 557, 588, 589, 620, 621, 652, 653, 684, 685, 716, 717, 748, 749, 780, - 781, 812, 813, 844, 845, 876, 877, 908, 909, 940, 941, 972, 973, 1004, - 13, 13, 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205, - 206, 237, 238, 269, 270, 301, 302, 333, 334, 365, 366, 397, 398, 429, - 430, 461, 462, 493, 494, 525, 526, 557, 558, 589, 590, 621, 622, 653, - 654, 685, 686, 717, 718, 749, 750, 781, 782, 813, 814, 845, 846, 877, - 878, 909, 910, 941, 942, 973, 974, 1005, 14, 14, 15, 46, 47, 78, - 79, 110, 111, 142, 143, 174, 175, 206, 207, 238, 239, 270, 271, 302, - 303, 334, 335, 366, 367, 398, 399, 430, 431, 462, 463, 494, 495, 526, - 527, 558, 559, 590, 591, 622, 623, 654, 655, 686, 687, 718, 719, 750, - 751, 782, 783, 814, 815, 846, 847, 878, 879, 910, 911, 942, 943, 974, - 975, 1006, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175, - 176, 207, 208, 239, 240, 271, 272, 303, 304, 335, 336, 367, 368, 399, - 400, 431, 432, 463, 464, 495, 496, 527, 528, 559, 560, 591, 592, 623, - 624, 655, 656, 687, 688, 719, 720, 751, 752, 783, 784, 815, 816, 847, - 848, 879, 880, 911, 912, 943, 944, 975, 976, 1007, 16, 16, 17, 48, - 49, 80, 81, 112, 113, 144, 145, 176, 177, 208, 209, 240, 241, 272, - 273, 304, 305, 336, 337, 368, 369, 400, 401, 432, 433, 464, 465, 496, - 497, 528, 529, 560, 561, 592, 
593, 624, 625, 656, 657, 688, 689, 720, - 721, 752, 753, 784, 785, 816, 817, 848, 849, 880, 881, 912, 913, 944, - 945, 976, 977, 1008, 17, 17, 18, 49, 50, 81, 82, 113, 114, 145, - 146, 177, 178, 209, 210, 241, 242, 273, 274, 305, 306, 337, 338, 369, - 370, 401, 402, 433, 434, 465, 466, 497, 498, 529, 530, 561, 562, 593, - 594, 625, 626, 657, 658, 689, 690, 721, 722, 753, 754, 785, 786, 817, - 818, 849, 850, 881, 882, 913, 914, 945, 946, 977, 978, 1009, 18, 18, - 19, 50, 51, 82, 83, 114, 115, 146, 147, 178, 179, 210, 211, 242, - 243, 274, 275, 306, 307, 338, 339, 370, 371, 402, 403, 434, 435, 466, - 467, 498, 499, 530, 531, 562, 563, 594, 595, 626, 627, 658, 659, 690, - 691, 722, 723, 754, 755, 786, 787, 818, 819, 850, 851, 882, 883, 914, - 915, 946, 947, 978, 979, 1010, 19, 19, 20, 51, 52, 83, 84, 115, - 116, 147, 148, 179, 180, 211, 212, 243, 244, 275, 276, 307, 308, 339, - 340, 371, 372, 403, 404, 435, 436, 467, 468, 499, 500, 531, 532, 563, - 564, 595, 596, 627, 628, 659, 660, 691, 692, 723, 724, 755, 756, 787, - 788, 819, 820, 851, 852, 883, 884, 915, 916, 947, 948, 979, 980, 1011, - 20, 20, 21, 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212, - 213, 244, 245, 276, 277, 308, 309, 340, 341, 372, 373, 404, 405, 436, - 437, 468, 469, 500, 501, 532, 533, 564, 565, 596, 597, 628, 629, 660, - 661, 692, 693, 724, 725, 756, 757, 788, 789, 820, 821, 852, 853, 884, - 885, 916, 917, 948, 949, 980, 981, 1012, 21, 21, 22, 53, 54, 85, - 86, 117, 118, 149, 150, 181, 182, 213, 214, 245, 246, 277, 278, 309, - 310, 341, 342, 373, 374, 405, 406, 437, 438, 469, 470, 501, 502, 533, - 534, 565, 566, 597, 598, 629, 630, 661, 662, 693, 694, 725, 726, 757, - 758, 789, 790, 821, 822, 853, 854, 885, 886, 917, 918, 949, 950, 981, - 982, 1013, 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182, - 183, 214, 215, 246, 247, 278, 279, 310, 311, 342, 343, 374, 375, 406, - 407, 438, 439, 470, 471, 502, 503, 534, 535, 566, 567, 598, 599, 630, - 631, 662, 663, 694, 695, 726, 727, 758, 759, 790, 
791, 822, 823, 854, - 855, 886, 887, 918, 919, 950, 951, 982, 983, 1014, 23, 23, 24, 55, - 56, 87, 88, 119, 120, 151, 152, 183, 184, 215, 216, 247, 248, 279, - 280, 311, 312, 343, 344, 375, 376, 407, 408, 439, 440, 471, 472, 503, - 504, 535, 536, 567, 568, 599, 600, 631, 632, 663, 664, 695, 696, 727, - 728, 759, 760, 791, 792, 823, 824, 855, 856, 887, 888, 919, 920, 951, - 952, 983, 984, 1015, 24, 24, 25, 56, 57, 88, 89, 120, 121, 152, - 153, 184, 185, 216, 217, 248, 249, 280, 281, 312, 313, 344, 345, 376, - 377, 408, 409, 440, 441, 472, 473, 504, 505, 536, 537, 568, 569, 600, - 601, 632, 633, 664, 665, 696, 697, 728, 729, 760, 761, 792, 793, 824, - 825, 856, 857, 888, 889, 920, 921, 952, 953, 984, 985, 1016, 25, 25, - 26, 57, 58, 89, 90, 121, 122, 153, 154, 185, 186, 217, 218, 249, - 250, 281, 282, 313, 314, 345, 346, 377, 378, 409, 410, 441, 442, 473, - 474, 505, 506, 537, 538, 569, 570, 601, 602, 633, 634, 665, 666, 697, - 698, 729, 730, 761, 762, 793, 794, 825, 826, 857, 858, 889, 890, 921, - 922, 953, 954, 985, 986, 1017, 26, 26, 27, 58, 59, 90, 91, 122, - 123, 154, 155, 186, 187, 218, 219, 250, 251, 282, 283, 314, 315, 346, - 347, 378, 379, 410, 411, 442, 443, 474, 475, 506, 507, 538, 539, 570, - 571, 602, 603, 634, 635, 666, 667, 698, 699, 730, 731, 762, 763, 794, - 795, 826, 827, 858, 859, 890, 891, 922, 923, 954, 955, 986, 987, 1018, - 27, 27, 28, 59, 60, 91, 92, 123, 124, 155, 156, 187, 188, 219, - 220, 251, 252, 283, 284, 315, 316, 347, 348, 379, 380, 411, 412, 443, - 444, 475, 476, 507, 508, 539, 540, 571, 572, 603, 604, 635, 636, 667, - 668, 699, 700, 731, 732, 763, 764, 795, 796, 827, 828, 859, 860, 891, - 892, 923, 924, 955, 956, 987, 988, 1019, 28, 28, 29, 60, 61, 92, - 93, 124, 125, 156, 157, 188, 189, 220, 221, 252, 253, 284, 285, 316, - 317, 348, 349, 380, 381, 412, 413, 444, 445, 476, 477, 508, 509, 540, - 541, 572, 573, 604, 605, 636, 637, 668, 669, 700, 701, 732, 733, 764, - 765, 796, 797, 828, 829, 860, 861, 892, 893, 924, 925, 956, 957, 988, 
- 989, 1020, 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189, - 190, 221, 222, 253, 254, 285, 286, 317, 318, 349, 350, 381, 382, 413, - 414, 445, 446, 477, 478, 509, 510, 541, 542, 573, 574, 605, 606, 637, - 638, 669, 670, 701, 702, 733, 734, 765, 766, 797, 798, 829, 830, 861, - 862, 893, 894, 925, 926, 957, 958, 989, 990, 1021, 30, 30, 31, 62, - 63, 94, 95, 126, 127, 158, 159, 190, 191, 222, 223, 254, 255, 286, - 287, 318, 319, 350, 351, 382, 383, 414, 415, 446, 447, 478, 479, 510, - 511, 542, 543, 574, 575, 606, 607, 638, 639, 670, 671, 702, 703, 734, - 735, 766, 767, 798, 799, 830, 831, 862, 863, 894, 895, 926, 927, 958, - 959, 990, 991, 1022, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - mrow_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, - 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, - 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, - 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, - 27, 27, 28, 28, 29, 29, 30, 30, 0, 0, 1, 32, 2, 33, - 3, 34, 4, 35, 5, 36, 6, 37, 7, 38, 8, 39, 9, 40, - 10, 41, 11, 42, 12, 43, 13, 44, 14, 45, 15, 46, 16, 47, - 17, 48, 18, 49, 19, 50, 20, 51, 21, 52, 22, 53, 23, 54, - 24, 55, 25, 56, 26, 57, 27, 58, 28, 59, 29, 60, 30, 61, - 31, 62, 32, 32, 33, 64, 34, 65, 35, 66, 36, 67, 37, 68, - 38, 69, 39, 70, 40, 71, 41, 72, 42, 73, 43, 74, 44, 75, - 45, 76, 46, 77, 47, 78, 48, 79, 49, 80, 50, 81, 51, 82, - 52, 83, 53, 84, 54, 85, 55, 86, 56, 87, 57, 88, 58, 89, - 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 64, 64, 65, 96, - 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, 102, 72, 103, - 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, 109, 79, 110, - 80, 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117, - 87, 118, 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124, - 94, 125, 95, 126, 96, 96, 97, 128, 98, 129, 99, 130, 100, 131, - 101, 132, 102, 133, 103, 134, 104, 135, 105, 136, 106, 137, 107, 138, - 108, 139, 109, 140, 110, 141, 111, 142, 112, 143, 113, 
144, 114, 145, - 115, 146, 116, 147, 117, 148, 118, 149, 119, 150, 120, 151, 121, 152, - 122, 153, 123, 154, 124, 155, 125, 156, 126, 157, 127, 158, 128, 128, - 129, 160, 130, 161, 131, 162, 132, 163, 133, 164, 134, 165, 135, 166, - 136, 167, 137, 168, 138, 169, 139, 170, 140, 171, 141, 172, 142, 173, - 143, 174, 144, 175, 145, 176, 146, 177, 147, 178, 148, 179, 149, 180, - 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155, 186, 156, 187, - 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193, 163, 194, - 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170, 201, - 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208, - 178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215, - 185, 216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222, - 192, 192, 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229, - 199, 230, 200, 231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236, - 206, 237, 207, 238, 208, 239, 209, 240, 210, 241, 211, 242, 212, 243, - 213, 244, 214, 245, 215, 246, 216, 247, 217, 248, 218, 249, 219, 250, - 220, 251, 221, 252, 222, 253, 223, 254, 224, 224, 225, 256, 226, 257, - 227, 258, 228, 259, 229, 260, 230, 261, 231, 262, 232, 263, 233, 264, - 234, 265, 235, 266, 236, 267, 237, 268, 238, 269, 239, 270, 240, 271, - 241, 272, 242, 273, 243, 274, 244, 275, 245, 276, 246, 277, 247, 278, - 248, 279, 249, 280, 250, 281, 251, 282, 252, 283, 253, 284, 254, 285, - 255, 286, 256, 256, 257, 288, 258, 289, 259, 290, 260, 291, 261, 292, - 262, 293, 263, 294, 264, 295, 265, 296, 266, 297, 267, 298, 268, 299, - 269, 300, 270, 301, 271, 302, 272, 303, 273, 304, 274, 305, 275, 306, - 276, 307, 277, 308, 278, 309, 279, 310, 280, 311, 281, 312, 282, 313, - 283, 314, 284, 315, 285, 316, 286, 317, 287, 318, 288, 288, 289, 320, - 290, 321, 291, 322, 292, 323, 293, 324, 294, 325, 295, 326, 296, 327, - 297, 328, 298, 329, 299, 330, 300, 331, 301, 332, 302, 333, 303, 334, - 304, 335, 305, 336, 306, 337, 307, 
338, 308, 339, 309, 340, 310, 341, - 311, 342, 312, 343, 313, 344, 314, 345, 315, 346, 316, 347, 317, 348, - 318, 349, 319, 350, 320, 320, 321, 352, 322, 353, 323, 354, 324, 355, - 325, 356, 326, 357, 327, 358, 328, 359, 329, 360, 330, 361, 331, 362, - 332, 363, 333, 364, 334, 365, 335, 366, 336, 367, 337, 368, 338, 369, - 339, 370, 340, 371, 341, 372, 342, 373, 343, 374, 344, 375, 345, 376, - 346, 377, 347, 378, 348, 379, 349, 380, 350, 381, 351, 382, 352, 352, - 353, 384, 354, 385, 355, 386, 356, 387, 357, 388, 358, 389, 359, 390, - 360, 391, 361, 392, 362, 393, 363, 394, 364, 395, 365, 396, 366, 397, - 367, 398, 368, 399, 369, 400, 370, 401, 371, 402, 372, 403, 373, 404, - 374, 405, 375, 406, 376, 407, 377, 408, 378, 409, 379, 410, 380, 411, - 381, 412, 382, 413, 383, 414, 384, 384, 385, 416, 386, 417, 387, 418, - 388, 419, 389, 420, 390, 421, 391, 422, 392, 423, 393, 424, 394, 425, - 395, 426, 396, 427, 397, 428, 398, 429, 399, 430, 400, 431, 401, 432, - 402, 433, 403, 434, 404, 435, 405, 436, 406, 437, 407, 438, 408, 439, - 409, 440, 410, 441, 411, 442, 412, 443, 413, 444, 414, 445, 415, 446, - 416, 416, 417, 448, 418, 449, 419, 450, 420, 451, 421, 452, 422, 453, - 423, 454, 424, 455, 425, 456, 426, 457, 427, 458, 428, 459, 429, 460, - 430, 461, 431, 462, 432, 463, 433, 464, 434, 465, 435, 466, 436, 467, - 437, 468, 438, 469, 439, 470, 440, 471, 441, 472, 442, 473, 443, 474, - 444, 475, 445, 476, 446, 477, 447, 478, 448, 448, 449, 480, 450, 481, - 451, 482, 452, 483, 453, 484, 454, 485, 455, 486, 456, 487, 457, 488, - 458, 489, 459, 490, 460, 491, 461, 492, 462, 493, 463, 494, 464, 495, - 465, 496, 466, 497, 467, 498, 468, 499, 469, 500, 470, 501, 471, 502, - 472, 503, 473, 504, 474, 505, 475, 506, 476, 507, 477, 508, 478, 509, - 479, 510, 480, 480, 481, 512, 482, 513, 483, 514, 484, 515, 485, 516, - 486, 517, 487, 518, 488, 519, 489, 520, 490, 521, 491, 522, 492, 523, - 493, 524, 494, 525, 495, 526, 496, 527, 497, 528, 498, 529, 499, 530, - 500, 531, 501, 
532, 502, 533, 503, 534, 504, 535, 505, 536, 506, 537, - 507, 538, 508, 539, 509, 540, 510, 541, 511, 542, 512, 512, 513, 544, - 514, 545, 515, 546, 516, 547, 517, 548, 518, 549, 519, 550, 520, 551, - 521, 552, 522, 553, 523, 554, 524, 555, 525, 556, 526, 557, 527, 558, - 528, 559, 529, 560, 530, 561, 531, 562, 532, 563, 533, 564, 534, 565, - 535, 566, 536, 567, 537, 568, 538, 569, 539, 570, 540, 571, 541, 572, - 542, 573, 543, 574, 544, 544, 545, 576, 546, 577, 547, 578, 548, 579, - 549, 580, 550, 581, 551, 582, 552, 583, 553, 584, 554, 585, 555, 586, - 556, 587, 557, 588, 558, 589, 559, 590, 560, 591, 561, 592, 562, 593, - 563, 594, 564, 595, 565, 596, 566, 597, 567, 598, 568, 599, 569, 600, - 570, 601, 571, 602, 572, 603, 573, 604, 574, 605, 575, 606, 576, 576, - 577, 608, 578, 609, 579, 610, 580, 611, 581, 612, 582, 613, 583, 614, - 584, 615, 585, 616, 586, 617, 587, 618, 588, 619, 589, 620, 590, 621, - 591, 622, 592, 623, 593, 624, 594, 625, 595, 626, 596, 627, 597, 628, - 598, 629, 599, 630, 600, 631, 601, 632, 602, 633, 603, 634, 604, 635, - 605, 636, 606, 637, 607, 638, 608, 608, 609, 640, 610, 641, 611, 642, - 612, 643, 613, 644, 614, 645, 615, 646, 616, 647, 617, 648, 618, 649, - 619, 650, 620, 651, 621, 652, 622, 653, 623, 654, 624, 655, 625, 656, - 626, 657, 627, 658, 628, 659, 629, 660, 630, 661, 631, 662, 632, 663, - 633, 664, 634, 665, 635, 666, 636, 667, 637, 668, 638, 669, 639, 670, - 640, 640, 641, 672, 642, 673, 643, 674, 644, 675, 645, 676, 646, 677, - 647, 678, 648, 679, 649, 680, 650, 681, 651, 682, 652, 683, 653, 684, - 654, 685, 655, 686, 656, 687, 657, 688, 658, 689, 659, 690, 660, 691, - 661, 692, 662, 693, 663, 694, 664, 695, 665, 696, 666, 697, 667, 698, - 668, 699, 669, 700, 670, 701, 671, 702, 672, 672, 673, 704, 674, 705, - 675, 706, 676, 707, 677, 708, 678, 709, 679, 710, 680, 711, 681, 712, - 682, 713, 683, 714, 684, 715, 685, 716, 686, 717, 687, 718, 688, 719, - 689, 720, 690, 721, 691, 722, 692, 723, 693, 724, 694, 725, 695, 726, 
- 696, 727, 697, 728, 698, 729, 699, 730, 700, 731, 701, 732, 702, 733, - 703, 734, 704, 704, 705, 736, 706, 737, 707, 738, 708, 739, 709, 740, - 710, 741, 711, 742, 712, 743, 713, 744, 714, 745, 715, 746, 716, 747, - 717, 748, 718, 749, 719, 750, 720, 751, 721, 752, 722, 753, 723, 754, - 724, 755, 725, 756, 726, 757, 727, 758, 728, 759, 729, 760, 730, 761, - 731, 762, 732, 763, 733, 764, 734, 765, 735, 766, 736, 736, 737, 768, - 738, 769, 739, 770, 740, 771, 741, 772, 742, 773, 743, 774, 744, 775, - 745, 776, 746, 777, 747, 778, 748, 779, 749, 780, 750, 781, 751, 782, - 752, 783, 753, 784, 754, 785, 755, 786, 756, 787, 757, 788, 758, 789, - 759, 790, 760, 791, 761, 792, 762, 793, 763, 794, 764, 795, 765, 796, - 766, 797, 767, 798, 768, 768, 769, 800, 770, 801, 771, 802, 772, 803, - 773, 804, 774, 805, 775, 806, 776, 807, 777, 808, 778, 809, 779, 810, - 780, 811, 781, 812, 782, 813, 783, 814, 784, 815, 785, 816, 786, 817, - 787, 818, 788, 819, 789, 820, 790, 821, 791, 822, 792, 823, 793, 824, - 794, 825, 795, 826, 796, 827, 797, 828, 798, 829, 799, 830, 800, 800, - 801, 832, 802, 833, 803, 834, 804, 835, 805, 836, 806, 837, 807, 838, - 808, 839, 809, 840, 810, 841, 811, 842, 812, 843, 813, 844, 814, 845, - 815, 846, 816, 847, 817, 848, 818, 849, 819, 850, 820, 851, 821, 852, - 822, 853, 823, 854, 824, 855, 825, 856, 826, 857, 827, 858, 828, 859, - 829, 860, 830, 861, 831, 862, 832, 832, 833, 864, 834, 865, 835, 866, - 836, 867, 837, 868, 838, 869, 839, 870, 840, 871, 841, 872, 842, 873, - 843, 874, 844, 875, 845, 876, 846, 877, 847, 878, 848, 879, 849, 880, - 850, 881, 851, 882, 852, 883, 853, 884, 854, 885, 855, 886, 856, 887, - 857, 888, 858, 889, 859, 890, 860, 891, 861, 892, 862, 893, 863, 894, - 864, 864, 865, 896, 866, 897, 867, 898, 868, 899, 869, 900, 870, 901, - 871, 902, 872, 903, 873, 904, 874, 905, 875, 906, 876, 907, 877, 908, - 878, 909, 879, 910, 880, 911, 881, 912, 882, 913, 883, 914, 884, 915, - 885, 916, 886, 917, 887, 918, 888, 919, 889, 920, 
890, 921, 891, 922, - 892, 923, 893, 924, 894, 925, 895, 926, 896, 896, 897, 928, 898, 929, - 899, 930, 900, 931, 901, 932, 902, 933, 903, 934, 904, 935, 905, 936, - 906, 937, 907, 938, 908, 939, 909, 940, 910, 941, 911, 942, 912, 943, - 913, 944, 914, 945, 915, 946, 916, 947, 917, 948, 918, 949, 919, 950, - 920, 951, 921, 952, 922, 953, 923, 954, 924, 955, 925, 956, 926, 957, - 927, 958, 928, 928, 929, 960, 930, 961, 931, 962, 932, 963, 933, 964, - 934, 965, 935, 966, 936, 967, 937, 968, 938, 969, 939, 970, 940, 971, - 941, 972, 942, 973, 943, 974, 944, 975, 945, 976, 946, 977, 947, 978, - 948, 979, 949, 980, 950, 981, 951, 982, 952, 983, 953, 984, 954, 985, - 955, 986, 956, 987, 957, 988, 958, 989, 959, 990, 960, 960, 961, 992, - 962, 993, 963, 994, 964, 995, 965, 996, 966, 997, 967, 998, 968, 999, - 969, 1000, 970, 1001, 971, 1002, 972, 1003, 973, 1004, 974, 1005, 975, 1006, - 976, 1007, 977, 1008, 978, 1009, 979, 1010, 980, 1011, 981, 1012, 982, 1013, - 983, 1014, 984, 1015, 985, 1016, 986, 1017, 987, 1018, 988, 1019, 989, 1020, - 990, 1021, 991, 1022, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 2, 2, - 2, 33, 33, 64, 64, 64, 96, 96, 65, 96, 34, 65, 3, 34, - 3, 3, 4, 4, 4, 35, 35, 66, 66, 97, 97, 128, 128, 128, - 160, 160, 129, 160, 98, 129, 67, 98, 36, 67, 5, 36, 5, 5, - 6, 6, 6, 37, 37, 68, 68, 99, 99, 130, 130, 161, 161, 192, - 192, 192, 224, 224, 193, 224, 162, 193, 131, 162, 100, 131, 69, 100, - 38, 69, 7, 38, 7, 7, 8, 8, 8, 39, 39, 70, 70, 101, - 101, 132, 132, 163, 163, 194, 194, 225, 225, 256, 256, 256, 288, 288, - 257, 288, 226, 257, 195, 226, 164, 195, 133, 164, 102, 133, 71, 102, - 40, 71, 9, 40, 9, 9, 10, 10, 10, 41, 41, 72, 72, 103, - 103, 134, 134, 165, 165, 196, 196, 227, 227, 258, 258, 289, 289, 320, - 320, 320, 352, 352, 321, 352, 290, 321, 259, 290, 228, 259, 197, 228, - 166, 197, 135, 166, 104, 135, 73, 104, 42, 73, 11, 42, 11, 11, - 
12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167, 198, - 198, 229, 229, 260, 260, 291, 291, 322, 322, 353, 353, 384, 384, 384, - 416, 416, 385, 416, 354, 385, 323, 354, 292, 323, 261, 292, 230, 261, - 199, 230, 168, 199, 137, 168, 106, 137, 75, 106, 44, 75, 13, 44, - 13, 13, 14, 14, 14, 45, 45, 76, 76, 107, 107, 138, 138, 169, - 169, 200, 200, 231, 231, 262, 262, 293, 293, 324, 324, 355, 355, 386, - 386, 417, 417, 448, 448, 448, 480, 480, 449, 480, 418, 449, 387, 418, - 356, 387, 325, 356, 294, 325, 263, 294, 232, 263, 201, 232, 170, 201, - 139, 170, 108, 139, 77, 108, 46, 77, 15, 46, 15, 15, 16, 16, - 16, 47, 47, 78, 78, 109, 109, 140, 140, 171, 171, 202, 202, 233, - 233, 264, 264, 295, 295, 326, 326, 357, 357, 388, 388, 419, 419, 450, - 450, 481, 481, 512, 512, 512, 544, 544, 513, 544, 482, 513, 451, 482, - 420, 451, 389, 420, 358, 389, 327, 358, 296, 327, 265, 296, 234, 265, - 203, 234, 172, 203, 141, 172, 110, 141, 79, 110, 48, 79, 17, 48, - 17, 17, 18, 18, 18, 49, 49, 80, 80, 111, 111, 142, 142, 173, - 173, 204, 204, 235, 235, 266, 266, 297, 297, 328, 328, 359, 359, 390, - 390, 421, 421, 452, 452, 483, 483, 514, 514, 545, 545, 576, 576, 576, - 608, 608, 577, 608, 546, 577, 515, 546, 484, 515, 453, 484, 422, 453, - 391, 422, 360, 391, 329, 360, 298, 329, 267, 298, 236, 267, 205, 236, - 174, 205, 143, 174, 112, 143, 81, 112, 50, 81, 19, 50, 19, 19, - 20, 20, 20, 51, 51, 82, 82, 113, 113, 144, 144, 175, 175, 206, - 206, 237, 237, 268, 268, 299, 299, 330, 330, 361, 361, 392, 392, 423, - 423, 454, 454, 485, 485, 516, 516, 547, 547, 578, 578, 609, 609, 640, - 640, 640, 672, 672, 641, 672, 610, 641, 579, 610, 548, 579, 517, 548, - 486, 517, 455, 486, 424, 455, 393, 424, 362, 393, 331, 362, 300, 331, - 269, 300, 238, 269, 207, 238, 176, 207, 145, 176, 114, 145, 83, 114, - 52, 83, 21, 52, 21, 21, 22, 22, 22, 53, 53, 84, 84, 115, - 115, 146, 146, 177, 177, 208, 208, 239, 239, 270, 270, 301, 301, 332, - 332, 363, 363, 394, 394, 425, 425, 456, 456, 487, 487, 518, 
518, 549, - 549, 580, 580, 611, 611, 642, 642, 673, 673, 704, 704, 704, 736, 736, - 705, 736, 674, 705, 643, 674, 612, 643, 581, 612, 550, 581, 519, 550, - 488, 519, 457, 488, 426, 457, 395, 426, 364, 395, 333, 364, 302, 333, - 271, 302, 240, 271, 209, 240, 178, 209, 147, 178, 116, 147, 85, 116, - 54, 85, 23, 54, 23, 23, 24, 24, 24, 55, 55, 86, 86, 117, - 117, 148, 148, 179, 179, 210, 210, 241, 241, 272, 272, 303, 303, 334, - 334, 365, 365, 396, 396, 427, 427, 458, 458, 489, 489, 520, 520, 551, - 551, 582, 582, 613, 613, 644, 644, 675, 675, 706, 706, 737, 737, 768, - 768, 768, 800, 800, 769, 800, 738, 769, 707, 738, 676, 707, 645, 676, - 614, 645, 583, 614, 552, 583, 521, 552, 490, 521, 459, 490, 428, 459, - 397, 428, 366, 397, 335, 366, 304, 335, 273, 304, 242, 273, 211, 242, - 180, 211, 149, 180, 118, 149, 87, 118, 56, 87, 25, 56, 25, 25, - 26, 26, 26, 57, 57, 88, 88, 119, 119, 150, 150, 181, 181, 212, - 212, 243, 243, 274, 274, 305, 305, 336, 336, 367, 367, 398, 398, 429, - 429, 460, 460, 491, 491, 522, 522, 553, 553, 584, 584, 615, 615, 646, - 646, 677, 677, 708, 708, 739, 739, 770, 770, 801, 801, 832, 832, 832, - 864, 864, 833, 864, 802, 833, 771, 802, 740, 771, 709, 740, 678, 709, - 647, 678, 616, 647, 585, 616, 554, 585, 523, 554, 492, 523, 461, 492, - 430, 461, 399, 430, 368, 399, 337, 368, 306, 337, 275, 306, 244, 275, - 213, 244, 182, 213, 151, 182, 120, 151, 89, 120, 58, 89, 27, 58, - 27, 27, 28, 28, 28, 59, 59, 90, 90, 121, 121, 152, 152, 183, - 183, 214, 214, 245, 245, 276, 276, 307, 307, 338, 338, 369, 369, 400, - 400, 431, 431, 462, 462, 493, 493, 524, 524, 555, 555, 586, 586, 617, - 617, 648, 648, 679, 679, 710, 710, 741, 741, 772, 772, 803, 803, 834, - 834, 865, 865, 896, 896, 896, 928, 928, 897, 928, 866, 897, 835, 866, - 804, 835, 773, 804, 742, 773, 711, 742, 680, 711, 649, 680, 618, 649, - 587, 618, 556, 587, 525, 556, 494, 525, 463, 494, 432, 463, 401, 432, - 370, 401, 339, 370, 308, 339, 277, 308, 246, 277, 215, 246, 184, 215, - 153, 184, 
122, 153, 91, 122, 60, 91, 29, 60, 29, 29, 30, 30, - 30, 61, 61, 92, 92, 123, 123, 154, 154, 185, 185, 216, 216, 247, - 247, 278, 278, 309, 309, 340, 340, 371, 371, 402, 402, 433, 433, 464, - 464, 495, 495, 526, 526, 557, 557, 588, 588, 619, 619, 650, 650, 681, - 681, 712, 712, 743, 743, 774, 774, 805, 805, 836, 836, 867, 867, 898, - 898, 929, 929, 960, 960, 960, 961, 992, 930, 961, 899, 930, 868, 899, - 837, 868, 806, 837, 775, 806, 744, 775, 713, 744, 682, 713, 651, 682, - 620, 651, 589, 620, 558, 589, 527, 558, 496, 527, 465, 496, 434, 465, - 403, 434, 372, 403, 341, 372, 310, 341, 279, 310, 248, 279, 217, 248, - 186, 217, 155, 186, 124, 155, 93, 124, 62, 93, 31, 62, 63, 94, - 94, 125, 125, 156, 156, 187, 187, 218, 218, 249, 249, 280, 280, 311, - 311, 342, 342, 373, 373, 404, 404, 435, 435, 466, 466, 497, 497, 528, - 528, 559, 559, 590, 590, 621, 621, 652, 652, 683, 683, 714, 714, 745, - 745, 776, 776, 807, 807, 838, 838, 869, 869, 900, 900, 931, 931, 962, - 962, 993, 963, 994, 932, 963, 901, 932, 870, 901, 839, 870, 808, 839, - 777, 808, 746, 777, 715, 746, 684, 715, 653, 684, 622, 653, 591, 622, - 560, 591, 529, 560, 498, 529, 467, 498, 436, 467, 405, 436, 374, 405, - 343, 374, 312, 343, 281, 312, 250, 281, 219, 250, 188, 219, 157, 188, - 126, 157, 95, 126, 127, 158, 158, 189, 189, 220, 220, 251, 251, 282, - 282, 313, 313, 344, 344, 375, 375, 406, 406, 437, 437, 468, 468, 499, - 499, 530, 530, 561, 561, 592, 592, 623, 623, 654, 654, 685, 685, 716, - 716, 747, 747, 778, 778, 809, 809, 840, 840, 871, 871, 902, 902, 933, - 933, 964, 964, 995, 965, 996, 934, 965, 903, 934, 872, 903, 841, 872, - 810, 841, 779, 810, 748, 779, 717, 748, 686, 717, 655, 686, 624, 655, - 593, 624, 562, 593, 531, 562, 500, 531, 469, 500, 438, 469, 407, 438, - 376, 407, 345, 376, 314, 345, 283, 314, 252, 283, 221, 252, 190, 221, - 159, 190, 191, 222, 222, 253, 253, 284, 284, 315, 315, 346, 346, 377, - 377, 408, 408, 439, 439, 470, 470, 501, 501, 532, 532, 563, 563, 594, - 594, 625, 625, 
656, 656, 687, 687, 718, 718, 749, 749, 780, 780, 811, - 811, 842, 842, 873, 873, 904, 904, 935, 935, 966, 966, 997, 967, 998, - 936, 967, 905, 936, 874, 905, 843, 874, 812, 843, 781, 812, 750, 781, - 719, 750, 688, 719, 657, 688, 626, 657, 595, 626, 564, 595, 533, 564, - 502, 533, 471, 502, 440, 471, 409, 440, 378, 409, 347, 378, 316, 347, - 285, 316, 254, 285, 223, 254, 255, 286, 286, 317, 317, 348, 348, 379, - 379, 410, 410, 441, 441, 472, 472, 503, 503, 534, 534, 565, 565, 596, - 596, 627, 627, 658, 658, 689, 689, 720, 720, 751, 751, 782, 782, 813, - 813, 844, 844, 875, 875, 906, 906, 937, 937, 968, 968, 999, 969, 1000, - 938, 969, 907, 938, 876, 907, 845, 876, 814, 845, 783, 814, 752, 783, - 721, 752, 690, 721, 659, 690, 628, 659, 597, 628, 566, 597, 535, 566, - 504, 535, 473, 504, 442, 473, 411, 442, 380, 411, 349, 380, 318, 349, - 287, 318, 319, 350, 350, 381, 381, 412, 412, 443, 443, 474, 474, 505, - 505, 536, 536, 567, 567, 598, 598, 629, 629, 660, 660, 691, 691, 722, - 722, 753, 753, 784, 784, 815, 815, 846, 846, 877, 877, 908, 908, 939, - 939, 970, 970, 1001, 971, 1002, 940, 971, 909, 940, 878, 909, 847, 878, - 816, 847, 785, 816, 754, 785, 723, 754, 692, 723, 661, 692, 630, 661, - 599, 630, 568, 599, 537, 568, 506, 537, 475, 506, 444, 475, 413, 444, - 382, 413, 351, 382, 383, 414, 414, 445, 445, 476, 476, 507, 507, 538, - 538, 569, 569, 600, 600, 631, 631, 662, 662, 693, 693, 724, 724, 755, - 755, 786, 786, 817, 817, 848, 848, 879, 879, 910, 910, 941, 941, 972, - 972, 1003, 973, 1004, 942, 973, 911, 942, 880, 911, 849, 880, 818, 849, - 787, 818, 756, 787, 725, 756, 694, 725, 663, 694, 632, 663, 601, 632, - 570, 601, 539, 570, 508, 539, 477, 508, 446, 477, 415, 446, 447, 478, - 478, 509, 509, 540, 540, 571, 571, 602, 602, 633, 633, 664, 664, 695, - 695, 726, 726, 757, 757, 788, 788, 819, 819, 850, 850, 881, 881, 912, - 912, 943, 943, 974, 974, 1005, 975, 1006, 944, 975, 913, 944, 882, 913, - 851, 882, 820, 851, 789, 820, 758, 789, 727, 758, 696, 727, 
665, 696, - 634, 665, 603, 634, 572, 603, 541, 572, 510, 541, 479, 510, 511, 542, - 542, 573, 573, 604, 604, 635, 635, 666, 666, 697, 697, 728, 728, 759, - 759, 790, 790, 821, 821, 852, 852, 883, 883, 914, 914, 945, 945, 976, - 976, 1007, 977, 1008, 946, 977, 915, 946, 884, 915, 853, 884, 822, 853, - 791, 822, 760, 791, 729, 760, 698, 729, 667, 698, 636, 667, 605, 636, - 574, 605, 543, 574, 575, 606, 606, 637, 637, 668, 668, 699, 699, 730, - 730, 761, 761, 792, 792, 823, 823, 854, 854, 885, 885, 916, 916, 947, - 947, 978, 978, 1009, 979, 1010, 948, 979, 917, 948, 886, 917, 855, 886, - 824, 855, 793, 824, 762, 793, 731, 762, 700, 731, 669, 700, 638, 669, - 607, 638, 639, 670, 670, 701, 701, 732, 732, 763, 763, 794, 794, 825, - 825, 856, 856, 887, 887, 918, 918, 949, 949, 980, 980, 1011, 981, 1012, - 950, 981, 919, 950, 888, 919, 857, 888, 826, 857, 795, 826, 764, 795, - 733, 764, 702, 733, 671, 702, 703, 734, 734, 765, 765, 796, 796, 827, - 827, 858, 858, 889, 889, 920, 920, 951, 951, 982, 982, 1013, 983, 1014, - 952, 983, 921, 952, 890, 921, 859, 890, 828, 859, 797, 828, 766, 797, - 735, 766, 767, 798, 798, 829, 829, 860, 860, 891, 891, 922, 922, 953, - 953, 984, 984, 1015, 985, 1016, 954, 985, 923, 954, 892, 923, 861, 892, - 830, 861, 799, 830, 831, 862, 862, 893, 893, 924, 924, 955, 955, 986, - 986, 1017, 987, 1018, 956, 987, 925, 956, 894, 925, 863, 894, 895, 926, - 926, 957, 957, 988, 988, 1019, 989, 1020, 958, 989, 927, 958, 959, 990, - 990, 1021, 991, 1022, 0, 0 -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x4[16]) = { - 0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15 -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x4[16]) = { - 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x4[16]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x8[32]) = { - 0, 1, 3, 6, 2, 4, 7, 
10, 5, 8, 11, 14, 9, 12, 15, 18, - 13, 16, 19, 22, 17, 20, 23, 26, 21, 24, 27, 29, 25, 28, 30, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x8[32]) = { - 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27, - 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x8[32]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x4[32]) = { - 0, 2, 5, 9, 13, 17, 21, 25, 1, 4, 8, 12, 16, 20, 24, 28, - 3, 7, 11, 15, 19, 23, 27, 30, 6, 10, 14, 18, 22, 26, 29, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x4[32]) = { - 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29, - 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x4[32]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x16[64]) = { - 0, 1, 3, 6, 2, 4, 7, 10, 5, 8, 11, 14, 9, 12, 15, 18, - 13, 16, 19, 22, 17, 20, 23, 26, 21, 24, 27, 30, 25, 28, 31, 34, - 29, 32, 35, 38, 33, 36, 39, 42, 37, 40, 43, 46, 41, 44, 47, 50, - 45, 48, 51, 54, 49, 52, 55, 58, 53, 56, 59, 61, 57, 60, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x4[64]) = { - 0, 2, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, - 1, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, - 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 62, - 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 61, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x16[64]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 
39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x4[64]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x16[64]) = { - 0, 16, 32, 48, 1, 17, 33, 49, 2, 18, 34, 50, 3, 19, 35, 51, - 4, 20, 36, 52, 5, 21, 37, 53, 6, 22, 38, 54, 7, 23, 39, 55, - 8, 24, 40, 56, 9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59, - 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x4[64]) = { - 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, - 1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61, - 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, - 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x32[256]) = { - 0, 1, 3, 6, 10, 15, 21, 28, 2, 4, 7, 11, 16, 22, 29, - 36, 5, 8, 12, 17, 23, 30, 37, 44, 9, 13, 18, 24, 31, 38, - 45, 52, 14, 19, 25, 32, 39, 46, 53, 60, 20, 26, 33, 40, 47, - 54, 61, 68, 27, 34, 41, 48, 55, 62, 69, 76, 35, 42, 49, 56, - 63, 70, 77, 84, 43, 50, 57, 64, 71, 78, 85, 92, 51, 58, 65, - 72, 79, 86, 93, 100, 59, 66, 73, 80, 87, 94, 101, 108, 67, 74, - 81, 88, 95, 102, 109, 116, 75, 82, 89, 96, 103, 110, 117, 124, 83, - 90, 97, 104, 111, 118, 125, 132, 91, 98, 105, 112, 119, 126, 133, 140, - 99, 106, 113, 120, 127, 134, 141, 148, 107, 114, 121, 128, 135, 142, 149, - 156, 115, 122, 129, 136, 143, 150, 157, 164, 123, 130, 137, 144, 151, 158, - 165, 172, 131, 138, 145, 152, 159, 166, 173, 180, 139, 146, 153, 160, 167, - 174, 181, 188, 147, 154, 161, 168, 175, 182, 189, 196, 155, 162, 169, 176, - 
183, 190, 197, 204, 163, 170, 177, 184, 191, 198, 205, 212, 171, 178, 185, - 192, 199, 206, 213, 220, 179, 186, 193, 200, 207, 214, 221, 228, 187, 194, - 201, 208, 215, 222, 229, 235, 195, 202, 209, 216, 223, 230, 236, 241, 203, - 210, 217, 224, 231, 237, 242, 246, 211, 218, 225, 232, 238, 243, 247, 250, - 219, 226, 233, 239, 244, 248, 251, 253, 227, 234, 240, 245, 249, 252, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x8[256]) = { - 0, 2, 5, 9, 14, 20, 27, 35, 43, 51, 59, 67, 75, 83, 91, - 99, 107, 115, 123, 131, 139, 147, 155, 163, 171, 179, 187, 195, 203, 211, - 219, 227, 1, 4, 8, 13, 19, 26, 34, 42, 50, 58, 66, 74, 82, - 90, 98, 106, 114, 122, 130, 138, 146, 154, 162, 170, 178, 186, 194, 202, - 210, 218, 226, 234, 3, 7, 12, 18, 25, 33, 41, 49, 57, 65, 73, - 81, 89, 97, 105, 113, 121, 129, 137, 145, 153, 161, 169, 177, 185, 193, - 201, 209, 217, 225, 233, 240, 6, 11, 17, 24, 32, 40, 48, 56, 64, - 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, - 192, 200, 208, 216, 224, 232, 239, 245, 10, 16, 23, 31, 39, 47, 55, - 63, 71, 79, 87, 95, 103, 111, 119, 127, 135, 143, 151, 159, 167, 175, - 183, 191, 199, 207, 215, 223, 231, 238, 244, 249, 15, 22, 30, 38, 46, - 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, 134, 142, 150, 158, 166, - 174, 182, 190, 198, 206, 214, 222, 230, 237, 243, 248, 252, 21, 29, 37, - 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, 133, 141, 149, 157, - 165, 173, 181, 189, 197, 205, 213, 221, 229, 236, 242, 247, 251, 254, 28, - 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124, 132, 140, 148, - 156, 164, 172, 180, 188, 196, 204, 212, 220, 228, 235, 241, 246, 250, 253, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x32[256]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_32x8[256]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 
207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x32[256]) = { - 0, 32, 64, 96, 128, 160, 192, 224, 1, 33, 65, 97, 129, 161, 193, 225, - 2, 34, 66, 98, 130, 162, 194, 226, 3, 35, 67, 99, 131, 163, 195, 227, - 4, 36, 68, 100, 132, 164, 196, 228, 5, 37, 69, 101, 133, 165, 197, 229, - 6, 38, 70, 102, 134, 166, 198, 230, 7, 39, 71, 103, 135, 167, 199, 231, - 8, 40, 72, 104, 136, 168, 200, 232, 9, 41, 73, 105, 137, 169, 201, 233, - 10, 42, 74, 106, 138, 170, 202, 234, 11, 43, 75, 107, 139, 171, 203, 235, - 12, 44, 76, 108, 140, 172, 204, 236, 13, 45, 77, 109, 141, 173, 205, 237, - 14, 46, 78, 110, 142, 174, 206, 238, 15, 47, 79, 111, 143, 175, 207, 239, - 16, 48, 80, 112, 144, 176, 208, 240, 17, 49, 81, 113, 145, 177, 209, 241, - 18, 50, 82, 114, 146, 178, 210, 242, 19, 51, 83, 115, 147, 179, 211, 243, - 20, 52, 84, 116, 148, 180, 212, 244, 21, 53, 85, 117, 149, 181, 213, 245, - 22, 54, 86, 118, 150, 182, 214, 246, 23, 55, 87, 119, 151, 183, 215, 247, - 24, 56, 88, 120, 152, 184, 216, 248, 25, 57, 89, 121, 153, 185, 217, 249, - 26, 58, 90, 122, 154, 186, 218, 250, 27, 59, 91, 123, 155, 187, 219, 251, - 28, 60, 92, 124, 156, 188, 220, 252, 29, 61, 93, 125, 157, 189, 221, 253, - 30, 62, 94, 126, 158, 190, 222, 254, 31, 63, 95, 127, 159, 191, 223, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_32x8[256]) = { - 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, - 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, - 240, 248, 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, - 105, 113, 121, 129, 137, 145, 153, 161, 169, 177, 185, 193, 201, 209, 217, - 225, 233, 241, 249, 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, - 90, 98, 106, 114, 122, 130, 138, 146, 
154, 162, 170, 178, 186, 194, 202, - 210, 218, 226, 234, 242, 250, 3, 11, 19, 27, 35, 43, 51, 59, 67, - 75, 83, 91, 99, 107, 115, 123, 131, 139, 147, 155, 163, 171, 179, 187, - 195, 203, 211, 219, 227, 235, 243, 251, 4, 12, 20, 28, 36, 44, 52, - 60, 68, 76, 84, 92, 100, 108, 116, 124, 132, 140, 148, 156, 164, 172, - 180, 188, 196, 204, 212, 220, 228, 236, 244, 252, 5, 13, 21, 29, 37, - 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, 133, 141, 149, 157, - 165, 173, 181, 189, 197, 205, 213, 221, 229, 237, 245, 253, 6, 14, 22, - 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, 134, 142, - 150, 158, 166, 174, 182, 190, 198, 206, 214, 222, 230, 238, 246, 254, 7, - 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127, - 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239, 247, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x8[64]) = { - 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, - 2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59, - 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61, - 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x8[64]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x8[64]) = { - 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42, - 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53, - 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60, - 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63 -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x16[128]) = { - 0, 1, 3, 6, 10, 15, 21, 28, 2, 4, 7, 11, 16, 22, 29, 36, - 5, 8, 12, 17, 23, 30, 37, 44, 9, 13, 
18, 24, 31, 38, 45, 52, - 14, 19, 25, 32, 39, 46, 53, 60, 20, 26, 33, 40, 47, 54, 61, 68, - 27, 34, 41, 48, 55, 62, 69, 76, 35, 42, 49, 56, 63, 70, 77, 84, - 43, 50, 57, 64, 71, 78, 85, 92, 51, 58, 65, 72, 79, 86, 93, 100, - 59, 66, 73, 80, 87, 94, 101, 107, 67, 74, 81, 88, 95, 102, 108, 113, - 75, 82, 89, 96, 103, 109, 114, 118, 83, 90, 97, 104, 110, 115, 119, 122, - 91, 98, 105, 111, 116, 120, 123, 125, 99, 106, 112, 117, 121, 124, 126, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x8[128]) = { - 0, 2, 5, 9, 14, 20, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99, - 1, 4, 8, 13, 19, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, - 3, 7, 12, 18, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 112, - 6, 11, 17, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 111, 117, - 10, 16, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 110, 116, 121, - 15, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 109, 115, 120, 124, - 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 108, 114, 119, 123, 126, - 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 107, 113, 118, 122, 125, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x16[128]) = { - 0, 16, 32, 48, 64, 80, 96, 112, 1, 17, 33, 49, 65, 81, 97, 113, - 2, 18, 34, 50, 66, 82, 98, 114, 3, 19, 35, 51, 67, 83, 99, 115, - 4, 20, 36, 52, 68, 84, 100, 116, 5, 21, 37, 53, 69, 85, 101, 117, - 6, 22, 38, 54, 70, 86, 102, 118, 7, 23, 39, 55, 71, 87, 103, 119, - 8, 24, 40, 56, 72, 88, 104, 120, 9, 25, 41, 57, 73, 89, 105, 121, - 10, 26, 42, 58, 74, 90, 106, 122, 11, 27, 43, 59, 75, 91, 107, 123, - 12, 28, 44, 60, 76, 92, 108, 124, 13, 29, 45, 61, 77, 93, 109, 125, - 14, 30, 46, 62, 78, 94, 110, 126, 15, 31, 47, 63, 79, 95, 111, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x8[128]) = { - 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, - 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121, - 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122, - 3, 11, 19, 27, 
35, 43, 51, 59, 67, 75, 83, 91, 99, 107, 115, 123, - 4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124, - 5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, - 6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, - 7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x16[128]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x8[128]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x32[512]) = { - 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105, - 120, 2, 4, 7, 11, 16, 22, 29, 37, 46, 56, 67, 79, 92, 106, - 121, 136, 5, 8, 12, 17, 23, 30, 38, 47, 57, 68, 80, 93, 107, - 122, 137, 152, 9, 13, 18, 24, 31, 39, 48, 58, 69, 81, 94, 108, - 123, 138, 153, 168, 14, 19, 25, 32, 40, 49, 59, 70, 82, 95, 109, - 124, 139, 154, 
169, 184, 20, 26, 33, 41, 50, 60, 71, 83, 96, 110, - 125, 140, 155, 170, 185, 200, 27, 34, 42, 51, 61, 72, 84, 97, 111, - 126, 141, 156, 171, 186, 201, 216, 35, 43, 52, 62, 73, 85, 98, 112, - 127, 142, 157, 172, 187, 202, 217, 232, 44, 53, 63, 74, 86, 99, 113, - 128, 143, 158, 173, 188, 203, 218, 233, 248, 54, 64, 75, 87, 100, 114, - 129, 144, 159, 174, 189, 204, 219, 234, 249, 264, 65, 76, 88, 101, 115, - 130, 145, 160, 175, 190, 205, 220, 235, 250, 265, 280, 77, 89, 102, 116, - 131, 146, 161, 176, 191, 206, 221, 236, 251, 266, 281, 296, 90, 103, 117, - 132, 147, 162, 177, 192, 207, 222, 237, 252, 267, 282, 297, 312, 104, 118, - 133, 148, 163, 178, 193, 208, 223, 238, 253, 268, 283, 298, 313, 328, 119, - 134, 149, 164, 179, 194, 209, 224, 239, 254, 269, 284, 299, 314, 329, 344, - 135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345, - 360, 151, 166, 181, 196, 211, 226, 241, 256, 271, 286, 301, 316, 331, 346, - 361, 376, 167, 182, 197, 212, 227, 242, 257, 272, 287, 302, 317, 332, 347, - 362, 377, 392, 183, 198, 213, 228, 243, 258, 273, 288, 303, 318, 333, 348, - 363, 378, 393, 407, 199, 214, 229, 244, 259, 274, 289, 304, 319, 334, 349, - 364, 379, 394, 408, 421, 215, 230, 245, 260, 275, 290, 305, 320, 335, 350, - 365, 380, 395, 409, 422, 434, 231, 246, 261, 276, 291, 306, 321, 336, 351, - 366, 381, 396, 410, 423, 435, 446, 247, 262, 277, 292, 307, 322, 337, 352, - 367, 382, 397, 411, 424, 436, 447, 457, 263, 278, 293, 308, 323, 338, 353, - 368, 383, 398, 412, 425, 437, 448, 458, 467, 279, 294, 309, 324, 339, 354, - 369, 384, 399, 413, 426, 438, 449, 459, 468, 476, 295, 310, 325, 340, 355, - 370, 385, 400, 414, 427, 439, 450, 460, 469, 477, 484, 311, 326, 341, 356, - 371, 386, 401, 415, 428, 440, 451, 461, 470, 478, 485, 491, 327, 342, 357, - 372, 387, 402, 416, 429, 441, 452, 462, 471, 479, 486, 492, 497, 343, 358, - 373, 388, 403, 417, 430, 442, 453, 463, 472, 480, 487, 493, 498, 502, 359, - 374, 389, 404, 418, 431, 443, 454, 464, 473, 481, 
488, 494, 499, 503, 506, - 375, 390, 405, 419, 432, 444, 455, 465, 474, 482, 489, 495, 500, 504, 507, - 509, 391, 406, 420, 433, 445, 456, 466, 475, 483, 490, 496, 501, 505, 508, - 510, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x16[512]) = { - 0, 2, 5, 9, 14, 20, 27, 35, 44, 54, 65, 77, 90, 104, 119, - 135, 151, 167, 183, 199, 215, 231, 247, 263, 279, 295, 311, 327, 343, 359, - 375, 391, 1, 4, 8, 13, 19, 26, 34, 43, 53, 64, 76, 89, 103, - 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278, 294, 310, 326, 342, - 358, 374, 390, 406, 3, 7, 12, 18, 25, 33, 42, 52, 63, 75, 88, - 102, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309, 325, - 341, 357, 373, 389, 405, 420, 6, 11, 17, 24, 32, 41, 51, 62, 74, - 87, 101, 116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308, - 324, 340, 356, 372, 388, 404, 419, 433, 10, 16, 23, 31, 40, 50, 61, - 73, 86, 100, 115, 131, 147, 163, 179, 195, 211, 227, 243, 259, 275, 291, - 307, 323, 339, 355, 371, 387, 403, 418, 432, 445, 15, 22, 30, 39, 49, - 60, 72, 85, 99, 114, 130, 146, 162, 178, 194, 210, 226, 242, 258, 274, - 290, 306, 322, 338, 354, 370, 386, 402, 417, 431, 444, 456, 21, 29, 38, - 48, 59, 71, 84, 98, 113, 129, 145, 161, 177, 193, 209, 225, 241, 257, - 273, 289, 305, 321, 337, 353, 369, 385, 401, 416, 430, 443, 455, 466, 28, - 37, 47, 58, 70, 83, 97, 112, 128, 144, 160, 176, 192, 208, 224, 240, - 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 415, 429, 442, 454, 465, - 475, 36, 46, 57, 69, 82, 96, 111, 127, 143, 159, 175, 191, 207, 223, - 239, 255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 414, 428, 441, 453, - 464, 474, 483, 45, 56, 68, 81, 95, 110, 126, 142, 158, 174, 190, 206, - 222, 238, 254, 270, 286, 302, 318, 334, 350, 366, 382, 398, 413, 427, 440, - 452, 463, 473, 482, 490, 55, 67, 80, 94, 109, 125, 141, 157, 173, 189, - 205, 221, 237, 253, 269, 285, 301, 317, 333, 349, 365, 381, 397, 412, 426, - 439, 451, 462, 472, 481, 489, 496, 66, 79, 93, 108, 124, 
140, 156, 172, - 188, 204, 220, 236, 252, 268, 284, 300, 316, 332, 348, 364, 380, 396, 411, - 425, 438, 450, 461, 471, 480, 488, 495, 501, 78, 92, 107, 123, 139, 155, - 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363, 379, 395, - 410, 424, 437, 449, 460, 470, 479, 487, 494, 500, 505, 91, 106, 122, 138, - 154, 170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378, - 394, 409, 423, 436, 448, 459, 469, 478, 486, 493, 499, 504, 508, 105, 121, - 137, 153, 169, 185, 201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361, - 377, 393, 408, 422, 435, 447, 458, 468, 477, 485, 492, 498, 503, 507, 510, - 120, 136, 152, 168, 184, 200, 216, 232, 248, 264, 280, 296, 312, 328, 344, - 360, 376, 392, 407, 421, 434, 446, 457, 467, 476, 484, 491, 497, 502, 506, - 509, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x32[512]) = { - 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480, - 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481, - 2, 34, 66, 98, 130, 162, 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, - 3, 35, 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483, - 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, 452, 484, - 5, 37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, 453, 485, - 6, 38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, - 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, 455, 487, - 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, 456, 488, - 9, 41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, - 10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490, - 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, 427, 459, 491, - 12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, 428, 460, 492, - 13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, - 14, 46, 78, 110, 142, 174, 206, 
238, 270, 302, 334, 366, 398, 430, 462, 494, - 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, 431, 463, 495, - 16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, - 17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497, - 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, 434, 466, 498, - 19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, 435, 467, 499, - 20, 52, 84, 116, 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500, - 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, 469, 501, - 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, 438, 470, 502, - 23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, - 24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504, - 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, 441, 473, 505, - 26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, 442, 474, 506, - 27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, - 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, 476, 508, - 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, 445, 477, 509, - 30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, - 31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_32x16[512]) = { - 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, - 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, - 480, 496, 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, - 209, 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 401, 417, 433, - 449, 465, 481, 497, 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, - 178, 194, 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 402, - 418, 434, 450, 466, 482, 498, 3, 19, 35, 51, 67, 83, 99, 115, 131, - 147, 163, 179, 195, 
211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371, - 387, 403, 419, 435, 451, 467, 483, 499, 4, 20, 36, 52, 68, 84, 100, - 116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340, - 356, 372, 388, 404, 420, 436, 452, 468, 484, 500, 5, 21, 37, 53, 69, - 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309, - 325, 341, 357, 373, 389, 405, 421, 437, 453, 469, 485, 501, 6, 22, 38, - 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278, - 294, 310, 326, 342, 358, 374, 390, 406, 422, 438, 454, 470, 486, 502, 7, - 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, - 263, 279, 295, 311, 327, 343, 359, 375, 391, 407, 423, 439, 455, 471, 487, - 503, 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, - 232, 248, 264, 280, 296, 312, 328, 344, 360, 376, 392, 408, 424, 440, 456, - 472, 488, 504, 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, - 201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361, 377, 393, 409, 425, - 441, 457, 473, 489, 505, 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, - 170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378, 394, - 410, 426, 442, 458, 474, 490, 506, 11, 27, 43, 59, 75, 91, 107, 123, - 139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363, - 379, 395, 411, 427, 443, 459, 475, 491, 507, 12, 28, 44, 60, 76, 92, - 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284, 300, 316, 332, - 348, 364, 380, 396, 412, 428, 444, 460, 476, 492, 508, 13, 29, 45, 61, - 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269, 285, 301, - 317, 333, 349, 365, 381, 397, 413, 429, 445, 461, 477, 493, 509, 14, 30, - 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, 270, - 286, 302, 318, 334, 350, 366, 382, 398, 414, 430, 446, 462, 478, 494, 510, - 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, - 255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 415, 431, 447, 463, 479, - 495, 511, -}; - 
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x32[512]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, - 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, - 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, - 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, - 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, - 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, - 390, 391, 392, 393, 394, 395, 
396, 397, 398, 399, 400, 401, 402, 403, 404, - 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, - 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, - 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, - 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, - 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, - 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, - 510, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_32x16[512]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, - 270, 271, 272, 273, 274, 275, 276, 277, 278, 
279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, - 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, - 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, - 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, - 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, - 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, - 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, - 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, - 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, - 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, - 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, - 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, - 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, - 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, - 510, 511, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x16[256]) = { - 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, - 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241, - 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242, - 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243, - 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244, - 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, - 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, - 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, - 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248, - 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 
185, 201, 217, 233, 249, - 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250, - 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251, - 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, - 13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, - 14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, - 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x16[256]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x16[256]) = { - 0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119, - 120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118, - 121, 150, 3, 8, 12, 17, 25, 
30, 42, 47, 63, 68, 88, 93, 117, - 122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116, - 123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115, - 124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114, - 125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113, - 126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112, - 127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111, - 128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110, - 129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109, - 130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108, - 131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107, - 132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106, - 133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105, - 134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253, - 135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254, - 255 -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_32x32[1024]) = { - 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, - 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, - 896, 928, 960, 992, 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, - 321, 353, 385, 417, 449, 481, 513, 545, 577, 609, 641, 673, 705, 737, - 769, 801, 833, 865, 897, 929, 961, 993, 2, 34, 66, 98, 130, 162, - 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, 514, 546, 578, 610, - 642, 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 3, 35, - 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483, - 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, 867, 899, 931, - 963, 995, 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, - 388, 420, 452, 484, 516, 548, 580, 612, 644, 676, 708, 740, 772, 804, - 836, 868, 900, 932, 964, 996, 5, 37, 69, 101, 133, 165, 197, 229, - 261, 293, 325, 357, 389, 
421, 453, 485, 517, 549, 581, 613, 645, 677, - 709, 741, 773, 805, 837, 869, 901, 933, 965, 997, 6, 38, 70, 102, - 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, 518, 550, - 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902, 934, 966, 998, - 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, - 455, 487, 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, 871, - 903, 935, 967, 999, 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, - 328, 360, 392, 424, 456, 488, 520, 552, 584, 616, 648, 680, 712, 744, - 776, 808, 840, 872, 904, 936, 968, 1000, 9, 41, 73, 105, 137, 169, - 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, 521, 553, 585, 617, - 649, 681, 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 10, 42, - 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490, - 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, 842, 874, 906, 938, - 970, 1002, 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, - 395, 427, 459, 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811, - 843, 875, 907, 939, 971, 1003, 12, 44, 76, 108, 140, 172, 204, 236, - 268, 300, 332, 364, 396, 428, 460, 492, 524, 556, 588, 620, 652, 684, - 716, 748, 780, 812, 844, 876, 908, 940, 972, 1004, 13, 45, 77, 109, - 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, 525, 557, - 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941, 973, 1005, - 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, - 462, 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, 814, 846, 878, - 910, 942, 974, 1006, 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, - 335, 367, 399, 431, 463, 495, 527, 559, 591, 623, 655, 687, 719, 751, - 783, 815, 847, 879, 911, 943, 975, 1007, 16, 48, 80, 112, 144, 176, - 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, 528, 560, 592, 624, - 656, 688, 720, 752, 784, 816, 848, 880, 912, 944, 976, 1008, 17, 49, - 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497, - 529, 561, 593, 625, 657, 689, 721, 753, 
785, 817, 849, 881, 913, 945, - 977, 1009, 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, - 402, 434, 466, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818, - 850, 882, 914, 946, 978, 1010, 19, 51, 83, 115, 147, 179, 211, 243, - 275, 307, 339, 371, 403, 435, 467, 499, 531, 563, 595, 627, 659, 691, - 723, 755, 787, 819, 851, 883, 915, 947, 979, 1011, 20, 52, 84, 116, - 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500, 532, 564, - 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980, 1012, - 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, - 469, 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, 821, 853, 885, - 917, 949, 981, 1013, 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, - 342, 374, 406, 438, 470, 502, 534, 566, 598, 630, 662, 694, 726, 758, - 790, 822, 854, 886, 918, 950, 982, 1014, 23, 55, 87, 119, 151, 183, - 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, 535, 567, 599, 631, - 663, 695, 727, 759, 791, 823, 855, 887, 919, 951, 983, 1015, 24, 56, - 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504, - 536, 568, 600, 632, 664, 696, 728, 760, 792, 824, 856, 888, 920, 952, - 984, 1016, 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, - 409, 441, 473, 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825, - 857, 889, 921, 953, 985, 1017, 26, 58, 90, 122, 154, 186, 218, 250, - 282, 314, 346, 378, 410, 442, 474, 506, 538, 570, 602, 634, 666, 698, - 730, 762, 794, 826, 858, 890, 922, 954, 986, 1018, 27, 59, 91, 123, - 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, 539, 571, - 603, 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019, - 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, - 476, 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, 828, 860, 892, - 924, 956, 988, 1020, 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, - 349, 381, 413, 445, 477, 509, 541, 573, 605, 637, 669, 701, 733, 765, - 797, 829, 861, 893, 925, 957, 989, 1021, 30, 
62, 94, 126, 158, 190, - 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, 542, 574, 606, 638, - 670, 702, 734, 766, 798, 830, 862, 894, 926, 958, 990, 1022, 31, 63, - 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511, - 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863, 895, 927, 959, - 991, 1023, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_32x32[1024]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, - 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, - 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, - 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, - 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, - 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, - 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, - 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, - 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, - 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, - 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, - 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, - 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, - 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, - 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, - 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, - 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, - 325, 326, 327, 328, 329, 330, 
331, 332, 333, 334, 335, 336, 337, - 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, - 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, - 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, - 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, - 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, - 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, - 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, - 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, - 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, - 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, - 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, - 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, - 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, - 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, - 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, - 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, - 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, - 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, - 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, - 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, - 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, - 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, - 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, - 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, - 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, - 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, - 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, - 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, - 715, 716, 717, 718, 
719, 720, 721, 722, 723, 724, 725, 726, 727, - 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, - 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, - 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, - 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, - 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, - 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, - 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, - 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, - 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, - 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, - 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, - 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, - 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, - 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, - 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, - 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, - 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, - 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, - 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, - 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, - 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, - 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, - 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x32[1024]) = { - 0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, - 91, 119, 120, 152, 153, 189, 190, 230, 231, 275, 276, 324, 325, - 377, 378, 434, 435, 495, 496, 2, 4, 7, 13, 16, 26, 29, - 43, 46, 64, 67, 89, 92, 118, 121, 151, 154, 188, 191, 229, - 232, 274, 277, 323, 326, 376, 379, 433, 436, 494, 497, 558, 3, - 8, 12, 17, 25, 30, 
42, 47, 63, 68, 88, 93, 117, 122, - 150, 155, 187, 192, 228, 233, 273, 278, 322, 327, 375, 380, 432, - 437, 493, 498, 557, 559, 9, 11, 18, 24, 31, 41, 48, 62, - 69, 87, 94, 116, 123, 149, 156, 186, 193, 227, 234, 272, 279, - 321, 328, 374, 381, 431, 438, 492, 499, 556, 560, 617, 10, 19, - 23, 32, 40, 49, 61, 70, 86, 95, 115, 124, 148, 157, 185, - 194, 226, 235, 271, 280, 320, 329, 373, 382, 430, 439, 491, 500, - 555, 561, 616, 618, 20, 22, 33, 39, 50, 60, 71, 85, 96, - 114, 125, 147, 158, 184, 195, 225, 236, 270, 281, 319, 330, 372, - 383, 429, 440, 490, 501, 554, 562, 615, 619, 672, 21, 34, 38, - 51, 59, 72, 84, 97, 113, 126, 146, 159, 183, 196, 224, 237, - 269, 282, 318, 331, 371, 384, 428, 441, 489, 502, 553, 563, 614, - 620, 671, 673, 35, 37, 52, 58, 73, 83, 98, 112, 127, 145, - 160, 182, 197, 223, 238, 268, 283, 317, 332, 370, 385, 427, 442, - 488, 503, 552, 564, 613, 621, 670, 674, 723, 36, 53, 57, 74, - 82, 99, 111, 128, 144, 161, 181, 198, 222, 239, 267, 284, 316, - 333, 369, 386, 426, 443, 487, 504, 551, 565, 612, 622, 669, 675, - 722, 724, 54, 56, 75, 81, 100, 110, 129, 143, 162, 180, 199, - 221, 240, 266, 285, 315, 334, 368, 387, 425, 444, 486, 505, 550, - 566, 611, 623, 668, 676, 721, 725, 770, 55, 76, 80, 101, 109, - 130, 142, 163, 179, 200, 220, 241, 265, 286, 314, 335, 367, 388, - 424, 445, 485, 506, 549, 567, 610, 624, 667, 677, 720, 726, 769, - 771, 77, 79, 102, 108, 131, 141, 164, 178, 201, 219, 242, 264, - 287, 313, 336, 366, 389, 423, 446, 484, 507, 548, 568, 609, 625, - 666, 678, 719, 727, 768, 772, 813, 78, 103, 107, 132, 140, 165, - 177, 202, 218, 243, 263, 288, 312, 337, 365, 390, 422, 447, 483, - 508, 547, 569, 608, 626, 665, 679, 718, 728, 767, 773, 812, 814, - 104, 106, 133, 139, 166, 176, 203, 217, 244, 262, 289, 311, 338, - 364, 391, 421, 448, 482, 509, 546, 570, 607, 627, 664, 680, 717, - 729, 766, 774, 811, 815, 852, 105, 134, 138, 167, 175, 204, 216, - 245, 261, 290, 310, 339, 363, 392, 420, 449, 481, 510, 545, 571, - 606, 628, 663, 
681, 716, 730, 765, 775, 810, 816, 851, 853, 135, - 137, 168, 174, 205, 215, 246, 260, 291, 309, 340, 362, 393, 419, - 450, 480, 511, 544, 572, 605, 629, 662, 682, 715, 731, 764, 776, - 809, 817, 850, 854, 887, 136, 169, 173, 206, 214, 247, 259, 292, - 308, 341, 361, 394, 418, 451, 479, 512, 543, 573, 604, 630, 661, - 683, 714, 732, 763, 777, 808, 818, 849, 855, 886, 888, 170, 172, - 207, 213, 248, 258, 293, 307, 342, 360, 395, 417, 452, 478, 513, - 542, 574, 603, 631, 660, 684, 713, 733, 762, 778, 807, 819, 848, - 856, 885, 889, 918, 171, 208, 212, 249, 257, 294, 306, 343, 359, - 396, 416, 453, 477, 514, 541, 575, 602, 632, 659, 685, 712, 734, - 761, 779, 806, 820, 847, 857, 884, 890, 917, 919, 209, 211, 250, - 256, 295, 305, 344, 358, 397, 415, 454, 476, 515, 540, 576, 601, - 633, 658, 686, 711, 735, 760, 780, 805, 821, 846, 858, 883, 891, - 916, 920, 945, 210, 251, 255, 296, 304, 345, 357, 398, 414, 455, - 475, 516, 539, 577, 600, 634, 657, 687, 710, 736, 759, 781, 804, - 822, 845, 859, 882, 892, 915, 921, 944, 946, 252, 254, 297, 303, - 346, 356, 399, 413, 456, 474, 517, 538, 578, 599, 635, 656, 688, - 709, 737, 758, 782, 803, 823, 844, 860, 881, 893, 914, 922, 943, - 947, 968, 253, 298, 302, 347, 355, 400, 412, 457, 473, 518, 537, - 579, 598, 636, 655, 689, 708, 738, 757, 783, 802, 824, 843, 861, - 880, 894, 913, 923, 942, 948, 967, 969, 299, 301, 348, 354, 401, - 411, 458, 472, 519, 536, 580, 597, 637, 654, 690, 707, 739, 756, - 784, 801, 825, 842, 862, 879, 895, 912, 924, 941, 949, 966, 970, - 987, 300, 349, 353, 402, 410, 459, 471, 520, 535, 581, 596, 638, - 653, 691, 706, 740, 755, 785, 800, 826, 841, 863, 878, 896, 911, - 925, 940, 950, 965, 971, 986, 988, 350, 352, 403, 409, 460, 470, - 521, 534, 582, 595, 639, 652, 692, 705, 741, 754, 786, 799, 827, - 840, 864, 877, 897, 910, 926, 939, 951, 964, 972, 985, 989, 1002, - 351, 404, 408, 461, 469, 522, 533, 583, 594, 640, 651, 693, 704, - 742, 753, 787, 798, 828, 839, 865, 876, 898, 909, 927, 938, 952, - 
963, 973, 984, 990, 1001, 1003, 405, 407, 462, 468, 523, 532, 584, - 593, 641, 650, 694, 703, 743, 752, 788, 797, 829, 838, 866, 875, - 899, 908, 928, 937, 953, 962, 974, 983, 991, 1000, 1004, 1013, 406, - 463, 467, 524, 531, 585, 592, 642, 649, 695, 702, 744, 751, 789, - 796, 830, 837, 867, 874, 900, 907, 929, 936, 954, 961, 975, 982, - 992, 999, 1005, 1012, 1014, 464, 466, 525, 530, 586, 591, 643, 648, - 696, 701, 745, 750, 790, 795, 831, 836, 868, 873, 901, 906, 930, - 935, 955, 960, 976, 981, 993, 998, 1006, 1011, 1015, 1020, 465, 526, - 529, 587, 590, 644, 647, 697, 700, 746, 749, 791, 794, 832, 835, - 869, 872, 902, 905, 931, 934, 956, 959, 977, 980, 994, 997, 1007, - 1010, 1016, 1019, 1021, 527, 528, 588, 589, 645, 646, 698, 699, 747, - 748, 792, 793, 833, 834, 870, 871, 903, 904, 932, 933, 957, 958, - 978, 979, 995, 996, 1008, 1009, 1017, 1018, 1022, 1023 -}; - -const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = { - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, default_scan_16x16_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, default_scan_32x32_neighbors }, - // Half of the coefficients of tx64 at higher frequencies are set to - // zeros. So tx32's scan order is used. 
- { default_scan_32x32, av1_default_iscan_32x32, default_scan_32x32_neighbors }, -}; - -const SCAN_ORDER av1_scan_orders[TX_SIZES_ALL][TX_TYPES] = { - { - // TX_4X4 - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }, - { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors }, - { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors }, - { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors }, - { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors }, - { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors }, - { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors }, - }, - { - // TX_8X8 - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, 
default_scan_8x8_neighbors }, - { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }, - { mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors }, - { mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors }, - { mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors }, - { mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors }, - { mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors }, - { mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors }, - }, - { - // TX_16X16 - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { default_scan_16x16, av1_default_iscan_16x16, - default_scan_16x16_neighbors }, - { mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors }, - { mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors }, - { mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors }, - { mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors }, - { mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors }, - { mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors }, - }, - { - // TX_32X32 - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, 
av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - }, - { - // TX_64X64 - // Half of the coefficients of tx64 at higher frequencies are set to - // zeros. So tx32's scan order is used. 
- { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - }, - { - // TX_4X8 - { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { default_scan_4x8, 
av1_default_iscan_4x8, default_scan_4x8_neighbors }, - { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors }, - { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors }, - { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors }, - { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors }, - { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors }, - { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors }, - }, - { - // TX_8X4 - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors }, - { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors }, - { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors }, - { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors }, - { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors }, - { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors }, - { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors }, - }, - { - // TX_8X16 - { default_scan_8x16, av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { default_scan_8x16, av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { default_scan_8x16, av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { default_scan_8x16, av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { default_scan_8x16, 
av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { default_scan_8x16, av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { default_scan_8x16, av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { default_scan_8x16, av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { default_scan_8x16, av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { default_scan_8x16, av1_default_iscan_8x16, - default_scan_8x16_neighbors }, - { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors }, - { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors }, - { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors }, - { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors }, - { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors }, - { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors }, - }, - { - // TX_16X8 - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { default_scan_16x8, av1_default_iscan_16x8, - default_scan_16x8_neighbors }, - { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors }, - { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors }, - { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors }, - { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors }, - { mrow_scan_16x8, 
av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors }, - { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors }, - }, - { - // TX_16X32 - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors }, - { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors }, - { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors }, - { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors }, - { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors }, - { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors }, - }, - { - // TX_32X16 - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - 
default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors }, - { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors }, - { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors }, - { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors }, - { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors }, - { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors }, - }, - { - // TX_32X64 - // Half of the coefficients of tx64 at higher frequencies are set to - // zeros. So tx32's scan order is used. - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - { mrow_scan_32x32, 
av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - }, - { - // TX_64X32 - // Half of the coefficients of tx64 at higher frequencies are set to - // zeros. So tx32's scan order is used. - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { default_scan_32x32, av1_default_iscan_32x32, - default_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, - { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, - }, - { - // TX_4X16 - { default_scan_4x16, av1_default_iscan_4x16, - default_scan_4x16_neighbors }, - { default_scan_4x16, av1_default_iscan_4x16, - default_scan_4x16_neighbors }, - { default_scan_4x16, av1_default_iscan_4x16, - default_scan_4x16_neighbors }, - { default_scan_4x16, av1_default_iscan_4x16, - default_scan_4x16_neighbors }, - { default_scan_4x16, av1_default_iscan_4x16, - default_scan_4x16_neighbors }, - { default_scan_4x16, av1_default_iscan_4x16, - 
default_scan_4x16_neighbors }, - { default_scan_4x16, av1_default_iscan_4x16, - default_scan_4x16_neighbors }, - { default_scan_4x16, av1_default_iscan_4x16, - default_scan_4x16_neighbors }, - { default_scan_4x16, av1_default_iscan_4x16, - default_scan_4x16_neighbors }, - { default_scan_4x16, av1_default_iscan_4x16, - default_scan_4x16_neighbors }, - { mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors }, - { mcol_scan_4x16, av1_mcol_iscan_4x16, mcol_scan_4x16_neighbors }, - { mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors }, - { mcol_scan_4x16, av1_mcol_iscan_4x16, mcol_scan_4x16_neighbors }, - { mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors }, - { mcol_scan_4x16, av1_mcol_iscan_4x16, mcol_scan_4x16_neighbors }, - }, - { - // TX_16X4 - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { default_scan_16x4, av1_default_iscan_16x4, - default_scan_16x4_neighbors }, - { mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors }, - { mcol_scan_16x4, av1_mcol_iscan_16x4, mcol_scan_16x4_neighbors }, - { mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors }, - { mcol_scan_16x4, av1_mcol_iscan_16x4, mcol_scan_16x4_neighbors }, - { mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors }, - { mcol_scan_16x4, av1_mcol_iscan_16x4, mcol_scan_16x4_neighbors 
}, - }, - { - // TX_8X32 - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { default_scan_8x32, av1_default_iscan_8x32, - default_scan_8x32_neighbors }, - { mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors }, - { mcol_scan_8x32, av1_mcol_iscan_8x32, mcol_scan_8x32_neighbors }, - { mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors }, - { mcol_scan_8x32, av1_mcol_iscan_8x32, mcol_scan_8x32_neighbors }, - { mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors }, - { mcol_scan_8x32, av1_mcol_iscan_8x32, mcol_scan_8x32_neighbors }, - }, - { - // TX_32X8 - { default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { 
default_scan_32x8, av1_default_iscan_32x8, - default_scan_32x8_neighbors }, - { mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors }, - { mcol_scan_32x8, av1_mcol_iscan_32x8, mcol_scan_32x8_neighbors }, - { mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors }, - { mcol_scan_32x8, av1_mcol_iscan_32x8, mcol_scan_32x8_neighbors }, - { mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors }, - { mcol_scan_32x8, av1_mcol_iscan_32x8, mcol_scan_32x8_neighbors }, - }, - { - // TX_16X64 - // Half of the coefficients of tx64 at higher frequencies are set to - // zeros. So tx32's scan order is used. - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { default_scan_16x32, av1_default_iscan_16x32, - default_scan_16x32_neighbors }, - { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors }, - { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors }, - { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors }, - { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors }, - { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors }, - { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors }, - }, - { - // TX_64X16 - // Half of the coefficients of tx64 at higher frequencies are set to - // zeros. 
So tx32's scan order is used. - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { default_scan_32x16, av1_default_iscan_32x16, - default_scan_32x16_neighbors }, - { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors }, - { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors }, - { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors }, - { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors }, - { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors }, - { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors }, - }, -}; diff --git a/third_party/aom/av1/common/scan.h b/third_party/aom/av1/common/scan.h deleted file mode 100644 index 233dc0efa..000000000 --- a/third_party/aom/av1/common/scan.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_SCAN_H_ -#define AOM_AV1_COMMON_SCAN_H_ - -#include "aom/aom_integer.h" -#include "aom_ports/mem.h" - -#include "av1/common/enums.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_NEIGHBORS 2 - -typedef enum SCAN_MODE { - SCAN_MODE_ZIG_ZAG, - SCAN_MODE_COL_DIAG, - SCAN_MODE_ROW_DIAG, - SCAN_MODE_COL_1D, - SCAN_MODE_ROW_1D, - SCAN_MODES -} SCAN_MODE; - -extern const SCAN_ORDER av1_default_scan_orders[TX_SIZES]; -extern const SCAN_ORDER av1_scan_orders[TX_SIZES_ALL][TX_TYPES]; - -void av1_deliver_eob_threshold(const AV1_COMMON *cm, MACROBLOCKD *xd); - -static INLINE const SCAN_ORDER *get_default_scan(TX_SIZE tx_size, - TX_TYPE tx_type) { - return &av1_scan_orders[tx_size][tx_type]; -} - -static INLINE const SCAN_ORDER *get_scan(TX_SIZE tx_size, TX_TYPE tx_type) { - return get_default_scan(tx_size, tx_type); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_SCAN_H_ diff --git a/third_party/aom/av1/common/seg_common.c b/third_party/aom/av1/common/seg_common.c deleted file mode 100644 index cd189ad76..000000000 --- a/third_party/aom/av1/common/seg_common.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <assert.h> - -#include "av1/common/av1_loopfilter.h" -#include "av1/common/blockd.h" -#include "av1/common/seg_common.h" -#include "av1/common/quant_common.h" - -static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 1, 1, 1, 0, 0 }; - -static const int seg_feature_data_max[SEG_LVL_MAX] = { - MAXQ, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, 7, 0 -}; - -// These functions provide access to new segment level features. -// Eventually these function may be "optimized out" but for the moment, -// the coding mechanism is still subject to change so these provide a -// convenient single point of change. - -void av1_clearall_segfeatures(struct segmentation *seg) { - av1_zero(seg->feature_data); - av1_zero(seg->feature_mask); -} - -void calculate_segdata(struct segmentation *seg) { - seg->segid_preskip = 0; - seg->last_active_segid = 0; - for (int i = 0; i < MAX_SEGMENTS; i++) { - for (int j = 0; j < SEG_LVL_MAX; j++) { - if (seg->feature_mask[i] & (1 << j)) { - seg->segid_preskip |= (j >= SEG_LVL_REF_FRAME); - seg->last_active_segid = i; - } - } - } -} - -void av1_enable_segfeature(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id) { - seg->feature_mask[segment_id] |= 1 << feature_id; -} - -int av1_seg_feature_data_max(SEG_LVL_FEATURES feature_id) { - return seg_feature_data_max[feature_id]; -} - -int av1_is_segfeature_signed(SEG_LVL_FEATURES feature_id) { - return seg_feature_data_signed[feature_id]; -} - -// The 'seg_data' given for each segment can be either deltas (from the default -// value chosen for the frame) or absolute values. -// -// Valid range for abs values is (0-127 for MB_LVL_ALT_Q), (0-63 for -// SEGMENT_ALT_LF) -// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q), (+/-63 for -// SEGMENT_ALT_LF) -// -// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use -// the absolute values given). 
- -void av1_set_segdata(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id, int seg_data) { - if (seg_data < 0) { - assert(seg_feature_data_signed[feature_id]); - assert(-seg_data <= seg_feature_data_max[feature_id]); - } else { - assert(seg_data <= seg_feature_data_max[feature_id]); - } - - seg->feature_data[segment_id][feature_id] = seg_data; -} - -// TBD? Functions to read and write segment data with range / validity checking diff --git a/third_party/aom/av1/common/seg_common.h b/third_party/aom/av1/common/seg_common.h deleted file mode 100644 index 8c35bba86..000000000 --- a/third_party/aom/av1/common/seg_common.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_SEG_COMMON_H_ -#define AOM_AV1_COMMON_SEG_COMMON_H_ - -#include "aom_dsp/prob.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_SEGMENTS 8 -#define SEG_TREE_PROBS (MAX_SEGMENTS - 1) - -#define SEG_TEMPORAL_PRED_CTXS 3 -#define SPATIAL_PREDICTION_PROBS 3 - -typedef enum { - SEG_LVL_ALT_Q, // Use alternate Quantizer .... 
- SEG_LVL_ALT_LF_Y_V, // Use alternate loop filter value on y plane vertical - SEG_LVL_ALT_LF_Y_H, // Use alternate loop filter value on y plane horizontal - SEG_LVL_ALT_LF_U, // Use alternate loop filter value on u plane - SEG_LVL_ALT_LF_V, // Use alternate loop filter value on v plane - SEG_LVL_REF_FRAME, // Optional Segment reference frame - SEG_LVL_SKIP, // Optional Segment (0,0) + skip mode - SEG_LVL_GLOBALMV, - SEG_LVL_MAX -} SEG_LVL_FEATURES; - -struct segmentation { - uint8_t enabled; - uint8_t update_map; - uint8_t update_data; - uint8_t temporal_update; - - int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; - unsigned int feature_mask[MAX_SEGMENTS]; - int last_active_segid; // The highest numbered segment id that has some - // enabled feature. - uint8_t segid_preskip; // Whether the segment id will be read before the - // skip syntax element. - // 1: the segment id will be read first. - // 0: the skip syntax element will be read first. -}; - -struct segmentation_probs { - aom_cdf_prob tree_cdf[CDF_SIZE(MAX_SEGMENTS)]; - aom_cdf_prob pred_cdf[SEG_TEMPORAL_PRED_CTXS][CDF_SIZE(2)]; - aom_cdf_prob spatial_pred_seg_cdf[SPATIAL_PREDICTION_PROBS] - [CDF_SIZE(MAX_SEGMENTS)]; -}; - -static INLINE int segfeature_active(const struct segmentation *seg, - int segment_id, - SEG_LVL_FEATURES feature_id) { - return seg->enabled && (seg->feature_mask[segment_id] & (1 << feature_id)); -} - -static INLINE void segfeatures_copy(struct segmentation *dst, - const struct segmentation *src) { - int i, j; - for (i = 0; i < MAX_SEGMENTS; i++) { - dst->feature_mask[i] = src->feature_mask[i]; - for (j = 0; j < SEG_LVL_MAX; j++) { - dst->feature_data[i][j] = src->feature_data[i][j]; - } - } - dst->segid_preskip = src->segid_preskip; - dst->last_active_segid = src->last_active_segid; -} - -void av1_clearall_segfeatures(struct segmentation *seg); - -void av1_enable_segfeature(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id); - -void calculate_segdata(struct 
segmentation *seg); - -int av1_seg_feature_data_max(SEG_LVL_FEATURES feature_id); - -int av1_is_segfeature_signed(SEG_LVL_FEATURES feature_id); - -void av1_set_segdata(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id, int seg_data); - -static INLINE int get_segdata(const struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id) { - return seg->feature_data[segment_id][feature_id]; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_SEG_COMMON_H_ diff --git a/third_party/aom/av1/common/thread_common.c b/third_party/aom/av1/common/thread_common.c deleted file mode 100644 index 8df4c9a09..000000000 --- a/third_party/aom/av1/common/thread_common.c +++ /dev/null @@ -1,786 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "config/aom_config.h" -#include "config/aom_scale_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "av1/common/av1_loopfilter.h" -#include "av1/common/entropymode.h" -#include "av1/common/thread_common.h" -#include "av1/common/reconinter.h" - -// Set up nsync by width. -static INLINE int get_sync_range(int width) { - // nsync numbers are picked by testing. For example, for 4k - // video, using 4 gives best performance. - if (width < 640) - return 1; - else if (width <= 1280) - return 2; - else if (width <= 4096) - return 4; - else - return 8; -} - -static INLINE int get_lr_sync_range(int width) { -#if 0 - // nsync numbers are picked by testing. 
For example, for 4k - // video, using 4 gives best performance. - if (width < 640) - return 1; - else if (width <= 1280) - return 2; - else if (width <= 4096) - return 4; - else - return 8; -#else - (void)width; - return 1; -#endif -} - -// Allocate memory for lf row synchronization -static void loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows, - int width, int num_workers) { - lf_sync->rows = rows; -#if CONFIG_MULTITHREAD - { - int i, j; - - for (j = 0; j < MAX_MB_PLANE; j++) { - CHECK_MEM_ERROR(cm, lf_sync->mutex_[j], - aom_malloc(sizeof(*(lf_sync->mutex_[j])) * rows)); - if (lf_sync->mutex_[j]) { - for (i = 0; i < rows; ++i) { - pthread_mutex_init(&lf_sync->mutex_[j][i], NULL); - } - } - - CHECK_MEM_ERROR(cm, lf_sync->cond_[j], - aom_malloc(sizeof(*(lf_sync->cond_[j])) * rows)); - if (lf_sync->cond_[j]) { - for (i = 0; i < rows; ++i) { - pthread_cond_init(&lf_sync->cond_[j][i], NULL); - } - } - } - - CHECK_MEM_ERROR(cm, lf_sync->job_mutex, - aom_malloc(sizeof(*(lf_sync->job_mutex)))); - if (lf_sync->job_mutex) { - pthread_mutex_init(lf_sync->job_mutex, NULL); - } - } -#endif // CONFIG_MULTITHREAD - CHECK_MEM_ERROR(cm, lf_sync->lfdata, - aom_malloc(num_workers * sizeof(*(lf_sync->lfdata)))); - lf_sync->num_workers = num_workers; - - for (int j = 0; j < MAX_MB_PLANE; j++) { - CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col[j], - aom_malloc(sizeof(*(lf_sync->cur_sb_col[j])) * rows)); - } - CHECK_MEM_ERROR( - cm, lf_sync->job_queue, - aom_malloc(sizeof(*(lf_sync->job_queue)) * rows * MAX_MB_PLANE * 2)); - // Set up nsync. 
- lf_sync->sync_range = get_sync_range(width); -} - -// Deallocate lf synchronization related mutex and data -void av1_loop_filter_dealloc(AV1LfSync *lf_sync) { - if (lf_sync != NULL) { - int j; -#if CONFIG_MULTITHREAD - int i; - for (j = 0; j < MAX_MB_PLANE; j++) { - if (lf_sync->mutex_[j] != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_mutex_destroy(&lf_sync->mutex_[j][i]); - } - aom_free(lf_sync->mutex_[j]); - } - if (lf_sync->cond_[j] != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_cond_destroy(&lf_sync->cond_[j][i]); - } - aom_free(lf_sync->cond_[j]); - } - } - if (lf_sync->job_mutex != NULL) { - pthread_mutex_destroy(lf_sync->job_mutex); - aom_free(lf_sync->job_mutex); - } -#endif // CONFIG_MULTITHREAD - aom_free(lf_sync->lfdata); - for (j = 0; j < MAX_MB_PLANE; j++) { - aom_free(lf_sync->cur_sb_col[j]); - } - - aom_free(lf_sync->job_queue); - // clear the structure as the source of this call may be a resize in which - // case this call will be followed by an _alloc() which may fail. 
- av1_zero(*lf_sync); - } -} - -static void loop_filter_data_reset(LFWorkerData *lf_data, - YV12_BUFFER_CONFIG *frame_buffer, - struct AV1Common *cm, MACROBLOCKD *xd) { - struct macroblockd_plane *pd = xd->plane; - lf_data->frame_buffer = frame_buffer; - lf_data->cm = cm; - lf_data->xd = xd; - for (int i = 0; i < MAX_MB_PLANE; i++) { - memcpy(&lf_data->planes[i].dst, &pd[i].dst, sizeof(lf_data->planes[i].dst)); - lf_data->planes[i].subsampling_x = pd[i].subsampling_x; - lf_data->planes[i].subsampling_y = pd[i].subsampling_y; - } -} - -static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c, - int plane) { -#if CONFIG_MULTITHREAD - const int nsync = lf_sync->sync_range; - - if (r && !(c & (nsync - 1))) { - pthread_mutex_t *const mutex = &lf_sync->mutex_[plane][r - 1]; - pthread_mutex_lock(mutex); - - while (c > lf_sync->cur_sb_col[plane][r - 1] - nsync) { - pthread_cond_wait(&lf_sync->cond_[plane][r - 1], mutex); - } - pthread_mutex_unlock(mutex); - } -#else - (void)lf_sync; - (void)r; - (void)c; - (void)plane; -#endif // CONFIG_MULTITHREAD -} - -static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c, - const int sb_cols, int plane) { -#if CONFIG_MULTITHREAD - const int nsync = lf_sync->sync_range; - int cur; - // Only signal when there are enough filtered SB for next row to run. 
- int sig = 1; - - if (c < sb_cols - 1) { - cur = c; - if (c % nsync) sig = 0; - } else { - cur = sb_cols + nsync; - } - - if (sig) { - pthread_mutex_lock(&lf_sync->mutex_[plane][r]); - - lf_sync->cur_sb_col[plane][r] = cur; - - pthread_cond_broadcast(&lf_sync->cond_[plane][r]); - pthread_mutex_unlock(&lf_sync->mutex_[plane][r]); - } -#else - (void)lf_sync; - (void)r; - (void)c; - (void)sb_cols; - (void)plane; -#endif // CONFIG_MULTITHREAD -} - -static void enqueue_lf_jobs(AV1LfSync *lf_sync, AV1_COMMON *cm, int start, - int stop, int plane_start, int plane_end) { - int mi_row, plane, dir; - AV1LfMTInfo *lf_job_queue = lf_sync->job_queue; - lf_sync->jobs_enqueued = 0; - lf_sync->jobs_dequeued = 0; - - for (dir = 0; dir < 2; dir++) { - for (plane = plane_start; plane < plane_end; plane++) { - if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1])) - break; - else if (plane == 1 && !(cm->lf.filter_level_u)) - continue; - else if (plane == 2 && !(cm->lf.filter_level_v)) - continue; - for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { - lf_job_queue->mi_row = mi_row; - lf_job_queue->plane = plane; - lf_job_queue->dir = dir; - lf_job_queue++; - lf_sync->jobs_enqueued++; - } - } - } -} - -AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) { - AV1LfMTInfo *cur_job_info = NULL; - -#if CONFIG_MULTITHREAD - pthread_mutex_lock(lf_sync->job_mutex); - - if (lf_sync->jobs_dequeued < lf_sync->jobs_enqueued) { - cur_job_info = lf_sync->job_queue + lf_sync->jobs_dequeued; - lf_sync->jobs_dequeued++; - } - - pthread_mutex_unlock(lf_sync->job_mutex); -#else - (void)lf_sync; -#endif - - return cur_job_info; -} - -// Implement row loopfiltering for each thread. 
-static INLINE void thread_loop_filter_rows( - const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm, - struct macroblockd_plane *planes, MACROBLOCKD *xd, - AV1LfSync *const lf_sync) { - const int sb_cols = - ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2; - int mi_row, mi_col, plane, dir; - int r, c; - - while (1) { - AV1LfMTInfo *cur_job_info = get_lf_job_info(lf_sync); - - if (cur_job_info != NULL) { - mi_row = cur_job_info->mi_row; - plane = cur_job_info->plane; - dir = cur_job_info->dir; - r = mi_row >> MAX_MIB_SIZE_LOG2; - - if (dir == 0) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) { - c = mi_col >> MAX_MIB_SIZE_LOG2; - - av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer, - mi_row, mi_col, plane, plane + 1); - - av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row, - mi_col); - sync_write(lf_sync, r, c, sb_cols, plane); - } - } else if (dir == 1) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) { - c = mi_col >> MAX_MIB_SIZE_LOG2; - - // Wait for vertical edge filtering of the top-right block to be - // completed - sync_read(lf_sync, r, c, plane); - - // Wait for vertical edge filtering of the right block to be - // completed - sync_read(lf_sync, r + 1, c, plane); - - av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer, - mi_row, mi_col, plane, plane + 1); - av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row, - mi_col); - } - } - } else { - break; - } - } -} - -// Row-based multi-threaded loopfilter hook -static int loop_filter_row_worker(void *arg1, void *arg2) { - AV1LfSync *const lf_sync = (AV1LfSync *)arg1; - LFWorkerData *const lf_data = (LFWorkerData *)arg2; - thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->xd, lf_sync); - return 1; -} - -static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, - MACROBLOCKD *xd, int start, int stop, - int plane_start, int 
plane_end, - AVxWorker *workers, int nworkers, - AV1LfSync *lf_sync) { - const AVxWorkerInterface *const winterface = aom_get_worker_interface(); - // Number of superblock rows and cols - const int sb_rows = - ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2; - const int num_workers = nworkers; - int i; - - if (!lf_sync->sync_range || sb_rows != lf_sync->rows || - num_workers > lf_sync->num_workers) { - av1_loop_filter_dealloc(lf_sync); - loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); - } - - // Initialize cur_sb_col to -1 for all SB rows. - for (i = 0; i < MAX_MB_PLANE; i++) { - memset(lf_sync->cur_sb_col[i], -1, - sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows); - } - - enqueue_lf_jobs(lf_sync, cm, start, stop, plane_start, plane_end); - - // Set up loopfilter thread data. - for (i = 0; i < num_workers; ++i) { - AVxWorker *const worker = &workers[i]; - LFWorkerData *const lf_data = &lf_sync->lfdata[i]; - - worker->hook = loop_filter_row_worker; - worker->data1 = lf_sync; - worker->data2 = lf_data; - - // Loopfilter data - loop_filter_data_reset(lf_data, frame, cm, xd); - - // Start loopfiltering - if (i == num_workers - 1) { - winterface->execute(worker); - } else { - winterface->launch(worker); - } - } - - // Wait till all rows are finished - for (i = 0; i < num_workers; ++i) { - winterface->sync(&workers[i]); - } -} - -void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, - MACROBLOCKD *xd, int plane_start, int plane_end, - int partial_frame, AVxWorker *workers, - int num_workers, AV1LfSync *lf_sync) { - int start_mi_row, end_mi_row, mi_rows_to_filter; - - start_mi_row = 0; - mi_rows_to_filter = cm->mi_rows; - if (partial_frame && cm->mi_rows > 8) { - start_mi_row = cm->mi_rows >> 1; - start_mi_row &= 0xfffffff8; - mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8); - } - end_mi_row = start_mi_row + mi_rows_to_filter; - av1_loop_filter_frame_init(cm, plane_start, plane_end); - - loop_filter_rows_mt(frame, cm, 
xd, start_mi_row, end_mi_row, plane_start, - plane_end, workers, num_workers, lf_sync); -} - -static INLINE void lr_sync_read(void *const lr_sync, int r, int c, int plane) { -#if CONFIG_MULTITHREAD - AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync; - const int nsync = loop_res_sync->sync_range; - - if (r && !(c & (nsync - 1))) { - pthread_mutex_t *const mutex = &loop_res_sync->mutex_[plane][r - 1]; - pthread_mutex_lock(mutex); - - while (c > loop_res_sync->cur_sb_col[plane][r - 1] - nsync) { - pthread_cond_wait(&loop_res_sync->cond_[plane][r - 1], mutex); - } - pthread_mutex_unlock(mutex); - } -#else - (void)lr_sync; - (void)r; - (void)c; - (void)plane; -#endif // CONFIG_MULTITHREAD -} - -static INLINE void lr_sync_write(void *const lr_sync, int r, int c, - const int sb_cols, int plane) { -#if CONFIG_MULTITHREAD - AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync; - const int nsync = loop_res_sync->sync_range; - int cur; - // Only signal when there are enough filtered SB for next row to run. 
- int sig = 1; - - if (c < sb_cols - 1) { - cur = c; - if (c % nsync) sig = 0; - } else { - cur = sb_cols + nsync; - } - - if (sig) { - pthread_mutex_lock(&loop_res_sync->mutex_[plane][r]); - - loop_res_sync->cur_sb_col[plane][r] = cur; - - pthread_cond_broadcast(&loop_res_sync->cond_[plane][r]); - pthread_mutex_unlock(&loop_res_sync->mutex_[plane][r]); - } -#else - (void)lr_sync; - (void)r; - (void)c; - (void)sb_cols; - (void)plane; -#endif // CONFIG_MULTITHREAD -} - -// Allocate memory for loop restoration row synchronization -static void loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm, - int num_workers, int num_rows_lr, - int num_planes, int width) { - lr_sync->rows = num_rows_lr; - lr_sync->num_planes = num_planes; -#if CONFIG_MULTITHREAD - { - int i, j; - - for (j = 0; j < num_planes; j++) { - CHECK_MEM_ERROR(cm, lr_sync->mutex_[j], - aom_malloc(sizeof(*(lr_sync->mutex_[j])) * num_rows_lr)); - if (lr_sync->mutex_[j]) { - for (i = 0; i < num_rows_lr; ++i) { - pthread_mutex_init(&lr_sync->mutex_[j][i], NULL); - } - } - - CHECK_MEM_ERROR(cm, lr_sync->cond_[j], - aom_malloc(sizeof(*(lr_sync->cond_[j])) * num_rows_lr)); - if (lr_sync->cond_[j]) { - for (i = 0; i < num_rows_lr; ++i) { - pthread_cond_init(&lr_sync->cond_[j][i], NULL); - } - } - } - - CHECK_MEM_ERROR(cm, lr_sync->job_mutex, - aom_malloc(sizeof(*(lr_sync->job_mutex)))); - if (lr_sync->job_mutex) { - pthread_mutex_init(lr_sync->job_mutex, NULL); - } - } -#endif // CONFIG_MULTITHREAD - CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata, - aom_malloc(num_workers * sizeof(*(lr_sync->lrworkerdata)))); - - for (int worker_idx = 0; worker_idx < num_workers; ++worker_idx) { - if (worker_idx < num_workers - 1) { - CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rst_tmpbuf, - (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE)); - CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rlbs, - aom_malloc(sizeof(RestorationLineBuffers))); - - } else { - lr_sync->lrworkerdata[worker_idx].rst_tmpbuf = 
cm->rst_tmpbuf; - lr_sync->lrworkerdata[worker_idx].rlbs = cm->rlbs; - } - } - - lr_sync->num_workers = num_workers; - - for (int j = 0; j < num_planes; j++) { - CHECK_MEM_ERROR( - cm, lr_sync->cur_sb_col[j], - aom_malloc(sizeof(*(lr_sync->cur_sb_col[j])) * num_rows_lr)); - } - CHECK_MEM_ERROR( - cm, lr_sync->job_queue, - aom_malloc(sizeof(*(lr_sync->job_queue)) * num_rows_lr * num_planes)); - // Set up nsync. - lr_sync->sync_range = get_lr_sync_range(width); -} - -// Deallocate loop restoration synchronization related mutex and data -void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers) { - if (lr_sync != NULL) { - int j; -#if CONFIG_MULTITHREAD - int i; - for (j = 0; j < MAX_MB_PLANE; j++) { - if (lr_sync->mutex_[j] != NULL) { - for (i = 0; i < lr_sync->rows; ++i) { - pthread_mutex_destroy(&lr_sync->mutex_[j][i]); - } - aom_free(lr_sync->mutex_[j]); - } - if (lr_sync->cond_[j] != NULL) { - for (i = 0; i < lr_sync->rows; ++i) { - pthread_cond_destroy(&lr_sync->cond_[j][i]); - } - aom_free(lr_sync->cond_[j]); - } - } - if (lr_sync->job_mutex != NULL) { - pthread_mutex_destroy(lr_sync->job_mutex); - aom_free(lr_sync->job_mutex); - } -#endif // CONFIG_MULTITHREAD - for (j = 0; j < MAX_MB_PLANE; j++) { - aom_free(lr_sync->cur_sb_col[j]); - } - - aom_free(lr_sync->job_queue); - - if (lr_sync->lrworkerdata) { - for (int worker_idx = 0; worker_idx < num_workers - 1; worker_idx++) { - LRWorkerData *const workerdata_data = - lr_sync->lrworkerdata + worker_idx; - - aom_free(workerdata_data->rst_tmpbuf); - aom_free(workerdata_data->rlbs); - } - aom_free(lr_sync->lrworkerdata); - } - - // clear the structure as the source of this call may be a resize in which - // case this call will be followed by an _alloc() which may fail. 
- av1_zero(*lr_sync); - } -} - -static void enqueue_lr_jobs(AV1LrSync *lr_sync, AV1LrStruct *lr_ctxt, - AV1_COMMON *cm) { - FilterFrameCtxt *ctxt = lr_ctxt->ctxt; - - const int num_planes = av1_num_planes(cm); - AV1LrMTInfo *lr_job_queue = lr_sync->job_queue; - int32_t lr_job_counter[2], num_even_lr_jobs = 0; - lr_sync->jobs_enqueued = 0; - lr_sync->jobs_dequeued = 0; - - for (int plane = 0; plane < num_planes; plane++) { - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; - num_even_lr_jobs = - num_even_lr_jobs + ((ctxt[plane].rsi->vert_units_per_tile + 1) >> 1); - } - lr_job_counter[0] = 0; - lr_job_counter[1] = num_even_lr_jobs; - - for (int plane = 0; plane < num_planes; plane++) { - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; - const int is_uv = plane > 0; - const int ss_y = is_uv && cm->seq_params.subsampling_y; - - AV1PixelRect tile_rect = ctxt[plane].tile_rect; - const int unit_size = ctxt[plane].rsi->restoration_unit_size; - - const int tile_h = tile_rect.bottom - tile_rect.top; - const int ext_size = unit_size * 3 / 2; - - int y0 = 0, i = 0; - while (y0 < tile_h) { - int remaining_h = tile_h - y0; - int h = (remaining_h < ext_size) ? 
remaining_h : unit_size; - - RestorationTileLimits limits; - limits.v_start = tile_rect.top + y0; - limits.v_end = tile_rect.top + y0 + h; - assert(limits.v_end <= tile_rect.bottom); - // Offset the tile upwards to align with the restoration processing stripe - const int voffset = RESTORATION_UNIT_OFFSET >> ss_y; - limits.v_start = AOMMAX(tile_rect.top, limits.v_start - voffset); - if (limits.v_end < tile_rect.bottom) limits.v_end -= voffset; - - assert(lr_job_counter[0] <= num_even_lr_jobs); - - lr_job_queue[lr_job_counter[i & 1]].lr_unit_row = i; - lr_job_queue[lr_job_counter[i & 1]].plane = plane; - lr_job_queue[lr_job_counter[i & 1]].v_start = limits.v_start; - lr_job_queue[lr_job_counter[i & 1]].v_end = limits.v_end; - lr_job_queue[lr_job_counter[i & 1]].sync_mode = i & 1; - if ((i & 1) == 0) { - lr_job_queue[lr_job_counter[i & 1]].v_copy_start = - limits.v_start + RESTORATION_BORDER; - lr_job_queue[lr_job_counter[i & 1]].v_copy_end = - limits.v_end - RESTORATION_BORDER; - if (i == 0) { - assert(limits.v_start == tile_rect.top); - lr_job_queue[lr_job_counter[i & 1]].v_copy_start = tile_rect.top; - } - if (i == (ctxt[plane].rsi->vert_units_per_tile - 1)) { - assert(limits.v_end == tile_rect.bottom); - lr_job_queue[lr_job_counter[i & 1]].v_copy_end = tile_rect.bottom; - } - } else { - lr_job_queue[lr_job_counter[i & 1]].v_copy_start = - AOMMAX(limits.v_start - RESTORATION_BORDER, tile_rect.top); - lr_job_queue[lr_job_counter[i & 1]].v_copy_end = - AOMMIN(limits.v_end + RESTORATION_BORDER, tile_rect.bottom); - } - lr_job_counter[i & 1]++; - lr_sync->jobs_enqueued++; - - y0 += h; - ++i; - } - } -} - -AV1LrMTInfo *get_lr_job_info(AV1LrSync *lr_sync) { - AV1LrMTInfo *cur_job_info = NULL; - -#if CONFIG_MULTITHREAD - pthread_mutex_lock(lr_sync->job_mutex); - - if (lr_sync->jobs_dequeued < lr_sync->jobs_enqueued) { - cur_job_info = lr_sync->job_queue + lr_sync->jobs_dequeued; - lr_sync->jobs_dequeued++; - } - - pthread_mutex_unlock(lr_sync->job_mutex); -#else - 
(void)lr_sync; -#endif - - return cur_job_info; -} - -// Implement row loop restoration for each thread. -static int loop_restoration_row_worker(void *arg1, void *arg2) { - AV1LrSync *const lr_sync = (AV1LrSync *)arg1; - LRWorkerData *lrworkerdata = (LRWorkerData *)arg2; - AV1LrStruct *lr_ctxt = (AV1LrStruct *)lrworkerdata->lr_ctxt; - FilterFrameCtxt *ctxt = lr_ctxt->ctxt; - int lr_unit_row; - int plane; - const int tile_row = LR_TILE_ROW; - const int tile_col = LR_TILE_COL; - const int tile_cols = LR_TILE_COLS; - const int tile_idx = tile_col + tile_row * tile_cols; - typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend, - int vstart, int vend); - static const copy_fun copy_funs[3] = { - aom_yv12_partial_copy_y, aom_yv12_partial_copy_u, aom_yv12_partial_copy_v - }; - - while (1) { - AV1LrMTInfo *cur_job_info = get_lr_job_info(lr_sync); - if (cur_job_info != NULL) { - RestorationTileLimits limits; - sync_read_fn_t on_sync_read; - sync_write_fn_t on_sync_write; - limits.v_start = cur_job_info->v_start; - limits.v_end = cur_job_info->v_end; - lr_unit_row = cur_job_info->lr_unit_row; - plane = cur_job_info->plane; - const int unit_idx0 = tile_idx * ctxt[plane].rsi->units_per_tile; - - // sync_mode == 1 implies only sync read is required in LR Multi-threading - // sync_mode == 0 implies only sync write is required. - on_sync_read = - cur_job_info->sync_mode == 1 ? lr_sync_read : av1_lr_sync_read_dummy; - on_sync_write = cur_job_info->sync_mode == 0 ? 
lr_sync_write - : av1_lr_sync_write_dummy; - - av1_foreach_rest_unit_in_row( - &limits, &(ctxt[plane].tile_rect), lr_ctxt->on_rest_unit, lr_unit_row, - ctxt[plane].rsi->restoration_unit_size, unit_idx0, - ctxt[plane].rsi->horz_units_per_tile, - ctxt[plane].rsi->vert_units_per_tile, plane, &ctxt[plane], - lrworkerdata->rst_tmpbuf, lrworkerdata->rlbs, on_sync_read, - on_sync_write, lr_sync); - - copy_funs[plane](lr_ctxt->dst, lr_ctxt->frame, ctxt[plane].tile_rect.left, - ctxt[plane].tile_rect.right, cur_job_info->v_copy_start, - cur_job_info->v_copy_end); - } else { - break; - } - } - return 1; -} - -static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt, - AVxWorker *workers, int nworkers, - AV1LrSync *lr_sync, AV1_COMMON *cm) { - FilterFrameCtxt *ctxt = lr_ctxt->ctxt; - - const int num_planes = av1_num_planes(cm); - - const AVxWorkerInterface *const winterface = aom_get_worker_interface(); - int num_rows_lr = 0; - - for (int plane = 0; plane < num_planes; plane++) { - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; - - const AV1PixelRect tile_rect = ctxt[plane].tile_rect; - const int max_tile_h = tile_rect.bottom - tile_rect.top; - - const int unit_size = cm->rst_info[plane].restoration_unit_size; - - num_rows_lr = - AOMMAX(num_rows_lr, av1_lr_count_units_in_tile(unit_size, max_tile_h)); - } - - const int num_workers = nworkers; - int i; - assert(MAX_MB_PLANE == 3); - - if (!lr_sync->sync_range || num_rows_lr != lr_sync->rows || - num_workers > lr_sync->num_workers || num_planes != lr_sync->num_planes) { - av1_loop_restoration_dealloc(lr_sync, num_workers); - loop_restoration_alloc(lr_sync, cm, num_workers, num_rows_lr, num_planes, - cm->width); - } - - // Initialize cur_sb_col to -1 for all SB rows. - for (i = 0; i < num_planes; i++) { - memset(lr_sync->cur_sb_col[i], -1, - sizeof(*(lr_sync->cur_sb_col[i])) * num_rows_lr); - } - - enqueue_lr_jobs(lr_sync, lr_ctxt, cm); - - // Set up looprestoration thread data. 
- for (i = 0; i < num_workers; ++i) { - AVxWorker *const worker = &workers[i]; - lr_sync->lrworkerdata[i].lr_ctxt = (void *)lr_ctxt; - worker->hook = loop_restoration_row_worker; - worker->data1 = lr_sync; - worker->data2 = &lr_sync->lrworkerdata[i]; - - // Start loopfiltering - if (i == num_workers - 1) { - winterface->execute(worker); - } else { - winterface->launch(worker); - } - } - - // Wait till all rows are finished - for (i = 0; i < num_workers; ++i) { - winterface->sync(&workers[i]); - } -} - -void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - AV1_COMMON *cm, int optimized_lr, - AVxWorker *workers, int num_workers, - AV1LrSync *lr_sync, void *lr_ctxt) { - assert(!cm->all_lossless); - - const int num_planes = av1_num_planes(cm); - - AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt; - - av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm, - optimized_lr, num_planes); - - foreach_rest_unit_in_planes_mt(loop_rest_ctxt, workers, num_workers, lr_sync, - cm); -} diff --git a/third_party/aom/av1/common/thread_common.h b/third_party/aom/av1/common/thread_common.h deleted file mode 100644 index 23d61d72a..000000000 --- a/third_party/aom/av1/common/thread_common.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_THREAD_COMMON_H_ -#define AOM_AV1_COMMON_THREAD_COMMON_H_ - -#include "config/aom_config.h" - -#include "av1/common/av1_loopfilter.h" -#include "aom_util/aom_thread.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct AV1Common; - -typedef struct AV1LfMTInfo { - int mi_row; - int plane; - int dir; -} AV1LfMTInfo; - -// Loopfilter row synchronization -typedef struct AV1LfSyncData { -#if CONFIG_MULTITHREAD - pthread_mutex_t *mutex_[MAX_MB_PLANE]; - pthread_cond_t *cond_[MAX_MB_PLANE]; -#endif - // Allocate memory to store the loop-filtered superblock index in each row. - int *cur_sb_col[MAX_MB_PLANE]; - // The optimal sync_range for different resolution and platform should be - // determined by testing. Currently, it is chosen to be a power-of-2 number. - int sync_range; - int rows; - - // Row-based parallel loopfilter data - LFWorkerData *lfdata; - int num_workers; - -#if CONFIG_MULTITHREAD - pthread_mutex_t *job_mutex; -#endif - AV1LfMTInfo *job_queue; - int jobs_enqueued; - int jobs_dequeued; -} AV1LfSync; - -typedef struct AV1LrMTInfo { - int v_start; - int v_end; - int lr_unit_row; - int plane; - int sync_mode; - int v_copy_start; - int v_copy_end; -} AV1LrMTInfo; - -typedef struct LoopRestorationWorkerData { - int32_t *rst_tmpbuf; - void *rlbs; - void *lr_ctxt; -} LRWorkerData; - -// Looprestoration row synchronization -typedef struct AV1LrSyncData { -#if CONFIG_MULTITHREAD - pthread_mutex_t *mutex_[MAX_MB_PLANE]; - pthread_cond_t *cond_[MAX_MB_PLANE]; -#endif - // Allocate memory to store the loop-restoration block index in each row. - int *cur_sb_col[MAX_MB_PLANE]; - // The optimal sync_range for different resolution and platform should be - // determined by testing. Currently, it is chosen to be a power-of-2 number. 
- int sync_range; - int rows; - int num_planes; - - int num_workers; - -#if CONFIG_MULTITHREAD - pthread_mutex_t *job_mutex; -#endif - // Row-based parallel loopfilter data - LRWorkerData *lrworkerdata; - - AV1LrMTInfo *job_queue; - int jobs_enqueued; - int jobs_dequeued; -} AV1LrSync; - -// Deallocate loopfilter synchronization related mutex and data. -void av1_loop_filter_dealloc(AV1LfSync *lf_sync); - -void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm, - struct macroblockd *mbd, int plane_start, - int plane_end, int partial_frame, - AVxWorker *workers, int num_workers, - AV1LfSync *lf_sync); -void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - struct AV1Common *cm, - int optimized_lr, AVxWorker *workers, - int num_workers, AV1LrSync *lr_sync, - void *lr_ctxt); -void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_THREAD_COMMON_H_ diff --git a/third_party/aom/av1/common/tile_common.c b/third_party/aom/av1/common/tile_common.c deleted file mode 100644 index 1b413487f..000000000 --- a/third_party/aom/av1/common/tile_common.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "av1/common/tile_common.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/resize.h" -#include "aom_dsp/aom_dsp_common.h" - -void av1_tile_init(TileInfo *tile, const AV1_COMMON *cm, int row, int col) { - av1_tile_set_row(tile, cm, row); - av1_tile_set_col(tile, cm, col); -} - -// Find smallest k>=0 such that (blk_size << k) >= target -static int tile_log2(int blk_size, int target) { - int k; - for (k = 0; (blk_size << k) < target; k++) { - } - return k; -} - -void av1_get_tile_limits(AV1_COMMON *const cm) { - int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2); - int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2); - int sb_cols = mi_cols >> cm->seq_params.mib_size_log2; - int sb_rows = mi_rows >> cm->seq_params.mib_size_log2; - - int sb_size_log2 = cm->seq_params.mib_size_log2 + MI_SIZE_LOG2; - cm->max_tile_width_sb = MAX_TILE_WIDTH >> sb_size_log2; - int max_tile_area_sb = MAX_TILE_AREA >> (2 * sb_size_log2); - - cm->min_log2_tile_cols = tile_log2(cm->max_tile_width_sb, sb_cols); - cm->max_log2_tile_cols = tile_log2(1, AOMMIN(sb_cols, MAX_TILE_COLS)); - cm->max_log2_tile_rows = tile_log2(1, AOMMIN(sb_rows, MAX_TILE_ROWS)); - cm->min_log2_tiles = tile_log2(max_tile_area_sb, sb_cols * sb_rows); - cm->min_log2_tiles = AOMMAX(cm->min_log2_tiles, cm->min_log2_tile_cols); -} - -void av1_calculate_tile_cols(AV1_COMMON *const cm) { - int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2); - int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2); - int sb_cols = mi_cols >> cm->seq_params.mib_size_log2; - int sb_rows = mi_rows >> cm->seq_params.mib_size_log2; - int i; - - if (cm->uniform_tile_spacing_flag) { - int start_sb; - int size_sb = ALIGN_POWER_OF_TWO(sb_cols, cm->log2_tile_cols); - size_sb >>= cm->log2_tile_cols; - assert(size_sb > 0); - for (i = 0, start_sb = 0; start_sb < sb_cols; i++) { - cm->tile_col_start_sb[i] = start_sb; - start_sb += size_sb; 
- } - cm->tile_cols = i; - cm->tile_col_start_sb[i] = sb_cols; - cm->min_log2_tile_rows = AOMMAX(cm->min_log2_tiles - cm->log2_tile_cols, 0); - cm->max_tile_height_sb = sb_rows >> cm->min_log2_tile_rows; - - cm->tile_width = size_sb << cm->seq_params.mib_size_log2; - cm->tile_width = AOMMIN(cm->tile_width, cm->mi_cols); - } else { - int max_tile_area_sb = (sb_rows * sb_cols); - int widest_tile_sb = 1; - cm->log2_tile_cols = tile_log2(1, cm->tile_cols); - for (i = 0; i < cm->tile_cols; i++) { - int size_sb = cm->tile_col_start_sb[i + 1] - cm->tile_col_start_sb[i]; - widest_tile_sb = AOMMAX(widest_tile_sb, size_sb); - } - if (cm->min_log2_tiles) { - max_tile_area_sb >>= (cm->min_log2_tiles + 1); - } - cm->max_tile_height_sb = AOMMAX(max_tile_area_sb / widest_tile_sb, 1); - } -} - -void av1_calculate_tile_rows(AV1_COMMON *const cm) { - int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2); - int sb_rows = mi_rows >> cm->seq_params.mib_size_log2; - int start_sb, size_sb, i; - - if (cm->uniform_tile_spacing_flag) { - size_sb = ALIGN_POWER_OF_TWO(sb_rows, cm->log2_tile_rows); - size_sb >>= cm->log2_tile_rows; - assert(size_sb > 0); - for (i = 0, start_sb = 0; start_sb < sb_rows; i++) { - cm->tile_row_start_sb[i] = start_sb; - start_sb += size_sb; - } - cm->tile_rows = i; - cm->tile_row_start_sb[i] = sb_rows; - - cm->tile_height = size_sb << cm->seq_params.mib_size_log2; - cm->tile_height = AOMMIN(cm->tile_height, cm->mi_rows); - } else { - cm->log2_tile_rows = tile_log2(1, cm->tile_rows); - } -} - -void av1_tile_set_row(TileInfo *tile, const AV1_COMMON *cm, int row) { - assert(row < cm->tile_rows); - int mi_row_start = cm->tile_row_start_sb[row] << cm->seq_params.mib_size_log2; - int mi_row_end = cm->tile_row_start_sb[row + 1] - << cm->seq_params.mib_size_log2; - tile->tile_row = row; - tile->mi_row_start = mi_row_start; - tile->mi_row_end = AOMMIN(mi_row_end, cm->mi_rows); - assert(tile->mi_row_end > tile->mi_row_start); -} - -void 
av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) { - assert(col < cm->tile_cols); - int mi_col_start = cm->tile_col_start_sb[col] << cm->seq_params.mib_size_log2; - int mi_col_end = cm->tile_col_start_sb[col + 1] - << cm->seq_params.mib_size_log2; - tile->tile_col = col; - tile->mi_col_start = mi_col_start; - tile->mi_col_end = AOMMIN(mi_col_end, cm->mi_cols); - assert(tile->mi_col_end > tile->mi_col_start); -} - -int av1_get_sb_rows_in_tile(AV1_COMMON *cm, TileInfo tile) { - int mi_rows_aligned_to_sb = ALIGN_POWER_OF_TWO( - tile.mi_row_end - tile.mi_row_start, cm->seq_params.mib_size_log2); - int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params.mib_size_log2; - - return sb_rows; -} - -int av1_get_sb_cols_in_tile(AV1_COMMON *cm, TileInfo tile) { - int mi_cols_aligned_to_sb = ALIGN_POWER_OF_TWO( - tile.mi_col_end - tile.mi_col_start, cm->seq_params.mib_size_log2); - int sb_cols = mi_cols_aligned_to_sb >> cm->seq_params.mib_size_log2; - - return sb_cols; -} - -int get_tile_size(int mi_frame_size, int log2_tile_num, int *ntiles) { - // Round the frame up to a whole number of max superblocks - mi_frame_size = ALIGN_POWER_OF_TWO(mi_frame_size, MAX_MIB_SIZE_LOG2); - - // Divide by the signalled number of tiles, rounding up to the multiple of - // the max superblock size. To do this, shift right (and round up) to get the - // tile size in max super-blocks and then shift left again to convert it to - // mi units. - const int shift = log2_tile_num + MAX_MIB_SIZE_LOG2; - const int max_sb_tile_size = - ALIGN_POWER_OF_TWO(mi_frame_size, shift) >> shift; - const int mi_tile_size = max_sb_tile_size << MAX_MIB_SIZE_LOG2; - - // The actual number of tiles is the ceiling of the frame size in mi units - // divided by mi_size. This is at most 1 << log2_tile_num but might be - // strictly less if max_sb_tile_size got rounded up significantly. 
- if (ntiles) { - *ntiles = (mi_frame_size + mi_tile_size - 1) / mi_tile_size; - assert(*ntiles <= (1 << log2_tile_num)); - } - - return mi_tile_size; -} - -AV1PixelRect av1_get_tile_rect(const TileInfo *tile_info, const AV1_COMMON *cm, - int is_uv) { - AV1PixelRect r; - - // Calculate position in the Y plane - r.left = tile_info->mi_col_start * MI_SIZE; - r.right = tile_info->mi_col_end * MI_SIZE; - r.top = tile_info->mi_row_start * MI_SIZE; - r.bottom = tile_info->mi_row_end * MI_SIZE; - - // If upscaling is enabled, the tile limits need scaling to match the - // upscaled frame where the restoration units live. To do this, scale up the - // top-left and bottom-right of the tile. - if (av1_superres_scaled(cm)) { - av1_calculate_unscaled_superres_size(&r.left, &r.top, - cm->superres_scale_denominator); - av1_calculate_unscaled_superres_size(&r.right, &r.bottom, - cm->superres_scale_denominator); - } - - const int frame_w = cm->superres_upscaled_width; - const int frame_h = cm->superres_upscaled_height; - - // Make sure we don't fall off the bottom-right of the frame. - r.right = AOMMIN(r.right, frame_w); - r.bottom = AOMMIN(r.bottom, frame_h); - - // Convert to coordinates in the appropriate plane - const int ss_x = is_uv && cm->seq_params.subsampling_x; - const int ss_y = is_uv && cm->seq_params.subsampling_y; - - r.left = ROUND_POWER_OF_TWO(r.left, ss_x); - r.right = ROUND_POWER_OF_TWO(r.right, ss_x); - r.top = ROUND_POWER_OF_TWO(r.top, ss_y); - r.bottom = ROUND_POWER_OF_TWO(r.bottom, ss_y); - - return r; -} diff --git a/third_party/aom/av1/common/tile_common.h b/third_party/aom/av1/common/tile_common.h deleted file mode 100644 index c03553dc6..000000000 --- a/third_party/aom/av1/common/tile_common.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_TILE_COMMON_H_ -#define AOM_AV1_COMMON_TILE_COMMON_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "config/aom_config.h" - -struct AV1Common; - -#define DEFAULT_MAX_NUM_TG 1 - -typedef struct TileInfo { - int mi_row_start, mi_row_end; - int mi_col_start, mi_col_end; - int tg_horz_boundary; - int tile_row; - int tile_col; -} TileInfo; - -// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on -// 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)' -void av1_tile_init(TileInfo *tile, const struct AV1Common *cm, int row, - int col); - -void av1_tile_set_row(TileInfo *tile, const struct AV1Common *cm, int row); -void av1_tile_set_col(TileInfo *tile, const struct AV1Common *cm, int col); -void av1_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols, - int *max_log2_tile_cols); - -// Calculate the correct tile size (width or height) for (1 << log2_tile_num) -// tiles horizontally or vertically in the frame. 
-int get_tile_size(int mi_frame_size, int log2_tile_num, int *ntiles); - -int av1_get_sb_rows_in_tile(struct AV1Common *cm, TileInfo tile); -int av1_get_sb_cols_in_tile(struct AV1Common *cm, TileInfo tile); - -typedef struct { - int left, top, right, bottom; -} AV1PixelRect; - -// Return the pixel extents of the given tile -AV1PixelRect av1_get_tile_rect(const TileInfo *tile_info, - const struct AV1Common *cm, int is_uv); - -// Define tile maximum width and area -// There is no maximum height since height is limited by area and width limits -// The minimum tile width or height is fixed at one superblock -#define MAX_TILE_WIDTH (4096) // Max Tile width in pixels -#define MAX_TILE_AREA (4096 * 2304) // Maximum tile area in pixels - -void av1_get_tile_limits(struct AV1Common *const cm); -void av1_calculate_tile_cols(struct AV1Common *const cm); -void av1_calculate_tile_rows(struct AV1Common *const cm); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_TILE_COMMON_H_ diff --git a/third_party/aom/av1/common/timing.c b/third_party/aom/av1/common/timing.c deleted file mode 100644 index 49dbde78f..000000000 --- a/third_party/aom/av1/common/timing.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "av1/common/timing.h" - -/* Tables for AV1 max bitrates for different levels of main and high tier. - * The tables are in Kbps instead of Mbps in the specification. - * Note that depending on the profile, a multiplier is needed. 
- */ - -/* Max Bitrates for levels of Main Tier in kbps. Bitrate in main_kbps [31] */ -/* is a dummy value. The decoder model is not applicable for level 31. */ -static int32_t main_kbps[1 << LEVEL_BITS] = { - 1500, 3000, 0, 0, 6000, 10000, 0, 0, 12000, 20000, 0, - 0, 30000, 40000, 60000, 60000, 60000, 100000, 160000, 160000, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, (1 << 26) -}; - -/* Max Bitrates for levels of High Tier in kbps. Bitrate in high_kbps [31] */ -/* is a dummy value. The decoder model is not applicable for level 31. */ -static int32_t high_kbps[1 << LEVEL_BITS] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 30000, 50000, 0, 0, 100000, 160000, 240000, 240000, - 240000, 480000, 800000, 800000, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, (1 << 26) -}; - -/* BitrateProfileFactor */ -static int bitrate_profile_factor[1 << PROFILE_BITS] = { - 1, 2, 3, 0, 0, 0, 0, 0 -}; - -int64_t max_level_bitrate(BITSTREAM_PROFILE seq_profile, int seq_level_idx, - int seq_tier) { - int64_t bitrate; - - if (seq_tier) { - bitrate = high_kbps[seq_level_idx] * bitrate_profile_factor[seq_profile]; - } else { - bitrate = main_kbps[seq_level_idx] * bitrate_profile_factor[seq_profile]; - } - - return bitrate * 1000; -} - -void set_aom_dec_model_info(aom_dec_model_info_t *decoder_model) { - decoder_model->encoder_decoder_buffer_delay_length = 16; - decoder_model->buffer_removal_time_length = 10; - decoder_model->frame_presentation_time_length = 10; -} - -void set_dec_model_op_parameters(aom_dec_model_op_parameters_t *op_params) { - op_params->decoder_model_param_present_flag = 1; - op_params->decoder_buffer_delay = 90000 >> 1; // 0.5 s - op_params->encoder_buffer_delay = 90000 >> 1; // 0.5 s - op_params->low_delay_mode_flag = 0; - op_params->display_model_param_present_flag = 1; - op_params->initial_display_delay = 8; // 8 frames delay -} - -void set_resource_availability_parameters( - aom_dec_model_op_parameters_t *op_params) { - op_params->decoder_model_param_present_flag = 0; - 
op_params->decoder_buffer_delay = - 70000; // Resource availability mode default - op_params->encoder_buffer_delay = - 20000; // Resource availability mode default - op_params->low_delay_mode_flag = 0; // Resource availability mode default - op_params->display_model_param_present_flag = 1; - op_params->initial_display_delay = 8; // 8 frames delay -} diff --git a/third_party/aom/av1/common/timing.h b/third_party/aom/av1/common/timing.h deleted file mode 100644 index 06939ae43..000000000 --- a/third_party/aom/av1/common/timing.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_TIMING_H_ -#define AOM_AV1_COMMON_TIMING_H_ - -#include "aom/aom_integer.h" -#include "av1/common/enums.h" - -#define MAX_NUM_OP_POINTS 32 - -typedef struct aom_timing { - uint32_t num_units_in_display_tick; - uint32_t time_scale; - int equal_picture_interval; - uint32_t num_ticks_per_picture; -} aom_timing_info_t; - -typedef struct aom_dec_model_info { - uint32_t num_units_in_decoding_tick; - int encoder_decoder_buffer_delay_length; - int buffer_removal_time_length; - int frame_presentation_time_length; -} aom_dec_model_info_t; - -typedef struct aom_dec_model_op_parameters { - int decoder_model_param_present_flag; - int64_t bitrate; - int64_t buffer_size; - uint32_t decoder_buffer_delay; - uint32_t encoder_buffer_delay; - int low_delay_mode_flag; - int display_model_param_present_flag; - int initial_display_delay; -} aom_dec_model_op_parameters_t; - -typedef struct aom_op_timing_info_t { - uint32_t buffer_removal_time; -} aom_op_timing_info_t; - -void set_aom_dec_model_info(aom_dec_model_info_t *decoder_model); - -void set_dec_model_op_parameters(aom_dec_model_op_parameters_t *op_params); - -void set_resource_availability_parameters( - aom_dec_model_op_parameters_t *op_params); - -int64_t max_level_bitrate(BITSTREAM_PROFILE seq_profile, int seq_level_idx, - int seq_tier); - -#endif // AOM_AV1_COMMON_TIMING_H_ diff --git a/third_party/aom/av1/common/token_cdfs.h b/third_party/aom/av1/common/token_cdfs.h deleted file mode 100644 index 53e956450..000000000 --- a/third_party/aom/av1/common/token_cdfs.h +++ /dev/null @@ -1,3555 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_TOKEN_CDFS_H_ -#define AOM_AV1_COMMON_TOKEN_CDFS_H_ - -#include "config/aom_config.h" - -#include "av1/common/entropy.h" - -static const aom_cdf_prob - av1_default_dc_sign_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][DC_SIGN_CONTEXTS] - [CDF_SIZE(2)] = { - { { - { AOM_CDF2(128 * 125) }, - { AOM_CDF2(128 * 102) }, - { AOM_CDF2(128 * 147) }, - }, - { - { AOM_CDF2(128 * 119) }, - { AOM_CDF2(128 * 101) }, - { AOM_CDF2(128 * 135) }, - } }, - { { - { AOM_CDF2(128 * 125) }, - { AOM_CDF2(128 * 102) }, - { AOM_CDF2(128 * 147) }, - }, - { - { AOM_CDF2(128 * 119) }, - { AOM_CDF2(128 * 101) }, - { AOM_CDF2(128 * 135) }, - } }, - { { - { AOM_CDF2(128 * 125) }, - { AOM_CDF2(128 * 102) }, - { AOM_CDF2(128 * 147) }, - }, - { - { AOM_CDF2(128 * 119) }, - { AOM_CDF2(128 * 101) }, - { AOM_CDF2(128 * 135) }, - } }, - { { - { AOM_CDF2(128 * 125) }, - { AOM_CDF2(128 * 102) }, - { AOM_CDF2(128 * 147) }, - }, - { - { AOM_CDF2(128 * 119) }, - { AOM_CDF2(128 * 101) }, - { AOM_CDF2(128 * 135) }, - } }, - }; - -static const aom_cdf_prob - av1_default_txb_skip_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES][TXB_SKIP_CONTEXTS] - [CDF_SIZE(2)] = { { { { AOM_CDF2(31849) }, - { AOM_CDF2(5892) }, - { AOM_CDF2(12112) }, - { AOM_CDF2(21935) }, - { AOM_CDF2(20289) }, - { AOM_CDF2(27473) }, - { AOM_CDF2(32487) }, - { AOM_CDF2(7654) }, - { AOM_CDF2(19473) }, - { AOM_CDF2(29984) }, - { AOM_CDF2(9961) }, - { AOM_CDF2(30242) }, - { AOM_CDF2(32117) } }, - { { AOM_CDF2(31548) }, - { AOM_CDF2(1549) }, - { AOM_CDF2(10130) }, - { AOM_CDF2(16656) }, - { AOM_CDF2(18591) }, - { AOM_CDF2(26308) }, - { AOM_CDF2(32537) }, - { AOM_CDF2(5403) }, - { AOM_CDF2(18096) }, - { AOM_CDF2(30003) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(29957) }, - { AOM_CDF2(5391) }, - { AOM_CDF2(18039) }, - { 
AOM_CDF2(23566) }, - { AOM_CDF2(22431) }, - { AOM_CDF2(25822) }, - { AOM_CDF2(32197) }, - { AOM_CDF2(3778) }, - { AOM_CDF2(15336) }, - { AOM_CDF2(28981) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(17920) }, - { AOM_CDF2(1818) }, - { AOM_CDF2(7282) }, - { AOM_CDF2(25273) }, - { AOM_CDF2(10923) }, - { AOM_CDF2(31554) }, - { AOM_CDF2(32624) }, - { AOM_CDF2(1366) }, - { AOM_CDF2(15628) }, - { AOM_CDF2(30462) }, - { AOM_CDF2(146) }, - { AOM_CDF2(5132) }, - { AOM_CDF2(31657) } }, - { { AOM_CDF2(6308) }, - { AOM_CDF2(117) }, - { AOM_CDF2(1638) }, - { AOM_CDF2(2161) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(10923) }, - { AOM_CDF2(30247) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } } }, - { { { AOM_CDF2(30371) }, - { AOM_CDF2(7570) }, - { AOM_CDF2(13155) }, - { AOM_CDF2(20751) }, - { AOM_CDF2(20969) }, - { AOM_CDF2(27067) }, - { AOM_CDF2(32013) }, - { AOM_CDF2(5495) }, - { AOM_CDF2(17942) }, - { AOM_CDF2(28280) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(31782) }, - { AOM_CDF2(1836) }, - { AOM_CDF2(10689) }, - { AOM_CDF2(17604) }, - { AOM_CDF2(21622) }, - { AOM_CDF2(27518) }, - { AOM_CDF2(32399) }, - { AOM_CDF2(4419) }, - { AOM_CDF2(16294) }, - { AOM_CDF2(28345) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(31901) }, - { AOM_CDF2(10311) }, - { AOM_CDF2(18047) }, - { AOM_CDF2(24806) }, - { AOM_CDF2(23288) }, - { AOM_CDF2(27914) }, - { AOM_CDF2(32296) }, - { AOM_CDF2(4215) }, - { AOM_CDF2(15756) }, - { AOM_CDF2(28341) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(26726) }, - { AOM_CDF2(1045) }, - { AOM_CDF2(11703) }, - { AOM_CDF2(20590) }, - { AOM_CDF2(18554) }, - { AOM_CDF2(25970) }, - { AOM_CDF2(31938) }, - { AOM_CDF2(5583) }, - { AOM_CDF2(21313) }, - { AOM_CDF2(29390) }, - { AOM_CDF2(641) }, - { 
AOM_CDF2(22265) }, - { AOM_CDF2(31452) } }, - { { AOM_CDF2(26584) }, - { AOM_CDF2(188) }, - { AOM_CDF2(8847) }, - { AOM_CDF2(24519) }, - { AOM_CDF2(22938) }, - { AOM_CDF2(30583) }, - { AOM_CDF2(32608) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } } }, - { { { AOM_CDF2(29614) }, - { AOM_CDF2(9068) }, - { AOM_CDF2(12924) }, - { AOM_CDF2(19538) }, - { AOM_CDF2(17737) }, - { AOM_CDF2(24619) }, - { AOM_CDF2(30642) }, - { AOM_CDF2(4119) }, - { AOM_CDF2(16026) }, - { AOM_CDF2(25657) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(31957) }, - { AOM_CDF2(3230) }, - { AOM_CDF2(11153) }, - { AOM_CDF2(18123) }, - { AOM_CDF2(20143) }, - { AOM_CDF2(26536) }, - { AOM_CDF2(31986) }, - { AOM_CDF2(3050) }, - { AOM_CDF2(14603) }, - { AOM_CDF2(25155) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(32363) }, - { AOM_CDF2(10692) }, - { AOM_CDF2(19090) }, - { AOM_CDF2(24357) }, - { AOM_CDF2(24442) }, - { AOM_CDF2(28312) }, - { AOM_CDF2(32169) }, - { AOM_CDF2(3648) }, - { AOM_CDF2(15690) }, - { AOM_CDF2(26815) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(30669) }, - { AOM_CDF2(3832) }, - { AOM_CDF2(11663) }, - { AOM_CDF2(18889) }, - { AOM_CDF2(19782) }, - { AOM_CDF2(23313) }, - { AOM_CDF2(31330) }, - { AOM_CDF2(5124) }, - { AOM_CDF2(18719) }, - { AOM_CDF2(28468) }, - { AOM_CDF2(3082) }, - { AOM_CDF2(20982) }, - { AOM_CDF2(29443) } }, - { { AOM_CDF2(28573) }, - { AOM_CDF2(3183) }, - { AOM_CDF2(17802) }, - { AOM_CDF2(25977) }, - { AOM_CDF2(26677) }, - { AOM_CDF2(27832) }, - { AOM_CDF2(32387) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } } }, - { { { AOM_CDF2(26887) }, - { AOM_CDF2(6729) }, - { AOM_CDF2(10361) }, - { AOM_CDF2(17442) }, - { AOM_CDF2(15045) }, - { AOM_CDF2(22478) }, - 
{ AOM_CDF2(29072) }, - { AOM_CDF2(2713) }, - { AOM_CDF2(11861) }, - { AOM_CDF2(20773) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(31903) }, - { AOM_CDF2(2044) }, - { AOM_CDF2(7528) }, - { AOM_CDF2(14618) }, - { AOM_CDF2(16182) }, - { AOM_CDF2(24168) }, - { AOM_CDF2(31037) }, - { AOM_CDF2(2786) }, - { AOM_CDF2(11194) }, - { AOM_CDF2(20155) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(32510) }, - { AOM_CDF2(8430) }, - { AOM_CDF2(17318) }, - { AOM_CDF2(24154) }, - { AOM_CDF2(23674) }, - { AOM_CDF2(28789) }, - { AOM_CDF2(32139) }, - { AOM_CDF2(3440) }, - { AOM_CDF2(13117) }, - { AOM_CDF2(22702) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } }, - { { AOM_CDF2(31671) }, - { AOM_CDF2(2056) }, - { AOM_CDF2(11746) }, - { AOM_CDF2(16852) }, - { AOM_CDF2(18635) }, - { AOM_CDF2(24715) }, - { AOM_CDF2(31484) }, - { AOM_CDF2(4656) }, - { AOM_CDF2(16074) }, - { AOM_CDF2(24704) }, - { AOM_CDF2(1806) }, - { AOM_CDF2(14645) }, - { AOM_CDF2(25336) } }, - { { AOM_CDF2(31539) }, - { AOM_CDF2(8433) }, - { AOM_CDF2(20576) }, - { AOM_CDF2(27904) }, - { AOM_CDF2(27852) }, - { AOM_CDF2(30026) }, - { AOM_CDF2(32441) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) } } } }; - -static const aom_cdf_prob - av1_default_eob_extra_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES] - [EOB_COEF_CONTEXTS][CDF_SIZE(2)] = { - { { { - { AOM_CDF2(16961) }, - { AOM_CDF2(17223) }, - { AOM_CDF2(7621) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(19069) }, - { AOM_CDF2(22525) }, - { AOM_CDF2(13377) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(20401) }, - { AOM_CDF2(17025) }, - { 
AOM_CDF2(12845) }, - { AOM_CDF2(12873) }, - { AOM_CDF2(14094) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(20681) }, - { AOM_CDF2(20701) }, - { AOM_CDF2(15250) }, - { AOM_CDF2(15017) }, - { AOM_CDF2(14928) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(23905) }, - { AOM_CDF2(17194) }, - { AOM_CDF2(16170) }, - { AOM_CDF2(17695) }, - { AOM_CDF2(13826) }, - { AOM_CDF2(15810) }, - { AOM_CDF2(12036) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(23959) }, - { AOM_CDF2(20799) }, - { AOM_CDF2(19021) }, - { AOM_CDF2(16203) }, - { AOM_CDF2(17886) }, - { AOM_CDF2(14144) }, - { AOM_CDF2(12010) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(27399) }, - { AOM_CDF2(16327) }, - { AOM_CDF2(18071) }, - { AOM_CDF2(19584) }, - { AOM_CDF2(20721) }, - { AOM_CDF2(18432) }, - { AOM_CDF2(19560) }, - { AOM_CDF2(10150) }, - { AOM_CDF2(8805) }, - }, - { - { AOM_CDF2(24932) }, - { AOM_CDF2(20833) }, - { AOM_CDF2(12027) }, - { AOM_CDF2(16670) }, - { AOM_CDF2(19914) }, - { AOM_CDF2(15106) }, - { AOM_CDF2(17662) }, - { AOM_CDF2(13783) }, - { AOM_CDF2(28756) }, - } }, - { { - { AOM_CDF2(23406) }, - { AOM_CDF2(21845) }, - { AOM_CDF2(18432) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(17096) }, - { AOM_CDF2(12561) }, - { AOM_CDF2(17320) }, - { AOM_CDF2(22395) }, - { AOM_CDF2(21370) }, - }, - { - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } } }, - { { { - { AOM_CDF2(17471) }, - { AOM_CDF2(20223) }, - { AOM_CDF2(11357) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(20335) }, - { AOM_CDF2(21667) }, - { AOM_CDF2(14818) }, - { 
AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(20430) }, - { AOM_CDF2(20662) }, - { AOM_CDF2(15367) }, - { AOM_CDF2(16970) }, - { AOM_CDF2(14657) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(22117) }, - { AOM_CDF2(22028) }, - { AOM_CDF2(18650) }, - { AOM_CDF2(16042) }, - { AOM_CDF2(15885) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(22409) }, - { AOM_CDF2(21012) }, - { AOM_CDF2(15650) }, - { AOM_CDF2(17395) }, - { AOM_CDF2(15469) }, - { AOM_CDF2(20205) }, - { AOM_CDF2(19511) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(24220) }, - { AOM_CDF2(22480) }, - { AOM_CDF2(17737) }, - { AOM_CDF2(18916) }, - { AOM_CDF2(19268) }, - { AOM_CDF2(18412) }, - { AOM_CDF2(18844) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(25991) }, - { AOM_CDF2(20314) }, - { AOM_CDF2(17731) }, - { AOM_CDF2(19678) }, - { AOM_CDF2(18649) }, - { AOM_CDF2(17307) }, - { AOM_CDF2(21798) }, - { AOM_CDF2(17549) }, - { AOM_CDF2(15630) }, - }, - { - { AOM_CDF2(26585) }, - { AOM_CDF2(21469) }, - { AOM_CDF2(20432) }, - { AOM_CDF2(17735) }, - { AOM_CDF2(19280) }, - { AOM_CDF2(15235) }, - { AOM_CDF2(20297) }, - { AOM_CDF2(22471) }, - { AOM_CDF2(28997) }, - } }, - { { - { AOM_CDF2(26605) }, - { AOM_CDF2(11304) }, - { AOM_CDF2(16726) }, - { AOM_CDF2(16560) }, - { AOM_CDF2(20866) }, - { AOM_CDF2(23524) }, - { AOM_CDF2(19878) }, - { AOM_CDF2(13469) }, - { AOM_CDF2(23084) }, - }, - { - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } } }, - { { { - { AOM_CDF2(18983) }, - { AOM_CDF2(20512) }, - { AOM_CDF2(14885) }, - { AOM_CDF2(16384) }, - { 
AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(20090) }, - { AOM_CDF2(19444) }, - { AOM_CDF2(17286) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(19139) }, - { AOM_CDF2(21487) }, - { AOM_CDF2(18959) }, - { AOM_CDF2(20910) }, - { AOM_CDF2(19089) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(20536) }, - { AOM_CDF2(20664) }, - { AOM_CDF2(20625) }, - { AOM_CDF2(19123) }, - { AOM_CDF2(14862) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(19833) }, - { AOM_CDF2(21502) }, - { AOM_CDF2(17485) }, - { AOM_CDF2(20267) }, - { AOM_CDF2(18353) }, - { AOM_CDF2(23329) }, - { AOM_CDF2(21478) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(22041) }, - { AOM_CDF2(23434) }, - { AOM_CDF2(20001) }, - { AOM_CDF2(20554) }, - { AOM_CDF2(20951) }, - { AOM_CDF2(20145) }, - { AOM_CDF2(15562) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(23312) }, - { AOM_CDF2(21607) }, - { AOM_CDF2(16526) }, - { AOM_CDF2(18957) }, - { AOM_CDF2(18034) }, - { AOM_CDF2(18934) }, - { AOM_CDF2(24247) }, - { AOM_CDF2(16921) }, - { AOM_CDF2(17080) }, - }, - { - { AOM_CDF2(26579) }, - { AOM_CDF2(24910) }, - { AOM_CDF2(18637) }, - { AOM_CDF2(19800) }, - { AOM_CDF2(20388) }, - { AOM_CDF2(9887) }, - { AOM_CDF2(15642) }, - { AOM_CDF2(30198) }, - { AOM_CDF2(24721) }, - } }, - { { - { AOM_CDF2(26998) }, - { AOM_CDF2(16737) }, - { AOM_CDF2(17838) }, - { AOM_CDF2(18922) }, - { AOM_CDF2(19515) }, - { AOM_CDF2(18636) }, - { AOM_CDF2(17333) }, - { AOM_CDF2(15776) }, - { AOM_CDF2(22658) }, - }, - { - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) 
}, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } } }, - { { { - { AOM_CDF2(20177) }, - { AOM_CDF2(20789) }, - { AOM_CDF2(20262) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(21416) }, - { AOM_CDF2(20855) }, - { AOM_CDF2(23410) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(20238) }, - { AOM_CDF2(21057) }, - { AOM_CDF2(19159) }, - { AOM_CDF2(22337) }, - { AOM_CDF2(20159) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(20125) }, - { AOM_CDF2(20559) }, - { AOM_CDF2(21707) }, - { AOM_CDF2(22296) }, - { AOM_CDF2(17333) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(19941) }, - { AOM_CDF2(20527) }, - { AOM_CDF2(21470) }, - { AOM_CDF2(22487) }, - { AOM_CDF2(19558) }, - { AOM_CDF2(22354) }, - { AOM_CDF2(20331) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - }, - { - { AOM_CDF2(22752) }, - { AOM_CDF2(25006) }, - { AOM_CDF2(22075) }, - { AOM_CDF2(21576) }, - { AOM_CDF2(17740) }, - { AOM_CDF2(21690) }, - { AOM_CDF2(19211) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } }, - { { - { AOM_CDF2(21442) }, - { AOM_CDF2(22358) }, - { AOM_CDF2(18503) }, - { AOM_CDF2(20291) }, - { AOM_CDF2(19945) }, - { AOM_CDF2(21294) }, - { AOM_CDF2(21178) }, - { AOM_CDF2(19400) }, - { AOM_CDF2(10556) }, - }, - { - { AOM_CDF2(24648) }, - { AOM_CDF2(24949) }, - { AOM_CDF2(20708) }, - { AOM_CDF2(23905) }, - { AOM_CDF2(20501) }, - { AOM_CDF2(9558) }, - { AOM_CDF2(9423) }, - { AOM_CDF2(30365) }, - { AOM_CDF2(19253) }, - } }, - { { - { AOM_CDF2(26064) }, - { AOM_CDF2(22098) }, - { AOM_CDF2(19613) }, - { AOM_CDF2(20525) }, - { AOM_CDF2(17595) }, - { AOM_CDF2(16618) }, - { AOM_CDF2(20497) }, - { 
AOM_CDF2(18989) }, - { AOM_CDF2(15513) }, - }, - { - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - { AOM_CDF2(16384) }, - } } } - }; - -static const aom_cdf_prob - av1_default_eob_multi16_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE( - 5)] = { { { { AOM_CDF5(840, 1039, 1980, 4895) }, - { AOM_CDF5(370, 671, 1883, 4471) } }, - { { AOM_CDF5(3247, 4950, 9688, 14563) }, - { AOM_CDF5(1904, 3354, 7763, 14647) } } }, - { { { AOM_CDF5(2125, 2551, 5165, 8946) }, - { AOM_CDF5(513, 765, 1859, 6339) } }, - { { AOM_CDF5(7637, 9498, 14259, 19108) }, - { AOM_CDF5(2497, 4096, 8866, 16993) } } }, - { { { AOM_CDF5(4016, 4897, 8881, 14968) }, - { AOM_CDF5(716, 1105, 2646, 10056) } }, - { { AOM_CDF5(11139, 13270, 18241, 23566) }, - { AOM_CDF5(3192, 5032, 10297, 19755) } } }, - { { { AOM_CDF5(6708, 8958, 14746, 22133) }, - { AOM_CDF5(1222, 2074, 4783, 15410) } }, - { { AOM_CDF5(19575, 21766, 26044, 29709) }, - { AOM_CDF5(7297, 10767, 19273, 28194) } } } }; - -static const aom_cdf_prob - av1_default_eob_multi32_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE( - 6)] = { { { { AOM_CDF6(400, 520, 977, 2102, 6542) }, - { AOM_CDF6(210, 405, 1315, 3326, 7537) } }, - { { AOM_CDF6(2636, 4273, 7588, 11794, 20401) }, - { AOM_CDF6(1786, 3179, 6902, 11357, 19054) } } }, - { { { AOM_CDF6(989, 1249, 2019, 4151, 10785) }, - { AOM_CDF6(313, 441, 1099, 2917, 8562) } }, - { { AOM_CDF6(8394, 10352, 13932, 18855, 26014) }, - { AOM_CDF6(2578, 4124, 8181, 13670, 24234) } } }, - { { { AOM_CDF6(2515, 3003, 4452, 8162, 16041) }, - { AOM_CDF6(574, 821, 1836, 5089, 13128) } }, - { { AOM_CDF6(13468, 16303, 20361, 25105, 29281) }, - { AOM_CDF6(3542, 5502, 10415, 16760, 25644) } } }, - { { { AOM_CDF6(4617, 5709, 8446, 13584, 23135) }, - { AOM_CDF6(1156, 1702, 3675, 9274, 20539) } }, - { { AOM_CDF6(22086, 24282, 27010, 29770, 31743) }, - { AOM_CDF6(7699, 10897, 20891, 26926, 
31628) } } } }; - -static const aom_cdf_prob - av1_default_eob_multi64_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE( - 7)] = { { { { AOM_CDF7(329, 498, 1101, 1784, 3265, 7758) }, - { AOM_CDF7(335, 730, 1459, 5494, 8755, 12997) } }, - { { AOM_CDF7(3505, 5304, 10086, 13814, 17684, 23370) }, - { AOM_CDF7(1563, 2700, 4876, 10911, 14706, 22480) } } }, - { { { AOM_CDF7(1260, 1446, 2253, 3712, 6652, 13369) }, - { AOM_CDF7(401, 605, 1029, 2563, 5845, 12626) } }, - { { AOM_CDF7(8609, 10612, 14624, 18714, 22614, 29024) }, - { AOM_CDF7(1923, 3127, 5867, 9703, 14277, 27100) } } }, - { { { AOM_CDF7(2374, 2772, 4583, 7276, 12288, 19706) }, - { AOM_CDF7(497, 810, 1315, 3000, 7004, 15641) } }, - { { AOM_CDF7(15050, 17126, 21410, 24886, 28156, 30726) }, - { AOM_CDF7(4034, 6290, 10235, 14982, 21214, 28491) } } }, - { { { AOM_CDF7(6307, 7541, 12060, 16358, 22553, 27865) }, - { AOM_CDF7(1289, 2320, 3971, 7926, 14153, 24291) } }, - { { AOM_CDF7(24212, 25708, 28268, 30035, 31307, 32049) }, - { AOM_CDF7(8726, 12378, 19409, 26450, 30038, 32462) } } } }; - -static const aom_cdf_prob - av1_default_eob_multi128_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE( - 8)] = { - { { { AOM_CDF8(219, 482, 1140, 2091, 3680, 6028, 12586) }, - { AOM_CDF8(371, 699, 1254, 4830, 9479, 12562, 17497) } }, - { { AOM_CDF8(5245, 7456, 12880, 15852, 20033, 23932, 27608) }, - { AOM_CDF8(2054, 3472, 5869, 14232, 18242, 20590, 26752) } } }, - { { { AOM_CDF8(685, 933, 1488, 2714, 4766, 8562, 19254) }, - { AOM_CDF8(217, 352, 618, 2303, 5261, 9969, 17472) } }, - { { AOM_CDF8(8045, 11200, 15497, 19595, 23948, 27408, 30938) }, - { AOM_CDF8(2310, 4160, 7471, 14997, 17931, 20768, 30240) } } }, - { { { AOM_CDF8(1366, 1738, 2527, 5016, 9355, 15797, 24643) }, - { AOM_CDF8(354, 558, 944, 2760, 7287, 14037, 21779) } }, - { { AOM_CDF8(13627, 16246, 20173, 24429, 27948, 30415, 31863) }, - { AOM_CDF8(6275, 9889, 14769, 23164, 27988, 30493, 32272) } } }, - { { { AOM_CDF8(3472, 4885, 7489, 12481, 18517, 24536, 29635) }, - { 
AOM_CDF8(886, 1731, 3271, 8469, 15569, 22126, 28383) } }, - { { AOM_CDF8(24313, 26062, 28385, 30107, 31217, 31898, 32345) }, - { AOM_CDF8(9165, 13282, 21150, 30286, 31894, 32571, 32712) } } } - }; - -static const aom_cdf_prob - av1_default_eob_multi256_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE( - 9)] = { - { { { AOM_CDF9(310, 584, 1887, 3589, 6168, 8611, 11352, 15652) }, - { AOM_CDF9(998, 1850, 2998, 5604, 17341, 19888, 22899, 25583) } }, - { { AOM_CDF9(2520, 3240, 5952, 8870, 12577, 17558, 19954, 24168) }, - { AOM_CDF9(2203, 4130, 7435, 10739, 20652, 23681, 25609, 27261) } } }, - { { { AOM_CDF9(1448, 2109, 4151, 6263, 9329, 13260, 17944, 23300) }, - { AOM_CDF9(399, 1019, 1749, 3038, 10444, 15546, 22739, 27294) } }, - { { AOM_CDF9(6402, 8148, 12623, 15072, 18728, 22847, 26447, 29377) }, - { AOM_CDF9(1674, 3252, 5734, 10159, 22397, 23802, 24821, 30940) } } }, - { { { AOM_CDF9(3089, 3920, 6038, 9460, 14266, 19881, 25766, 29176) }, - { AOM_CDF9(1084, 2358, 3488, 5122, 11483, 18103, 26023, 29799) } }, - { { AOM_CDF9(11514, 13794, 17480, 20754, 24361, 27378, 29492, 31277) }, - { AOM_CDF9(6571, 9610, 15516, 21826, 29092, 30829, 31842, - 32708) } } }, - { { { AOM_CDF9(5348, 7113, 11820, 15924, 22106, 26777, 30334, 31757) }, - { AOM_CDF9(2453, 4474, 6307, 8777, 16474, 22975, 29000, 31547) } }, - { { AOM_CDF9(23110, 24597, 27140, 28894, 30167, 30927, 31392, 32094) }, - { AOM_CDF9(9998, 17661, 25178, 28097, 31308, 32038, 32403, - 32695) } } } - }; - -static const aom_cdf_prob - av1_default_eob_multi512_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE( - 10)] = { { { { AOM_CDF10(641, 983, 3707, 5430, 10234, 14958, 18788, - 23412, 26061) }, - { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, - 26214, 29491) } }, - { { AOM_CDF10(5095, 6446, 9996, 13354, 16017, 17986, 20919, - 26129, 29140) }, - { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, - 26214, 29491) } } }, - { { { AOM_CDF10(1230, 2278, 5035, 7776, 11871, 15346, 19590, - 24584, 28749) }, - { 
AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, - 26214, 29491) } }, - { { AOM_CDF10(7265, 9979, 15819, 19250, 21780, 23846, 26478, - 28396, 31811) }, - { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, - 26214, 29491) } } }, - { { { AOM_CDF10(2624, 3936, 6480, 9686, 13979, 17726, 23267, - 28410, 31078) }, - { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, - 26214, 29491) } }, - { { AOM_CDF10(12015, 14769, 19588, 22052, 24222, 25812, - 27300, 29219, 32114) }, - { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, - 26214, 29491) } } }, - { { { AOM_CDF10(5927, 7809, 10923, 14597, 19439, 24135, 28456, - 31142, 32060) }, - { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, - 26214, 29491) } }, - { { AOM_CDF10(21093, 23043, 25742, 27658, 29097, 29716, - 30073, 30820, 31956) }, - { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, - 26214, 29491) } } } }; - -static const aom_cdf_prob - av1_default_eob_multi1024_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE( - 11)] = { { { { AOM_CDF11(393, 421, 751, 1623, 3160, 6352, 13345, 18047, - 22571, 25830) }, - { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852, - 23831, 26810, 29789) } }, - { { AOM_CDF11(1865, 1988, 2930, 4242, 10533, 16538, 21354, - 27255, 28546, 31784) }, - { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852, - 23831, 26810, 29789) } } }, - { { { AOM_CDF11(696, 948, 3145, 5702, 9706, 13217, 17851, - 21856, 25692, 28034) }, - { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852, - 23831, 26810, 29789) } }, - { { AOM_CDF11(2672, 3591, 9330, 17084, 22725, 24284, 26527, - 28027, 28377, 30876) }, - { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852, - 23831, 26810, 29789) } } }, - { { { AOM_CDF11(2784, 3831, 7041, 10521, 14847, 18844, 23155, - 26682, 29229, 31045) }, - { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852, - 23831, 26810, 29789) } }, - { { AOM_CDF11(9577, 12466, 17739, 20750, 22061, 23215, 24601, - 25483, 25843, 32056) }, - { 
AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852, - 23831, 26810, 29789) } } }, - { { { AOM_CDF11(6698, 8334, 11961, 15762, 20186, 23862, 27434, - 29326, 31082, 32050) }, - { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852, - 23831, 26810, 29789) } }, - { { AOM_CDF11(20569, 22426, 25569, 26859, 28053, 28913, - 29486, 29724, 29807, 32570) }, - { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852, - 23831, 26810, 29789) } } } }; - -static const aom_cdf_prob av1_default_coeff_lps_multi_cdfs - [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS] - [CDF_SIZE(BR_CDF_SIZE)] = { - { { { { AOM_CDF4(14298, 20718, 24174) }, - { AOM_CDF4(12536, 19601, 23789) }, - { AOM_CDF4(8712, 15051, 19503) }, - { AOM_CDF4(6170, 11327, 15434) }, - { AOM_CDF4(4742, 8926, 12538) }, - { AOM_CDF4(3803, 7317, 10546) }, - { AOM_CDF4(1696, 3317, 4871) }, - { AOM_CDF4(14392, 19951, 22756) }, - { AOM_CDF4(15978, 23218, 26818) }, - { AOM_CDF4(12187, 19474, 23889) }, - { AOM_CDF4(9176, 15640, 20259) }, - { AOM_CDF4(7068, 12655, 17028) }, - { AOM_CDF4(5656, 10442, 14472) }, - { AOM_CDF4(2580, 4992, 7244) }, - { AOM_CDF4(12136, 18049, 21426) }, - { AOM_CDF4(13784, 20721, 24481) }, - { AOM_CDF4(10836, 17621, 21900) }, - { AOM_CDF4(8372, 14444, 18847) }, - { AOM_CDF4(6523, 11779, 16000) }, - { AOM_CDF4(5337, 9898, 13760) }, - { AOM_CDF4(3034, 5860, 8462) } }, - { { AOM_CDF4(15967, 22905, 26286) }, - { AOM_CDF4(13534, 20654, 24579) }, - { AOM_CDF4(9504, 16092, 20535) }, - { AOM_CDF4(6975, 12568, 16903) }, - { AOM_CDF4(5364, 10091, 14020) }, - { AOM_CDF4(4357, 8370, 11857) }, - { AOM_CDF4(2506, 4934, 7218) }, - { AOM_CDF4(23032, 28815, 30936) }, - { AOM_CDF4(19540, 26704, 29719) }, - { AOM_CDF4(15158, 22969, 27097) }, - { AOM_CDF4(11408, 18865, 23650) }, - { AOM_CDF4(8885, 15448, 20250) }, - { AOM_CDF4(7108, 12853, 17416) }, - { AOM_CDF4(4231, 8041, 11480) }, - { AOM_CDF4(19823, 26490, 29156) }, - { AOM_CDF4(18890, 25929, 28932) }, - { AOM_CDF4(15660, 23491, 27433) }, - { 
AOM_CDF4(12147, 19776, 24488) }, - { AOM_CDF4(9728, 16774, 21649) }, - { AOM_CDF4(7919, 14277, 19066) }, - { AOM_CDF4(5440, 10170, 14185) } } }, - { { { AOM_CDF4(14406, 20862, 24414) }, - { AOM_CDF4(11824, 18907, 23109) }, - { AOM_CDF4(8257, 14393, 18803) }, - { AOM_CDF4(5860, 10747, 14778) }, - { AOM_CDF4(4475, 8486, 11984) }, - { AOM_CDF4(3606, 6954, 10043) }, - { AOM_CDF4(1736, 3410, 5048) }, - { AOM_CDF4(14430, 20046, 22882) }, - { AOM_CDF4(15593, 22899, 26709) }, - { AOM_CDF4(12102, 19368, 23811) }, - { AOM_CDF4(9059, 15584, 20262) }, - { AOM_CDF4(6999, 12603, 17048) }, - { AOM_CDF4(5684, 10497, 14553) }, - { AOM_CDF4(2822, 5438, 7862) }, - { AOM_CDF4(15785, 21585, 24359) }, - { AOM_CDF4(18347, 25229, 28266) }, - { AOM_CDF4(14974, 22487, 26389) }, - { AOM_CDF4(11423, 18681, 23271) }, - { AOM_CDF4(8863, 15350, 20008) }, - { AOM_CDF4(7153, 12852, 17278) }, - { AOM_CDF4(3707, 7036, 9982) } }, - { { AOM_CDF4(15460, 21696, 25469) }, - { AOM_CDF4(12170, 19249, 23191) }, - { AOM_CDF4(8723, 15027, 19332) }, - { AOM_CDF4(6428, 11704, 15874) }, - { AOM_CDF4(4922, 9292, 13052) }, - { AOM_CDF4(4139, 7695, 11010) }, - { AOM_CDF4(2291, 4508, 6598) }, - { AOM_CDF4(19856, 26920, 29828) }, - { AOM_CDF4(17923, 25289, 28792) }, - { AOM_CDF4(14278, 21968, 26297) }, - { AOM_CDF4(10910, 18136, 22950) }, - { AOM_CDF4(8423, 14815, 19627) }, - { AOM_CDF4(6771, 12283, 16774) }, - { AOM_CDF4(4074, 7750, 11081) }, - { AOM_CDF4(19852, 26074, 28672) }, - { AOM_CDF4(19371, 26110, 28989) }, - { AOM_CDF4(16265, 23873, 27663) }, - { AOM_CDF4(12758, 20378, 24952) }, - { AOM_CDF4(10095, 17098, 21961) }, - { AOM_CDF4(8250, 14628, 19451) }, - { AOM_CDF4(5205, 9745, 13622) } } }, - { { { AOM_CDF4(10563, 16233, 19763) }, - { AOM_CDF4(9794, 16022, 19804) }, - { AOM_CDF4(6750, 11945, 15759) }, - { AOM_CDF4(4963, 9186, 12752) }, - { AOM_CDF4(3845, 7435, 10627) }, - { AOM_CDF4(3051, 6085, 8834) }, - { AOM_CDF4(1311, 2596, 3830) }, - { AOM_CDF4(11246, 16404, 19689) }, - { AOM_CDF4(12315, 18911, 22731) }, 
- { AOM_CDF4(10557, 17095, 21289) }, - { AOM_CDF4(8136, 14006, 18249) }, - { AOM_CDF4(6348, 11474, 15565) }, - { AOM_CDF4(5196, 9655, 13400) }, - { AOM_CDF4(2349, 4526, 6587) }, - { AOM_CDF4(13337, 18730, 21569) }, - { AOM_CDF4(19306, 26071, 28882) }, - { AOM_CDF4(15952, 23540, 27254) }, - { AOM_CDF4(12409, 19934, 24430) }, - { AOM_CDF4(9760, 16706, 21389) }, - { AOM_CDF4(8004, 14220, 18818) }, - { AOM_CDF4(4138, 7794, 10961) } }, - { { AOM_CDF4(10870, 16684, 20949) }, - { AOM_CDF4(9664, 15230, 18680) }, - { AOM_CDF4(6886, 12109, 15408) }, - { AOM_CDF4(4825, 8900, 12305) }, - { AOM_CDF4(3630, 7162, 10314) }, - { AOM_CDF4(3036, 6429, 9387) }, - { AOM_CDF4(1671, 3296, 4940) }, - { AOM_CDF4(13819, 19159, 23026) }, - { AOM_CDF4(11984, 19108, 23120) }, - { AOM_CDF4(10690, 17210, 21663) }, - { AOM_CDF4(7984, 14154, 18333) }, - { AOM_CDF4(6868, 12294, 16124) }, - { AOM_CDF4(5274, 8994, 12868) }, - { AOM_CDF4(2988, 5771, 8424) }, - { AOM_CDF4(19736, 26647, 29141) }, - { AOM_CDF4(18933, 26070, 28984) }, - { AOM_CDF4(15779, 23048, 27200) }, - { AOM_CDF4(12638, 20061, 24532) }, - { AOM_CDF4(10692, 17545, 22220) }, - { AOM_CDF4(9217, 15251, 20054) }, - { AOM_CDF4(5078, 9284, 12594) } } }, - { { { AOM_CDF4(2331, 3662, 5244) }, - { AOM_CDF4(2891, 4771, 6145) }, - { AOM_CDF4(4598, 7623, 9729) }, - { AOM_CDF4(3520, 6845, 9199) }, - { AOM_CDF4(3417, 6119, 9324) }, - { AOM_CDF4(2601, 5412, 7385) }, - { AOM_CDF4(600, 1173, 1744) }, - { AOM_CDF4(7672, 13286, 17469) }, - { AOM_CDF4(4232, 7792, 10793) }, - { AOM_CDF4(2915, 5317, 7397) }, - { AOM_CDF4(2318, 4356, 6152) }, - { AOM_CDF4(2127, 4000, 5554) }, - { AOM_CDF4(1850, 3478, 5275) }, - { AOM_CDF4(977, 1933, 2843) }, - { AOM_CDF4(18280, 24387, 27989) }, - { AOM_CDF4(15852, 22671, 26185) }, - { AOM_CDF4(13845, 20951, 24789) }, - { AOM_CDF4(11055, 17966, 22129) }, - { AOM_CDF4(9138, 15422, 19801) }, - { AOM_CDF4(7454, 13145, 17456) }, - { AOM_CDF4(3370, 6393, 9013) } }, - { { AOM_CDF4(5842, 9229, 10838) }, - { AOM_CDF4(2313, 3491, 
4276) }, - { AOM_CDF4(2998, 6104, 7496) }, - { AOM_CDF4(2420, 7447, 9868) }, - { AOM_CDF4(3034, 8495, 10923) }, - { AOM_CDF4(4076, 8937, 10975) }, - { AOM_CDF4(1086, 2370, 3299) }, - { AOM_CDF4(9714, 17254, 20444) }, - { AOM_CDF4(8543, 13698, 17123) }, - { AOM_CDF4(4918, 9007, 11910) }, - { AOM_CDF4(4129, 7532, 10553) }, - { AOM_CDF4(2364, 5533, 8058) }, - { AOM_CDF4(1834, 3546, 5563) }, - { AOM_CDF4(1473, 2908, 4133) }, - { AOM_CDF4(15405, 21193, 25619) }, - { AOM_CDF4(15691, 21952, 26561) }, - { AOM_CDF4(12962, 19194, 24165) }, - { AOM_CDF4(10272, 17855, 22129) }, - { AOM_CDF4(8588, 15270, 20718) }, - { AOM_CDF4(8682, 14669, 19500) }, - { AOM_CDF4(4870, 9636, 13205) } } }, - { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { 
AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } } }, - { { { { AOM_CDF4(14995, 21341, 24749) }, - { AOM_CDF4(13158, 20289, 24601) }, - { AOM_CDF4(8941, 15326, 19876) }, - { AOM_CDF4(6297, 11541, 15807) }, - { AOM_CDF4(4817, 9029, 12776) }, - { AOM_CDF4(3731, 7273, 10627) }, - { AOM_CDF4(1847, 3617, 5354) }, - { AOM_CDF4(14472, 19659, 22343) }, - { AOM_CDF4(16806, 24162, 27533) }, - { AOM_CDF4(12900, 20404, 24713) }, - { AOM_CDF4(9411, 16112, 20797) }, - { AOM_CDF4(7056, 12697, 17148) }, - { AOM_CDF4(5544, 10339, 14460) }, - { AOM_CDF4(2954, 5704, 8319) }, - { AOM_CDF4(12464, 18071, 21354) }, - { AOM_CDF4(15482, 22528, 26034) }, - { AOM_CDF4(12070, 19269, 23624) }, - { AOM_CDF4(8953, 15406, 20106) }, - { AOM_CDF4(7027, 12730, 17220) }, - { AOM_CDF4(5887, 10913, 15140) }, - { AOM_CDF4(3793, 7278, 10447) } }, - { { AOM_CDF4(15571, 22232, 25749) }, - { AOM_CDF4(14506, 21575, 25374) }, - { AOM_CDF4(10189, 17089, 21569) }, - { AOM_CDF4(7316, 13301, 17915) }, - { AOM_CDF4(5783, 10912, 15190) }, - { AOM_CDF4(4760, 9155, 13088) }, - { AOM_CDF4(2993, 5966, 8774) }, - { AOM_CDF4(23424, 28903, 30778) }, - { AOM_CDF4(20775, 27666, 30290) }, - { AOM_CDF4(16474, 24410, 28299) }, - { AOM_CDF4(12471, 20180, 24987) }, - { AOM_CDF4(9410, 16487, 21439) }, - { AOM_CDF4(7536, 13614, 18529) }, - { AOM_CDF4(5048, 9586, 13549) }, - { AOM_CDF4(21090, 27290, 29756) }, - { AOM_CDF4(20796, 27402, 30026) }, - { AOM_CDF4(17819, 25485, 28969) }, - { AOM_CDF4(13860, 21909, 26462) }, - { AOM_CDF4(11002, 18494, 23529) }, - { AOM_CDF4(8953, 15929, 20897) }, - { AOM_CDF4(6448, 11918, 16454) } } }, - { { { AOM_CDF4(15999, 22208, 25449) }, - { AOM_CDF4(13050, 19988, 24122) }, - { AOM_CDF4(8594, 14864, 19378) }, - { AOM_CDF4(6033, 11079, 15238) }, - { AOM_CDF4(4554, 8683, 12347) }, - { AOM_CDF4(3672, 7139, 10337) }, - { AOM_CDF4(1900, 3771, 5576) }, - { AOM_CDF4(15788, 21340, 
23949) }, - { AOM_CDF4(16825, 24235, 27758) }, - { AOM_CDF4(12873, 20402, 24810) }, - { AOM_CDF4(9590, 16363, 21094) }, - { AOM_CDF4(7352, 13209, 17733) }, - { AOM_CDF4(5960, 10989, 15184) }, - { AOM_CDF4(3232, 6234, 9007) }, - { AOM_CDF4(15761, 20716, 23224) }, - { AOM_CDF4(19318, 25989, 28759) }, - { AOM_CDF4(15529, 23094, 26929) }, - { AOM_CDF4(11662, 18989, 23641) }, - { AOM_CDF4(8955, 15568, 20366) }, - { AOM_CDF4(7281, 13106, 17708) }, - { AOM_CDF4(4248, 8059, 11440) } }, - { { AOM_CDF4(14899, 21217, 24503) }, - { AOM_CDF4(13519, 20283, 24047) }, - { AOM_CDF4(9429, 15966, 20365) }, - { AOM_CDF4(6700, 12355, 16652) }, - { AOM_CDF4(5088, 9704, 13716) }, - { AOM_CDF4(4243, 8154, 11731) }, - { AOM_CDF4(2702, 5364, 7861) }, - { AOM_CDF4(22745, 28388, 30454) }, - { AOM_CDF4(20235, 27146, 29922) }, - { AOM_CDF4(15896, 23715, 27637) }, - { AOM_CDF4(11840, 19350, 24131) }, - { AOM_CDF4(9122, 15932, 20880) }, - { AOM_CDF4(7488, 13581, 18362) }, - { AOM_CDF4(5114, 9568, 13370) }, - { AOM_CDF4(20845, 26553, 28932) }, - { AOM_CDF4(20981, 27372, 29884) }, - { AOM_CDF4(17781, 25335, 28785) }, - { AOM_CDF4(13760, 21708, 26297) }, - { AOM_CDF4(10975, 18415, 23365) }, - { AOM_CDF4(9045, 15789, 20686) }, - { AOM_CDF4(6130, 11199, 15423) } } }, - { { { AOM_CDF4(13549, 19724, 23158) }, - { AOM_CDF4(11844, 18382, 22246) }, - { AOM_CDF4(7919, 13619, 17773) }, - { AOM_CDF4(5486, 10143, 13946) }, - { AOM_CDF4(4166, 7983, 11324) }, - { AOM_CDF4(3364, 6506, 9427) }, - { AOM_CDF4(1598, 3160, 4674) }, - { AOM_CDF4(15281, 20979, 23781) }, - { AOM_CDF4(14939, 22119, 25952) }, - { AOM_CDF4(11363, 18407, 22812) }, - { AOM_CDF4(8609, 14857, 19370) }, - { AOM_CDF4(6737, 12184, 16480) }, - { AOM_CDF4(5506, 10263, 14262) }, - { AOM_CDF4(2990, 5786, 8380) }, - { AOM_CDF4(20249, 25253, 27417) }, - { AOM_CDF4(21070, 27518, 30001) }, - { AOM_CDF4(16854, 24469, 28074) }, - { AOM_CDF4(12864, 20486, 25000) }, - { AOM_CDF4(9962, 16978, 21778) }, - { AOM_CDF4(8074, 14338, 19048) }, - { AOM_CDF4(4494, 
8479, 11906) } }, - { { AOM_CDF4(13960, 19617, 22829) }, - { AOM_CDF4(11150, 17341, 21228) }, - { AOM_CDF4(7150, 12964, 17190) }, - { AOM_CDF4(5331, 10002, 13867) }, - { AOM_CDF4(4167, 7744, 11057) }, - { AOM_CDF4(3480, 6629, 9646) }, - { AOM_CDF4(1883, 3784, 5686) }, - { AOM_CDF4(18752, 25660, 28912) }, - { AOM_CDF4(16968, 24586, 28030) }, - { AOM_CDF4(13520, 21055, 25313) }, - { AOM_CDF4(10453, 17626, 22280) }, - { AOM_CDF4(8386, 14505, 19116) }, - { AOM_CDF4(6742, 12595, 17008) }, - { AOM_CDF4(4273, 8140, 11499) }, - { AOM_CDF4(22120, 27827, 30233) }, - { AOM_CDF4(20563, 27358, 29895) }, - { AOM_CDF4(17076, 24644, 28153) }, - { AOM_CDF4(13362, 20942, 25309) }, - { AOM_CDF4(10794, 17965, 22695) }, - { AOM_CDF4(9014, 15652, 20319) }, - { AOM_CDF4(5708, 10512, 14497) } } }, - { { { AOM_CDF4(5705, 10930, 15725) }, - { AOM_CDF4(7946, 12765, 16115) }, - { AOM_CDF4(6801, 12123, 16226) }, - { AOM_CDF4(5462, 10135, 14200) }, - { AOM_CDF4(4189, 8011, 11507) }, - { AOM_CDF4(3191, 6229, 9408) }, - { AOM_CDF4(1057, 2137, 3212) }, - { AOM_CDF4(10018, 17067, 21491) }, - { AOM_CDF4(7380, 12582, 16453) }, - { AOM_CDF4(6068, 10845, 14339) }, - { AOM_CDF4(5098, 9198, 12555) }, - { AOM_CDF4(4312, 8010, 11119) }, - { AOM_CDF4(3700, 6966, 9781) }, - { AOM_CDF4(1693, 3326, 4887) }, - { AOM_CDF4(18757, 24930, 27774) }, - { AOM_CDF4(17648, 24596, 27817) }, - { AOM_CDF4(14707, 22052, 26026) }, - { AOM_CDF4(11720, 18852, 23292) }, - { AOM_CDF4(9357, 15952, 20525) }, - { AOM_CDF4(7810, 13753, 18210) }, - { AOM_CDF4(3879, 7333, 10328) } }, - { { AOM_CDF4(8278, 13242, 15922) }, - { AOM_CDF4(10547, 15867, 18919) }, - { AOM_CDF4(9106, 15842, 20609) }, - { AOM_CDF4(6833, 13007, 17218) }, - { AOM_CDF4(4811, 9712, 13923) }, - { AOM_CDF4(3985, 7352, 11128) }, - { AOM_CDF4(1688, 3458, 5262) }, - { AOM_CDF4(12951, 21861, 26510) }, - { AOM_CDF4(9788, 16044, 20276) }, - { AOM_CDF4(6309, 11244, 14870) }, - { AOM_CDF4(5183, 9349, 12566) }, - { AOM_CDF4(4389, 8229, 11492) }, - { AOM_CDF4(3633, 6945, 
10620) }, - { AOM_CDF4(3600, 6847, 9907) }, - { AOM_CDF4(21748, 28137, 30255) }, - { AOM_CDF4(19436, 26581, 29560) }, - { AOM_CDF4(16359, 24201, 27953) }, - { AOM_CDF4(13961, 21693, 25871) }, - { AOM_CDF4(11544, 18686, 23322) }, - { AOM_CDF4(9372, 16462, 20952) }, - { AOM_CDF4(6138, 11210, 15390) } } }, - { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } } }, - { { { { AOM_CDF4(16138, 22223, 25509) }, - { AOM_CDF4(15347, 22430, 26332) }, - { AOM_CDF4(9614, 16736, 21332) }, - { AOM_CDF4(6600, 12275, 16907) }, - { AOM_CDF4(4811, 9424, 13547) }, - { AOM_CDF4(3748, 
7809, 11420) }, - { AOM_CDF4(2254, 4587, 6890) }, - { AOM_CDF4(15196, 20284, 23177) }, - { AOM_CDF4(18317, 25469, 28451) }, - { AOM_CDF4(13918, 21651, 25842) }, - { AOM_CDF4(10052, 17150, 21995) }, - { AOM_CDF4(7499, 13630, 18587) }, - { AOM_CDF4(6158, 11417, 16003) }, - { AOM_CDF4(4014, 7785, 11252) }, - { AOM_CDF4(15048, 21067, 24384) }, - { AOM_CDF4(18202, 25346, 28553) }, - { AOM_CDF4(14302, 22019, 26356) }, - { AOM_CDF4(10839, 18139, 23166) }, - { AOM_CDF4(8715, 15744, 20806) }, - { AOM_CDF4(7536, 13576, 18544) }, - { AOM_CDF4(5413, 10335, 14498) } }, - { { AOM_CDF4(17394, 24501, 27895) }, - { AOM_CDF4(15889, 23420, 27185) }, - { AOM_CDF4(11561, 19133, 23870) }, - { AOM_CDF4(8285, 14812, 19844) }, - { AOM_CDF4(6496, 12043, 16550) }, - { AOM_CDF4(4771, 9574, 13677) }, - { AOM_CDF4(3603, 6830, 10144) }, - { AOM_CDF4(21656, 27704, 30200) }, - { AOM_CDF4(21324, 27915, 30511) }, - { AOM_CDF4(17327, 25336, 28997) }, - { AOM_CDF4(13417, 21381, 26033) }, - { AOM_CDF4(10132, 17425, 22338) }, - { AOM_CDF4(8580, 15016, 19633) }, - { AOM_CDF4(5694, 11477, 16411) }, - { AOM_CDF4(24116, 29780, 31450) }, - { AOM_CDF4(23853, 29695, 31591) }, - { AOM_CDF4(20085, 27614, 30428) }, - { AOM_CDF4(15326, 24335, 28575) }, - { AOM_CDF4(11814, 19472, 24810) }, - { AOM_CDF4(10221, 18611, 24767) }, - { AOM_CDF4(7689, 14558, 20321) } } }, - { { { AOM_CDF4(16214, 22380, 25770) }, - { AOM_CDF4(14213, 21304, 25295) }, - { AOM_CDF4(9213, 15823, 20455) }, - { AOM_CDF4(6395, 11758, 16139) }, - { AOM_CDF4(4779, 9187, 13066) }, - { AOM_CDF4(3821, 7501, 10953) }, - { AOM_CDF4(2293, 4567, 6795) }, - { AOM_CDF4(15859, 21283, 23820) }, - { AOM_CDF4(18404, 25602, 28726) }, - { AOM_CDF4(14325, 21980, 26206) }, - { AOM_CDF4(10669, 17937, 22720) }, - { AOM_CDF4(8297, 14642, 19447) }, - { AOM_CDF4(6746, 12389, 16893) }, - { AOM_CDF4(4324, 8251, 11770) }, - { AOM_CDF4(16532, 21631, 24475) }, - { AOM_CDF4(20667, 27150, 29668) }, - { AOM_CDF4(16728, 24510, 28175) }, - { AOM_CDF4(12861, 20645, 25332) }, - { 
AOM_CDF4(10076, 17361, 22417) }, - { AOM_CDF4(8395, 14940, 19963) }, - { AOM_CDF4(5731, 10683, 14912) } }, - { { AOM_CDF4(14433, 21155, 24938) }, - { AOM_CDF4(14658, 21716, 25545) }, - { AOM_CDF4(9923, 16824, 21557) }, - { AOM_CDF4(6982, 13052, 17721) }, - { AOM_CDF4(5419, 10503, 15050) }, - { AOM_CDF4(4852, 9162, 13014) }, - { AOM_CDF4(3271, 6395, 9630) }, - { AOM_CDF4(22210, 27833, 30109) }, - { AOM_CDF4(20750, 27368, 29821) }, - { AOM_CDF4(16894, 24828, 28573) }, - { AOM_CDF4(13247, 21276, 25757) }, - { AOM_CDF4(10038, 17265, 22563) }, - { AOM_CDF4(8587, 14947, 20327) }, - { AOM_CDF4(5645, 11371, 15252) }, - { AOM_CDF4(22027, 27526, 29714) }, - { AOM_CDF4(23098, 29146, 31221) }, - { AOM_CDF4(19886, 27341, 30272) }, - { AOM_CDF4(15609, 23747, 28046) }, - { AOM_CDF4(11993, 20065, 24939) }, - { AOM_CDF4(9637, 18267, 23671) }, - { AOM_CDF4(7625, 13801, 19144) } } }, - { { { AOM_CDF4(14438, 20798, 24089) }, - { AOM_CDF4(12621, 19203, 23097) }, - { AOM_CDF4(8177, 14125, 18402) }, - { AOM_CDF4(5674, 10501, 14456) }, - { AOM_CDF4(4236, 8239, 11733) }, - { AOM_CDF4(3447, 6750, 9806) }, - { AOM_CDF4(1986, 3950, 5864) }, - { AOM_CDF4(16208, 22099, 24930) }, - { AOM_CDF4(16537, 24025, 27585) }, - { AOM_CDF4(12780, 20381, 24867) }, - { AOM_CDF4(9767, 16612, 21416) }, - { AOM_CDF4(7686, 13738, 18398) }, - { AOM_CDF4(6333, 11614, 15964) }, - { AOM_CDF4(3941, 7571, 10836) }, - { AOM_CDF4(22819, 27422, 29202) }, - { AOM_CDF4(22224, 28514, 30721) }, - { AOM_CDF4(17660, 25433, 28913) }, - { AOM_CDF4(13574, 21482, 26002) }, - { AOM_CDF4(10629, 17977, 22938) }, - { AOM_CDF4(8612, 15298, 20265) }, - { AOM_CDF4(5607, 10491, 14596) } }, - { { AOM_CDF4(13569, 19800, 23206) }, - { AOM_CDF4(13128, 19924, 23869) }, - { AOM_CDF4(8329, 14841, 19403) }, - { AOM_CDF4(6130, 10976, 15057) }, - { AOM_CDF4(4682, 8839, 12518) }, - { AOM_CDF4(3656, 7409, 10588) }, - { AOM_CDF4(2577, 5099, 7412) }, - { AOM_CDF4(22427, 28684, 30585) }, - { AOM_CDF4(20913, 27750, 30139) }, - { AOM_CDF4(15840, 24109, 
27834) }, - { AOM_CDF4(12308, 20029, 24569) }, - { AOM_CDF4(10216, 16785, 21458) }, - { AOM_CDF4(8309, 14203, 19113) }, - { AOM_CDF4(6043, 11168, 15307) }, - { AOM_CDF4(23166, 28901, 30998) }, - { AOM_CDF4(21899, 28405, 30751) }, - { AOM_CDF4(18413, 26091, 29443) }, - { AOM_CDF4(15233, 23114, 27352) }, - { AOM_CDF4(12683, 20472, 25288) }, - { AOM_CDF4(10702, 18259, 23409) }, - { AOM_CDF4(8125, 14464, 19226) } } }, - { { { AOM_CDF4(9040, 14786, 18360) }, - { AOM_CDF4(9979, 15718, 19415) }, - { AOM_CDF4(7913, 13918, 18311) }, - { AOM_CDF4(5859, 10889, 15184) }, - { AOM_CDF4(4593, 8677, 12510) }, - { AOM_CDF4(3820, 7396, 10791) }, - { AOM_CDF4(1730, 3471, 5192) }, - { AOM_CDF4(11803, 18365, 22709) }, - { AOM_CDF4(11419, 18058, 22225) }, - { AOM_CDF4(9418, 15774, 20243) }, - { AOM_CDF4(7539, 13325, 17657) }, - { AOM_CDF4(6233, 11317, 15384) }, - { AOM_CDF4(5137, 9656, 13545) }, - { AOM_CDF4(2977, 5774, 8349) }, - { AOM_CDF4(21207, 27246, 29640) }, - { AOM_CDF4(19547, 26578, 29497) }, - { AOM_CDF4(16169, 23871, 27690) }, - { AOM_CDF4(12820, 20458, 25018) }, - { AOM_CDF4(10224, 17332, 22214) }, - { AOM_CDF4(8526, 15048, 19884) }, - { AOM_CDF4(5037, 9410, 13118) } }, - { { AOM_CDF4(12339, 17329, 20140) }, - { AOM_CDF4(13505, 19895, 23225) }, - { AOM_CDF4(9847, 16944, 21564) }, - { AOM_CDF4(7280, 13256, 18348) }, - { AOM_CDF4(4712, 10009, 14454) }, - { AOM_CDF4(4361, 7914, 12477) }, - { AOM_CDF4(2870, 5628, 7995) }, - { AOM_CDF4(20061, 25504, 28526) }, - { AOM_CDF4(15235, 22878, 26145) }, - { AOM_CDF4(12985, 19958, 24155) }, - { AOM_CDF4(9782, 16641, 21403) }, - { AOM_CDF4(9456, 16360, 20760) }, - { AOM_CDF4(6855, 12940, 18557) }, - { AOM_CDF4(5661, 10564, 15002) }, - { AOM_CDF4(25656, 30602, 31894) }, - { AOM_CDF4(22570, 29107, 31092) }, - { AOM_CDF4(18917, 26423, 29541) }, - { AOM_CDF4(15940, 23649, 27754) }, - { AOM_CDF4(12803, 20581, 25219) }, - { AOM_CDF4(11082, 18695, 23376) }, - { AOM_CDF4(7939, 14373, 19005) } } }, - { { { AOM_CDF4(8192, 16384, 24576) }, { 
AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } } }, - { { { { AOM_CDF4(18315, 24289, 27551) }, - { AOM_CDF4(16854, 24068, 27835) }, - { AOM_CDF4(10140, 17927, 23173) }, - { AOM_CDF4(6722, 12982, 18267) }, - { AOM_CDF4(4661, 9826, 14706) }, - { AOM_CDF4(3832, 8165, 12294) }, - { AOM_CDF4(2795, 6098, 9245) }, - { AOM_CDF4(17145, 23326, 26672) }, - { AOM_CDF4(20733, 27680, 30308) }, - { AOM_CDF4(16032, 24461, 28546) }, - { AOM_CDF4(11653, 20093, 25081) }, - { AOM_CDF4(9290, 16429, 22086) }, - { AOM_CDF4(7796, 14598, 19982) }, - { AOM_CDF4(6502, 12378, 17441) }, - { AOM_CDF4(21681, 27732, 30320) }, - { 
AOM_CDF4(22389, 29044, 31261) }, - { AOM_CDF4(19027, 26731, 30087) }, - { AOM_CDF4(14739, 23755, 28624) }, - { AOM_CDF4(11358, 20778, 25511) }, - { AOM_CDF4(10995, 18073, 24190) }, - { AOM_CDF4(9162, 14990, 20617) } }, - { { AOM_CDF4(21425, 27952, 30388) }, - { AOM_CDF4(18062, 25838, 29034) }, - { AOM_CDF4(11956, 19881, 24808) }, - { AOM_CDF4(7718, 15000, 20980) }, - { AOM_CDF4(5702, 11254, 16143) }, - { AOM_CDF4(4898, 9088, 16864) }, - { AOM_CDF4(3679, 6776, 11907) }, - { AOM_CDF4(23294, 30160, 31663) }, - { AOM_CDF4(24397, 29896, 31836) }, - { AOM_CDF4(19245, 27128, 30593) }, - { AOM_CDF4(13202, 19825, 26404) }, - { AOM_CDF4(11578, 19297, 23957) }, - { AOM_CDF4(8073, 13297, 21370) }, - { AOM_CDF4(5461, 10923, 19745) }, - { AOM_CDF4(27367, 30521, 31934) }, - { AOM_CDF4(24904, 30671, 31940) }, - { AOM_CDF4(23075, 28460, 31299) }, - { AOM_CDF4(14400, 23658, 30417) }, - { AOM_CDF4(13885, 23882, 28325) }, - { AOM_CDF4(14746, 22938, 27853) }, - { AOM_CDF4(5461, 16384, 27307) } } }, - { { { AOM_CDF4(18274, 24813, 27890) }, - { AOM_CDF4(15537, 23149, 27003) }, - { AOM_CDF4(9449, 16740, 21827) }, - { AOM_CDF4(6700, 12498, 17261) }, - { AOM_CDF4(4988, 9866, 14198) }, - { AOM_CDF4(4236, 8147, 11902) }, - { AOM_CDF4(2867, 5860, 8654) }, - { AOM_CDF4(17124, 23171, 26101) }, - { AOM_CDF4(20396, 27477, 30148) }, - { AOM_CDF4(16573, 24629, 28492) }, - { AOM_CDF4(12749, 20846, 25674) }, - { AOM_CDF4(10233, 17878, 22818) }, - { AOM_CDF4(8525, 15332, 20363) }, - { AOM_CDF4(6283, 11632, 16255) }, - { AOM_CDF4(20466, 26511, 29286) }, - { AOM_CDF4(23059, 29174, 31191) }, - { AOM_CDF4(19481, 27263, 30241) }, - { AOM_CDF4(15458, 23631, 28137) }, - { AOM_CDF4(12416, 20608, 25693) }, - { AOM_CDF4(10261, 18011, 23261) }, - { AOM_CDF4(8016, 14655, 19666) } }, - { { AOM_CDF4(17616, 24586, 28112) }, - { AOM_CDF4(15809, 23299, 27155) }, - { AOM_CDF4(10767, 18890, 23793) }, - { AOM_CDF4(7727, 14255, 18865) }, - { AOM_CDF4(6129, 11926, 16882) }, - { AOM_CDF4(4482, 9704, 14861) }, - { 
AOM_CDF4(3277, 7452, 11522) }, - { AOM_CDF4(22956, 28551, 30730) }, - { AOM_CDF4(22724, 28937, 30961) }, - { AOM_CDF4(18467, 26324, 29580) }, - { AOM_CDF4(13234, 20713, 25649) }, - { AOM_CDF4(11181, 17592, 22481) }, - { AOM_CDF4(8291, 18358, 24576) }, - { AOM_CDF4(7568, 11881, 14984) }, - { AOM_CDF4(24948, 29001, 31147) }, - { AOM_CDF4(25674, 30619, 32151) }, - { AOM_CDF4(20841, 26793, 29603) }, - { AOM_CDF4(14669, 24356, 28666) }, - { AOM_CDF4(11334, 23593, 28219) }, - { AOM_CDF4(8922, 14762, 22873) }, - { AOM_CDF4(8301, 13544, 20535) } } }, - { { { AOM_CDF4(17113, 23733, 27081) }, - { AOM_CDF4(14139, 21406, 25452) }, - { AOM_CDF4(8552, 15002, 19776) }, - { AOM_CDF4(5871, 11120, 15378) }, - { AOM_CDF4(4455, 8616, 12253) }, - { AOM_CDF4(3469, 6910, 10386) }, - { AOM_CDF4(2255, 4553, 6782) }, - { AOM_CDF4(18224, 24376, 27053) }, - { AOM_CDF4(19290, 26710, 29614) }, - { AOM_CDF4(14936, 22991, 27184) }, - { AOM_CDF4(11238, 18951, 23762) }, - { AOM_CDF4(8786, 15617, 20588) }, - { AOM_CDF4(7317, 13228, 18003) }, - { AOM_CDF4(5101, 9512, 13493) }, - { AOM_CDF4(22639, 28222, 30210) }, - { AOM_CDF4(23216, 29331, 31307) }, - { AOM_CDF4(19075, 26762, 29895) }, - { AOM_CDF4(15014, 23113, 27457) }, - { AOM_CDF4(11938, 19857, 24752) }, - { AOM_CDF4(9942, 17280, 22282) }, - { AOM_CDF4(7167, 13144, 17752) } }, - { { AOM_CDF4(15820, 22738, 26488) }, - { AOM_CDF4(13530, 20885, 25216) }, - { AOM_CDF4(8395, 15530, 20452) }, - { AOM_CDF4(6574, 12321, 16380) }, - { AOM_CDF4(5353, 10419, 14568) }, - { AOM_CDF4(4613, 8446, 12381) }, - { AOM_CDF4(3440, 7158, 9903) }, - { AOM_CDF4(24247, 29051, 31224) }, - { AOM_CDF4(22118, 28058, 30369) }, - { AOM_CDF4(16498, 24768, 28389) }, - { AOM_CDF4(12920, 21175, 26137) }, - { AOM_CDF4(10730, 18619, 25352) }, - { AOM_CDF4(10187, 16279, 22791) }, - { AOM_CDF4(9310, 14631, 22127) }, - { AOM_CDF4(24970, 30558, 32057) }, - { AOM_CDF4(24801, 29942, 31698) }, - { AOM_CDF4(22432, 28453, 30855) }, - { AOM_CDF4(19054, 25680, 29580) }, - { AOM_CDF4(14392, 
23036, 28109) }, - { AOM_CDF4(12495, 20947, 26650) }, - { AOM_CDF4(12442, 20326, 26214) } } }, - { { { AOM_CDF4(12162, 18785, 22648) }, - { AOM_CDF4(12749, 19697, 23806) }, - { AOM_CDF4(8580, 15297, 20346) }, - { AOM_CDF4(6169, 11749, 16543) }, - { AOM_CDF4(4836, 9391, 13448) }, - { AOM_CDF4(3821, 7711, 11613) }, - { AOM_CDF4(2228, 4601, 7070) }, - { AOM_CDF4(16319, 24725, 28280) }, - { AOM_CDF4(15698, 23277, 27168) }, - { AOM_CDF4(12726, 20368, 25047) }, - { AOM_CDF4(9912, 17015, 21976) }, - { AOM_CDF4(7888, 14220, 19179) }, - { AOM_CDF4(6777, 12284, 17018) }, - { AOM_CDF4(4492, 8590, 12252) }, - { AOM_CDF4(23249, 28904, 30947) }, - { AOM_CDF4(21050, 27908, 30512) }, - { AOM_CDF4(17440, 25340, 28949) }, - { AOM_CDF4(14059, 22018, 26541) }, - { AOM_CDF4(11288, 18903, 23898) }, - { AOM_CDF4(9411, 16342, 21428) }, - { AOM_CDF4(6278, 11588, 15944) } }, - { { AOM_CDF4(13981, 20067, 23226) }, - { AOM_CDF4(16922, 23580, 26783) }, - { AOM_CDF4(11005, 19039, 24487) }, - { AOM_CDF4(7389, 14218, 19798) }, - { AOM_CDF4(5598, 11505, 17206) }, - { AOM_CDF4(6090, 11213, 15659) }, - { AOM_CDF4(3820, 7371, 10119) }, - { AOM_CDF4(21082, 26925, 29675) }, - { AOM_CDF4(21262, 28627, 31128) }, - { AOM_CDF4(18392, 26454, 30437) }, - { AOM_CDF4(14870, 22910, 27096) }, - { AOM_CDF4(12620, 19484, 24908) }, - { AOM_CDF4(9290, 16553, 22802) }, - { AOM_CDF4(6668, 14288, 20004) }, - { AOM_CDF4(27704, 31055, 31949) }, - { AOM_CDF4(24709, 29978, 31788) }, - { AOM_CDF4(21668, 29264, 31657) }, - { AOM_CDF4(18295, 26968, 30074) }, - { AOM_CDF4(16399, 24422, 29313) }, - { AOM_CDF4(14347, 23026, 28104) }, - { AOM_CDF4(12370, 19806, 24477) } } }, - { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { 
AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } } } - }; - -static const aom_cdf_prob av1_default_coeff_base_multi_cdfs - [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS] - [CDF_SIZE(NUM_BASE_LEVELS + 2)] = - { { { { { AOM_CDF4(4034, 8930, 12727) }, - { AOM_CDF4(18082, 29741, 31877) }, - { AOM_CDF4(12596, 26124, 30493) }, - { AOM_CDF4(9446, 21118, 27005) }, - { AOM_CDF4(6308, 15141, 21279) }, - { AOM_CDF4(2463, 6357, 9783) }, - { AOM_CDF4(20667, 30546, 31929) }, - { AOM_CDF4(13043, 26123, 30134) }, - { AOM_CDF4(8151, 18757, 24778) }, - { AOM_CDF4(5255, 12839, 18632) }, - { AOM_CDF4(2820, 7206, 11161) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { 
AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(15736, 27553, 30604) }, - { AOM_CDF4(11210, 23794, 28787) }, - { AOM_CDF4(5947, 13874, 19701) }, - { AOM_CDF4(4215, 9323, 13891) }, - { AOM_CDF4(2833, 6462, 10059) }, - { AOM_CDF4(19605, 30393, 31582) }, - { AOM_CDF4(13523, 26252, 30248) }, - { AOM_CDF4(8446, 18622, 24512) }, - { AOM_CDF4(3818, 10343, 15974) }, - { AOM_CDF4(1481, 4117, 6796) }, - { AOM_CDF4(22649, 31302, 32190) }, - { AOM_CDF4(14829, 27127, 30449) }, - { AOM_CDF4(8313, 17702, 23304) }, - { AOM_CDF4(3022, 8301, 12786) }, - { AOM_CDF4(1536, 4412, 7184) }, - { AOM_CDF4(22354, 29774, 31372) }, - { AOM_CDF4(14723, 25472, 29214) }, - { AOM_CDF4(6673, 13745, 18662) }, - { AOM_CDF4(2068, 5766, 9322) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(6302, 16444, 21761) }, - { AOM_CDF4(23040, 31538, 32475) }, - { AOM_CDF4(15196, 28452, 31496) }, - { AOM_CDF4(10020, 22946, 28514) }, - { AOM_CDF4(6533, 16862, 23501) }, - { AOM_CDF4(3538, 9816, 15076) }, - { AOM_CDF4(24444, 31875, 32525) }, - { AOM_CDF4(15881, 28924, 31635) }, - { AOM_CDF4(9922, 22873, 28466) }, - { AOM_CDF4(6527, 16966, 23691) }, - { AOM_CDF4(4114, 11303, 17220) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(20201, 30770, 32209) }, - { AOM_CDF4(14754, 28071, 31258) }, - { AOM_CDF4(8378, 20186, 26517) }, - { AOM_CDF4(5916, 15299, 21978) }, - { AOM_CDF4(4268, 11583, 17901) }, - { AOM_CDF4(24361, 32025, 32581) }, - { AOM_CDF4(18673, 30105, 31943) }, - { AOM_CDF4(10196, 22244, 27576) }, - { AOM_CDF4(5495, 14349, 20417) }, - { AOM_CDF4(2676, 7415, 11498) }, - { AOM_CDF4(24678, 31958, 32585) }, - { 
AOM_CDF4(18629, 29906, 31831) }, - { AOM_CDF4(9364, 20724, 26315) }, - { AOM_CDF4(4641, 12318, 18094) }, - { AOM_CDF4(2758, 7387, 11579) }, - { AOM_CDF4(25433, 31842, 32469) }, - { AOM_CDF4(18795, 29289, 31411) }, - { AOM_CDF4(7644, 17584, 23592) }, - { AOM_CDF4(3408, 9014, 15047) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(4536, 10072, 14001) }, - { AOM_CDF4(25459, 31416, 32206) }, - { AOM_CDF4(16605, 28048, 30818) }, - { AOM_CDF4(11008, 22857, 27719) }, - { AOM_CDF4(6915, 16268, 22315) }, - { AOM_CDF4(2625, 6812, 10537) }, - { AOM_CDF4(24257, 31788, 32499) }, - { AOM_CDF4(16880, 29454, 31879) }, - { AOM_CDF4(11958, 25054, 29778) }, - { AOM_CDF4(7916, 18718, 25084) }, - { AOM_CDF4(3383, 8777, 13446) }, - { AOM_CDF4(22720, 31603, 32393) }, - { AOM_CDF4(14960, 28125, 31335) }, - { AOM_CDF4(9731, 22210, 27928) }, - { AOM_CDF4(6304, 15832, 22277) }, - { AOM_CDF4(2910, 7818, 12166) }, - { AOM_CDF4(20375, 30627, 32131) }, - { AOM_CDF4(13904, 27284, 30887) }, - { AOM_CDF4(9368, 21558, 27144) }, - { AOM_CDF4(5937, 14966, 21119) }, - { AOM_CDF4(2667, 7225, 11319) }, - { AOM_CDF4(23970, 31470, 32378) }, - { AOM_CDF4(17173, 29734, 32018) }, - { AOM_CDF4(12795, 25441, 29965) }, - { AOM_CDF4(8981, 19680, 25893) }, - { AOM_CDF4(4728, 11372, 16902) }, - { AOM_CDF4(24287, 31797, 32439) }, - { AOM_CDF4(16703, 29145, 31696) }, - { AOM_CDF4(10833, 23554, 28725) }, - { AOM_CDF4(6468, 16566, 23057) }, - { AOM_CDF4(2415, 6562, 10278) }, - { AOM_CDF4(26610, 32395, 32659) }, - { AOM_CDF4(18590, 30498, 32117) }, - { AOM_CDF4(12420, 25756, 29950) }, - { AOM_CDF4(7639, 18746, 24710) }, - { AOM_CDF4(3001, 8086, 12347) }, - { AOM_CDF4(25076, 32064, 32580) }, - { AOM_CDF4(17946, 30128, 32028) }, - { AOM_CDF4(12024, 24985, 29378) }, - { AOM_CDF4(7517, 18390, 24304) }, - { AOM_CDF4(3243, 8781, 13331) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(6037, 16771, 21957) }, - { AOM_CDF4(24774, 31704, 32426) }, - { AOM_CDF4(16830, 28589, 
31056) }, - { AOM_CDF4(10602, 22828, 27760) }, - { AOM_CDF4(6733, 16829, 23071) }, - { AOM_CDF4(3250, 8914, 13556) }, - { AOM_CDF4(25582, 32220, 32668) }, - { AOM_CDF4(18659, 30342, 32223) }, - { AOM_CDF4(12546, 26149, 30515) }, - { AOM_CDF4(8420, 20451, 26801) }, - { AOM_CDF4(4636, 12420, 18344) }, - { AOM_CDF4(27581, 32362, 32639) }, - { AOM_CDF4(18987, 30083, 31978) }, - { AOM_CDF4(11327, 24248, 29084) }, - { AOM_CDF4(7264, 17719, 24120) }, - { AOM_CDF4(3995, 10768, 16169) }, - { AOM_CDF4(25893, 31831, 32487) }, - { AOM_CDF4(16577, 28587, 31379) }, - { AOM_CDF4(10189, 22748, 28182) }, - { AOM_CDF4(6832, 17094, 23556) }, - { AOM_CDF4(3708, 10110, 15334) }, - { AOM_CDF4(25904, 32282, 32656) }, - { AOM_CDF4(19721, 30792, 32276) }, - { AOM_CDF4(12819, 26243, 30411) }, - { AOM_CDF4(8572, 20614, 26891) }, - { AOM_CDF4(5364, 14059, 20467) }, - { AOM_CDF4(26580, 32438, 32677) }, - { AOM_CDF4(20852, 31225, 32340) }, - { AOM_CDF4(12435, 25700, 29967) }, - { AOM_CDF4(8691, 20825, 26976) }, - { AOM_CDF4(4446, 12209, 17269) }, - { AOM_CDF4(27350, 32429, 32696) }, - { AOM_CDF4(21372, 30977, 32272) }, - { AOM_CDF4(12673, 25270, 29853) }, - { AOM_CDF4(9208, 20925, 26640) }, - { AOM_CDF4(5018, 13351, 18732) }, - { AOM_CDF4(27351, 32479, 32713) }, - { AOM_CDF4(21398, 31209, 32387) }, - { AOM_CDF4(12162, 25047, 29842) }, - { AOM_CDF4(7896, 18691, 25319) }, - { AOM_CDF4(4670, 12882, 18881) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(5487, 10460, 13708) }, - { AOM_CDF4(21597, 28303, 30674) }, - { AOM_CDF4(11037, 21953, 26476) }, - { AOM_CDF4(8147, 17962, 22952) }, - { AOM_CDF4(5242, 13061, 18532) }, - { AOM_CDF4(1889, 5208, 8182) }, - { AOM_CDF4(26774, 32133, 32590) }, - { AOM_CDF4(17844, 29564, 31767) }, - { AOM_CDF4(11690, 24438, 29171) }, - { AOM_CDF4(7542, 18215, 24459) }, - { AOM_CDF4(2993, 8050, 12319) }, - { AOM_CDF4(28023, 32328, 32591) }, - { AOM_CDF4(18651, 30126, 31954) }, - { AOM_CDF4(12164, 25146, 29589) }, - { AOM_CDF4(7762, 18530, 24771) }, - { 
AOM_CDF4(3492, 9183, 13920) }, - { AOM_CDF4(27591, 32008, 32491) }, - { AOM_CDF4(17149, 28853, 31510) }, - { AOM_CDF4(11485, 24003, 28860) }, - { AOM_CDF4(7697, 18086, 24210) }, - { AOM_CDF4(3075, 7999, 12218) }, - { AOM_CDF4(28268, 32482, 32654) }, - { AOM_CDF4(19631, 31051, 32404) }, - { AOM_CDF4(13860, 27260, 31020) }, - { AOM_CDF4(9605, 21613, 27594) }, - { AOM_CDF4(4876, 12162, 17908) }, - { AOM_CDF4(27248, 32316, 32576) }, - { AOM_CDF4(18955, 30457, 32075) }, - { AOM_CDF4(11824, 23997, 28795) }, - { AOM_CDF4(7346, 18196, 24647) }, - { AOM_CDF4(3403, 9247, 14111) }, - { AOM_CDF4(29711, 32655, 32735) }, - { AOM_CDF4(21169, 31394, 32417) }, - { AOM_CDF4(13487, 27198, 30957) }, - { AOM_CDF4(8828, 21683, 27614) }, - { AOM_CDF4(4270, 11451, 17038) }, - { AOM_CDF4(28708, 32578, 32731) }, - { AOM_CDF4(20120, 31241, 32482) }, - { AOM_CDF4(13692, 27550, 31321) }, - { AOM_CDF4(9418, 22514, 28439) }, - { AOM_CDF4(4999, 13283, 19462) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(5673, 14302, 19711) }, - { AOM_CDF4(26251, 30701, 31834) }, - { AOM_CDF4(12782, 23783, 27803) }, - { AOM_CDF4(9127, 20657, 25808) }, - { AOM_CDF4(6368, 16208, 21462) }, - { AOM_CDF4(2465, 7177, 10822) }, - { AOM_CDF4(29961, 32563, 32719) }, - { AOM_CDF4(18318, 29891, 31949) }, - { AOM_CDF4(11361, 24514, 29357) }, - { AOM_CDF4(7900, 19603, 25607) }, - { AOM_CDF4(4002, 10590, 15546) }, - { AOM_CDF4(29637, 32310, 32595) }, - { AOM_CDF4(18296, 29913, 31809) }, - { AOM_CDF4(10144, 21515, 26871) }, - { AOM_CDF4(5358, 14322, 20394) }, - { AOM_CDF4(3067, 8362, 13346) }, - { AOM_CDF4(28652, 32470, 32676) }, - { AOM_CDF4(17538, 30771, 32209) }, - { AOM_CDF4(13924, 26882, 30494) }, - { AOM_CDF4(10496, 22837, 27869) }, - { AOM_CDF4(7236, 16396, 21621) }, - { AOM_CDF4(30743, 32687, 32746) }, - { AOM_CDF4(23006, 31676, 32489) }, - { AOM_CDF4(14494, 27828, 31120) }, - { AOM_CDF4(10174, 22801, 28352) }, - { AOM_CDF4(6242, 15281, 21043) }, - { AOM_CDF4(25817, 32243, 32720) }, - { AOM_CDF4(18618, 31367, 
32325) }, - { AOM_CDF4(13997, 28318, 31878) }, - { AOM_CDF4(12255, 26534, 31383) }, - { AOM_CDF4(9561, 21588, 28450) }, - { AOM_CDF4(28188, 32635, 32724) }, - { AOM_CDF4(22060, 32365, 32728) }, - { AOM_CDF4(18102, 30690, 32528) }, - { AOM_CDF4(14196, 28864, 31999) }, - { AOM_CDF4(12262, 25792, 30865) }, - { AOM_CDF4(24176, 32109, 32628) }, - { AOM_CDF4(18280, 29681, 31963) }, - { AOM_CDF4(10205, 23703, 29664) }, - { AOM_CDF4(7889, 20025, 27676) }, - { AOM_CDF4(6060, 16743, 23970) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(5141, 7096, 8260) }, - { AOM_CDF4(27186, 29022, 29789) }, - { AOM_CDF4(6668, 12568, 15682) }, - { AOM_CDF4(2172, 6181, 8638) }, - { AOM_CDF4(1126, 3379, 4531) }, - { AOM_CDF4(443, 1361, 2254) }, - { AOM_CDF4(26083, 31153, 32436) }, - { AOM_CDF4(13486, 24603, 28483) }, - { AOM_CDF4(6508, 14840, 19910) }, - { AOM_CDF4(3386, 8800, 13286) }, - { AOM_CDF4(1530, 4322, 7054) }, - { AOM_CDF4(29639, 32080, 32548) }, - { AOM_CDF4(15897, 27552, 30290) }, - { AOM_CDF4(8588, 20047, 25383) }, - { AOM_CDF4(4889, 13339, 19269) }, - { AOM_CDF4(2240, 6871, 10498) }, - { AOM_CDF4(28165, 32197, 32517) }, - { AOM_CDF4(20735, 30427, 31568) }, - { AOM_CDF4(14325, 24671, 27692) }, - { AOM_CDF4(5119, 12554, 17805) }, - { AOM_CDF4(1810, 5441, 8261) }, - { AOM_CDF4(31212, 32724, 32748) }, - { AOM_CDF4(23352, 31766, 32545) }, - { AOM_CDF4(14669, 27570, 31059) }, - { AOM_CDF4(8492, 20894, 27272) }, - { AOM_CDF4(3644, 10194, 15204) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 
24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(2461, 7013, 9371) }, - { AOM_CDF4(24749, 29600, 30986) }, - { AOM_CDF4(9466, 19037, 22417) }, - { AOM_CDF4(3584, 9280, 14400) }, - { AOM_CDF4(1505, 3929, 5433) }, - { AOM_CDF4(677, 1500, 2736) }, - { AOM_CDF4(23987, 30702, 32117) }, - { AOM_CDF4(13554, 24571, 29263) }, - { AOM_CDF4(6211, 14556, 21155) }, - { AOM_CDF4(3135, 10972, 15625) }, - { AOM_CDF4(2435, 7127, 11427) }, - { AOM_CDF4(31300, 32532, 32550) }, - { AOM_CDF4(14757, 30365, 31954) }, - { AOM_CDF4(4405, 11612, 18553) }, - { AOM_CDF4(580, 4132, 7322) }, - { AOM_CDF4(1695, 10169, 14124) }, - { AOM_CDF4(30008, 32282, 32591) }, - { AOM_CDF4(19244, 30108, 31748) }, - { AOM_CDF4(11180, 24158, 29555) }, - { AOM_CDF4(5650, 14972, 19209) }, - { AOM_CDF4(2114, 5109, 8456) }, - { AOM_CDF4(31856, 32716, 32748) }, - { AOM_CDF4(23012, 31664, 32572) }, - { AOM_CDF4(13694, 26656, 30636) }, - { AOM_CDF4(8142, 19508, 26093) }, - { AOM_CDF4(4253, 10955, 16724) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(601, 983, 1311) }, - { AOM_CDF4(18725, 23406, 28087) }, - { AOM_CDF4(5461, 8192, 10923) }, - { AOM_CDF4(3781, 15124, 21425) }, - { AOM_CDF4(2587, 7761, 12072) }, - { AOM_CDF4(106, 458, 810) }, - { AOM_CDF4(22282, 29710, 31894) }, - { AOM_CDF4(8508, 20926, 25984) }, - { AOM_CDF4(3726, 12713, 18083) }, - { AOM_CDF4(1620, 7112, 10893) }, - { AOM_CDF4(729, 2236, 3495) }, - { AOM_CDF4(30163, 32474, 32684) }, - { 
AOM_CDF4(18304, 30464, 32000) }, - { AOM_CDF4(11443, 26526, 29647) }, - { AOM_CDF4(6007, 15292, 21299) }, - { AOM_CDF4(2234, 6703, 8937) }, - { AOM_CDF4(30954, 32177, 32571) }, - { AOM_CDF4(17363, 29562, 31076) }, - { AOM_CDF4(9686, 22464, 27410) }, - { AOM_CDF4(8192, 16384, 21390) }, - { AOM_CDF4(1755, 8046, 11264) }, - { AOM_CDF4(31168, 32734, 32748) }, - { AOM_CDF4(22486, 31441, 32471) }, - { AOM_CDF4(12833, 25627, 29738) }, - { AOM_CDF4(6980, 17379, 23122) }, - { AOM_CDF4(3111, 8887, 13479) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { 
AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } } }, - { { { { AOM_CDF4(6041, 11854, 15927) }, - { AOM_CDF4(20326, 30905, 32251) }, - { AOM_CDF4(14164, 26831, 30725) }, - { AOM_CDF4(9760, 20647, 26585) }, - { AOM_CDF4(6416, 14953, 21219) }, - { AOM_CDF4(2966, 7151, 10891) }, - { AOM_CDF4(23567, 31374, 32254) }, - { AOM_CDF4(14978, 27416, 30946) }, - { AOM_CDF4(9434, 20225, 26254) }, - { AOM_CDF4(6658, 14558, 20535) }, - { AOM_CDF4(3916, 8677, 12989) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(18088, 29545, 31587) }, - { AOM_CDF4(13062, 25843, 30073) }, - { AOM_CDF4(8940, 16827, 22251) }, - { AOM_CDF4(7654, 13220, 17973) }, - { AOM_CDF4(5733, 10316, 14456) }, - { AOM_CDF4(22879, 31388, 32114) }, - { AOM_CDF4(15215, 27993, 30955) }, - { AOM_CDF4(9397, 19445, 24978) }, - { AOM_CDF4(3442, 9813, 15344) }, - { AOM_CDF4(1368, 3936, 6532) }, - { AOM_CDF4(25494, 32033, 32406) }, - { AOM_CDF4(16772, 27963, 30718) }, - { AOM_CDF4(9419, 18165, 23260) }, - { AOM_CDF4(2677, 7501, 11797) }, - { AOM_CDF4(1516, 4344, 7170) }, - { AOM_CDF4(26556, 31454, 32101) }, - { AOM_CDF4(17128, 27035, 30108) }, - { 
AOM_CDF4(8324, 15344, 20249) }, - { AOM_CDF4(1903, 5696, 9469) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8455, 19003, 24368) }, - { AOM_CDF4(23563, 32021, 32604) }, - { AOM_CDF4(16237, 29446, 31935) }, - { AOM_CDF4(10724, 23999, 29358) }, - { AOM_CDF4(6725, 17528, 24416) }, - { AOM_CDF4(3927, 10927, 16825) }, - { AOM_CDF4(26313, 32288, 32634) }, - { AOM_CDF4(17430, 30095, 32095) }, - { AOM_CDF4(11116, 24606, 29679) }, - { AOM_CDF4(7195, 18384, 25269) }, - { AOM_CDF4(4726, 12852, 19315) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(22822, 31648, 32483) }, - { AOM_CDF4(16724, 29633, 31929) }, - { AOM_CDF4(10261, 23033, 28725) }, - { AOM_CDF4(7029, 17840, 24528) }, - { AOM_CDF4(4867, 13886, 21502) }, - { AOM_CDF4(25298, 31892, 32491) }, - { AOM_CDF4(17809, 29330, 31512) }, - { AOM_CDF4(9668, 21329, 26579) }, - { AOM_CDF4(4774, 12956, 18976) }, - { AOM_CDF4(2322, 7030, 11540) }, - { AOM_CDF4(25472, 31920, 32543) }, - { AOM_CDF4(17957, 29387, 31632) }, - { AOM_CDF4(9196, 20593, 26400) }, - { AOM_CDF4(4680, 12705, 19202) }, - { AOM_CDF4(2917, 8456, 13436) }, - { AOM_CDF4(26471, 32059, 32574) }, - { AOM_CDF4(18458, 29783, 31909) }, - { AOM_CDF4(8400, 19464, 25956) }, - { AOM_CDF4(3812, 10973, 17206) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(6779, 13743, 17678) }, - { AOM_CDF4(24806, 31797, 32457) }, - { AOM_CDF4(17616, 29047, 31372) }, - { AOM_CDF4(11063, 23175, 28003) }, - { AOM_CDF4(6521, 16110, 22324) }, - { AOM_CDF4(2764, 7504, 11654) }, - { AOM_CDF4(25266, 32367, 32637) }, - { AOM_CDF4(19054, 30553, 32175) }, - { AOM_CDF4(12139, 25212, 
29807) }, - { AOM_CDF4(7311, 18162, 24704) }, - { AOM_CDF4(3397, 9164, 14074) }, - { AOM_CDF4(25988, 32208, 32522) }, - { AOM_CDF4(16253, 28912, 31526) }, - { AOM_CDF4(9151, 21387, 27372) }, - { AOM_CDF4(5688, 14915, 21496) }, - { AOM_CDF4(2717, 7627, 12004) }, - { AOM_CDF4(23144, 31855, 32443) }, - { AOM_CDF4(16070, 28491, 31325) }, - { AOM_CDF4(8702, 20467, 26517) }, - { AOM_CDF4(5243, 13956, 20367) }, - { AOM_CDF4(2621, 7335, 11567) }, - { AOM_CDF4(26636, 32340, 32630) }, - { AOM_CDF4(19990, 31050, 32341) }, - { AOM_CDF4(13243, 26105, 30315) }, - { AOM_CDF4(8588, 19521, 25918) }, - { AOM_CDF4(4717, 11585, 17304) }, - { AOM_CDF4(25844, 32292, 32582) }, - { AOM_CDF4(19090, 30635, 32097) }, - { AOM_CDF4(11963, 24546, 28939) }, - { AOM_CDF4(6218, 16087, 22354) }, - { AOM_CDF4(2340, 6608, 10426) }, - { AOM_CDF4(28046, 32576, 32694) }, - { AOM_CDF4(21178, 31313, 32296) }, - { AOM_CDF4(13486, 26184, 29870) }, - { AOM_CDF4(7149, 17871, 23723) }, - { AOM_CDF4(2833, 7958, 12259) }, - { AOM_CDF4(27710, 32528, 32686) }, - { AOM_CDF4(20674, 31076, 32268) }, - { AOM_CDF4(12413, 24955, 29243) }, - { AOM_CDF4(6676, 16927, 23097) }, - { AOM_CDF4(2966, 8333, 12919) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8639, 19339, 24429) }, - { AOM_CDF4(24404, 31837, 32525) }, - { AOM_CDF4(16997, 29425, 31784) }, - { AOM_CDF4(11253, 24234, 29149) }, - { AOM_CDF4(6751, 17394, 24028) }, - { AOM_CDF4(3490, 9830, 15191) }, - { AOM_CDF4(26283, 32471, 32714) }, - { AOM_CDF4(19599, 31168, 32442) }, - { AOM_CDF4(13146, 26954, 30893) }, - { AOM_CDF4(8214, 20588, 26890) }, - { AOM_CDF4(4699, 13081, 19300) }, - { AOM_CDF4(28212, 32458, 32669) }, - { AOM_CDF4(18594, 30316, 32100) }, - { AOM_CDF4(11219, 24408, 29234) }, - { AOM_CDF4(6865, 17656, 24149) }, - { AOM_CDF4(3678, 10362, 16006) }, - { AOM_CDF4(25825, 32136, 32616) }, - { AOM_CDF4(17313, 29853, 32021) }, - { AOM_CDF4(11197, 24471, 29472) }, - { AOM_CDF4(6947, 17781, 24405) }, - { AOM_CDF4(3768, 10660, 16261) }, - { AOM_CDF4(27352, 
32500, 32706) }, - { AOM_CDF4(20850, 31468, 32469) }, - { AOM_CDF4(14021, 27707, 31133) }, - { AOM_CDF4(8964, 21748, 27838) }, - { AOM_CDF4(5437, 14665, 21187) }, - { AOM_CDF4(26304, 32492, 32698) }, - { AOM_CDF4(20409, 31380, 32385) }, - { AOM_CDF4(13682, 27222, 30632) }, - { AOM_CDF4(8974, 21236, 26685) }, - { AOM_CDF4(4234, 11665, 16934) }, - { AOM_CDF4(26273, 32357, 32711) }, - { AOM_CDF4(20672, 31242, 32441) }, - { AOM_CDF4(14172, 27254, 30902) }, - { AOM_CDF4(9870, 21898, 27275) }, - { AOM_CDF4(5164, 13506, 19270) }, - { AOM_CDF4(26725, 32459, 32728) }, - { AOM_CDF4(20991, 31442, 32527) }, - { AOM_CDF4(13071, 26434, 30811) }, - { AOM_CDF4(8184, 20090, 26742) }, - { AOM_CDF4(4803, 13255, 19895) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(7555, 14942, 18501) }, - { AOM_CDF4(24410, 31178, 32287) }, - { AOM_CDF4(14394, 26738, 30253) }, - { AOM_CDF4(8413, 19554, 25195) }, - { AOM_CDF4(4766, 12924, 18785) }, - { AOM_CDF4(2029, 5806, 9207) }, - { AOM_CDF4(26776, 32364, 32663) }, - { AOM_CDF4(18732, 29967, 31931) }, - { AOM_CDF4(11005, 23786, 28852) }, - { AOM_CDF4(6466, 16909, 23510) }, - { AOM_CDF4(3044, 8638, 13419) }, - { AOM_CDF4(29208, 32582, 32704) }, - { AOM_CDF4(20068, 30857, 32208) }, - { AOM_CDF4(12003, 25085, 29595) }, - { AOM_CDF4(6947, 17750, 24189) }, - { AOM_CDF4(3245, 9103, 14007) }, - { AOM_CDF4(27359, 32465, 32669) }, - { AOM_CDF4(19421, 30614, 32174) }, - { AOM_CDF4(11915, 25010, 29579) }, - { AOM_CDF4(6950, 17676, 24074) }, - { AOM_CDF4(3007, 8473, 13096) }, - { AOM_CDF4(29002, 32676, 32735) }, - { AOM_CDF4(22102, 31849, 32576) }, - { AOM_CDF4(14408, 28009, 31405) }, - { AOM_CDF4(9027, 21679, 27931) }, - { AOM_CDF4(4694, 12678, 18748) }, - { AOM_CDF4(28216, 32528, 32682) }, - { AOM_CDF4(20849, 31264, 32318) }, - { AOM_CDF4(12756, 25815, 29751) }, - { AOM_CDF4(7565, 18801, 24923) }, - { AOM_CDF4(3509, 9533, 14477) }, - { AOM_CDF4(30133, 32687, 32739) }, - { AOM_CDF4(23063, 31910, 32515) }, - { AOM_CDF4(14588, 28051, 31132) }, - { 
AOM_CDF4(9085, 21649, 27457) }, - { AOM_CDF4(4261, 11654, 17264) }, - { AOM_CDF4(29518, 32691, 32748) }, - { AOM_CDF4(22451, 31959, 32613) }, - { AOM_CDF4(14864, 28722, 31700) }, - { AOM_CDF4(9695, 22964, 28716) }, - { AOM_CDF4(4932, 13358, 19502) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(6465, 16958, 21688) }, - { AOM_CDF4(25199, 31514, 32360) }, - { AOM_CDF4(14774, 27149, 30607) }, - { AOM_CDF4(9257, 21438, 26972) }, - { AOM_CDF4(5723, 15183, 21882) }, - { AOM_CDF4(3150, 8879, 13731) }, - { AOM_CDF4(26989, 32262, 32682) }, - { AOM_CDF4(17396, 29937, 32085) }, - { AOM_CDF4(11387, 24901, 29784) }, - { AOM_CDF4(7289, 18821, 25548) }, - { AOM_CDF4(3734, 10577, 16086) }, - { AOM_CDF4(29728, 32501, 32695) }, - { AOM_CDF4(17431, 29701, 31903) }, - { AOM_CDF4(9921, 22826, 28300) }, - { AOM_CDF4(5896, 15434, 22068) }, - { AOM_CDF4(3430, 9646, 14757) }, - { AOM_CDF4(28614, 32511, 32705) }, - { AOM_CDF4(19364, 30638, 32263) }, - { AOM_CDF4(13129, 26254, 30402) }, - { AOM_CDF4(8754, 20484, 26440) }, - { AOM_CDF4(4378, 11607, 17110) }, - { AOM_CDF4(30292, 32671, 32744) }, - { AOM_CDF4(21780, 31603, 32501) }, - { AOM_CDF4(14314, 27829, 31291) }, - { AOM_CDF4(9611, 22327, 28263) }, - { AOM_CDF4(4890, 13087, 19065) }, - { AOM_CDF4(25862, 32567, 32733) }, - { AOM_CDF4(20794, 32050, 32567) }, - { AOM_CDF4(17243, 30625, 32254) }, - { AOM_CDF4(13283, 27628, 31474) }, - { AOM_CDF4(9669, 22532, 28918) }, - { AOM_CDF4(27435, 32697, 32748) }, - { AOM_CDF4(24922, 32390, 32714) }, - { AOM_CDF4(21449, 31504, 32536) }, - { AOM_CDF4(16392, 29729, 31832) }, - { AOM_CDF4(11692, 24884, 29076) }, - { AOM_CDF4(24193, 32290, 32735) }, - { AOM_CDF4(18909, 31104, 32563) }, - { AOM_CDF4(12236, 26841, 31403) }, - { AOM_CDF4(8171, 21840, 29082) }, - { AOM_CDF4(7224, 17280, 25275) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(3078, 6839, 9890) }, - { AOM_CDF4(13837, 20450, 24479) }, - { AOM_CDF4(5914, 14222, 19328) }, - { AOM_CDF4(3866, 10267, 14762) }, - { AOM_CDF4(2612, 
7208, 11042) }, - { AOM_CDF4(1067, 2991, 4776) }, - { AOM_CDF4(25817, 31646, 32529) }, - { AOM_CDF4(13708, 26338, 30385) }, - { AOM_CDF4(7328, 18585, 24870) }, - { AOM_CDF4(4691, 13080, 19276) }, - { AOM_CDF4(1825, 5253, 8352) }, - { AOM_CDF4(29386, 32315, 32624) }, - { AOM_CDF4(17160, 29001, 31360) }, - { AOM_CDF4(9602, 21862, 27396) }, - { AOM_CDF4(5915, 15772, 22148) }, - { AOM_CDF4(2786, 7779, 12047) }, - { AOM_CDF4(29246, 32450, 32663) }, - { AOM_CDF4(18696, 29929, 31818) }, - { AOM_CDF4(10510, 23369, 28560) }, - { AOM_CDF4(6229, 16499, 23125) }, - { AOM_CDF4(2608, 7448, 11705) }, - { AOM_CDF4(30753, 32710, 32748) }, - { AOM_CDF4(21638, 31487, 32503) }, - { AOM_CDF4(12937, 26854, 30870) }, - { AOM_CDF4(8182, 20596, 26970) }, - { AOM_CDF4(3637, 10269, 15497) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(5244, 12150, 16906) }, - { AOM_CDF4(20486, 26858, 29701) }, - { AOM_CDF4(7756, 18317, 23735) }, - { AOM_CDF4(3452, 9256, 13146) }, - { AOM_CDF4(2020, 5206, 8229) }, - { AOM_CDF4(1801, 4993, 7903) }, - { AOM_CDF4(27051, 31858, 32531) }, - { AOM_CDF4(15988, 27531, 30619) }, - { AOM_CDF4(9188, 21484, 26719) }, - { AOM_CDF4(6273, 17186, 23800) }, - { AOM_CDF4(3108, 9355, 14764) }, - { AOM_CDF4(31076, 32520, 32680) }, - { AOM_CDF4(18119, 30037, 31850) }, - { AOM_CDF4(10244, 22969, 27472) }, - { AOM_CDF4(4692, 14077, 19273) }, - { AOM_CDF4(3694, 11677, 17556) }, - { AOM_CDF4(30060, 32581, 32720) }, - { AOM_CDF4(21011, 30775, 
32120) }, - { AOM_CDF4(11931, 24820, 29289) }, - { AOM_CDF4(7119, 17662, 24356) }, - { AOM_CDF4(3833, 10706, 16304) }, - { AOM_CDF4(31954, 32731, 32748) }, - { AOM_CDF4(23913, 31724, 32489) }, - { AOM_CDF4(15520, 28060, 31286) }, - { AOM_CDF4(11517, 23008, 28571) }, - { AOM_CDF4(6193, 14508, 20629) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(1035, 2807, 4156) }, - { AOM_CDF4(13162, 18138, 20939) }, - { AOM_CDF4(2696, 6633, 8755) }, - { AOM_CDF4(1373, 4161, 6853) }, - { AOM_CDF4(1099, 2746, 4716) }, - { AOM_CDF4(340, 1021, 1599) }, - { AOM_CDF4(22826, 30419, 32135) }, - { AOM_CDF4(10395, 21762, 26942) }, - { AOM_CDF4(4726, 12407, 17361) }, - { AOM_CDF4(2447, 7080, 10593) }, - { AOM_CDF4(1227, 3717, 6011) }, - { AOM_CDF4(28156, 31424, 31934) }, - { AOM_CDF4(16915, 27754, 30373) }, - { AOM_CDF4(9148, 20990, 26431) }, - { AOM_CDF4(5950, 15515, 21148) }, - { AOM_CDF4(2492, 7327, 11526) }, - { AOM_CDF4(30602, 32477, 32670) }, - { AOM_CDF4(20026, 29955, 31568) }, - { AOM_CDF4(11220, 23628, 28105) }, - { AOM_CDF4(6652, 17019, 22973) }, - { AOM_CDF4(3064, 8536, 13043) }, - { AOM_CDF4(31769, 32724, 32748) }, - { AOM_CDF4(22230, 30887, 32373) }, - { AOM_CDF4(12234, 25079, 29731) }, - { AOM_CDF4(7326, 18816, 25353) }, - { AOM_CDF4(3933, 10907, 16616) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - 
{ AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } } }, - { { { { AOM_CDF4(8896, 16227, 20630) }, - { AOM_CDF4(23629, 31782, 32527) }, - { 
AOM_CDF4(15173, 27755, 31321) }, - { AOM_CDF4(10158, 21233, 27382) }, - { AOM_CDF4(6420, 14857, 21558) }, - { AOM_CDF4(3269, 8155, 12646) }, - { AOM_CDF4(24835, 32009, 32496) }, - { AOM_CDF4(16509, 28421, 31579) }, - { AOM_CDF4(10957, 21514, 27418) }, - { AOM_CDF4(7881, 15930, 22096) }, - { AOM_CDF4(5388, 10960, 15918) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(20745, 30773, 32093) }, - { AOM_CDF4(15200, 27221, 30861) }, - { AOM_CDF4(13032, 20873, 25667) }, - { AOM_CDF4(12285, 18663, 23494) }, - { AOM_CDF4(11563, 17481, 21489) }, - { AOM_CDF4(26260, 31982, 32320) }, - { AOM_CDF4(15397, 28083, 31100) }, - { AOM_CDF4(9742, 19217, 24824) }, - { AOM_CDF4(3261, 9629, 15362) }, - { AOM_CDF4(1480, 4322, 7499) }, - { AOM_CDF4(27599, 32256, 32460) }, - { AOM_CDF4(16857, 27659, 30774) }, - { AOM_CDF4(9551, 18290, 23748) }, - { AOM_CDF4(3052, 8933, 14103) }, - { AOM_CDF4(2021, 5910, 9787) }, - { AOM_CDF4(29005, 32015, 32392) }, - { AOM_CDF4(17677, 27694, 30863) }, - { AOM_CDF4(9204, 17356, 23219) }, - { AOM_CDF4(2403, 7516, 12814) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(10808, 22056, 26896) }, - { AOM_CDF4(25739, 32313, 32676) }, - { AOM_CDF4(17288, 30203, 32221) }, - { AOM_CDF4(11359, 24878, 29896) }, - { AOM_CDF4(6949, 17767, 24893) }, - { AOM_CDF4(4287, 11796, 18071) }, - { AOM_CDF4(27880, 32521, 32705) }, - { AOM_CDF4(19038, 31004, 32414) }, - { AOM_CDF4(12564, 26345, 30768) }, - { AOM_CDF4(8269, 19947, 26779) }, - { AOM_CDF4(5674, 14657, 21674) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { 
AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(25742, 32319, 32671) }, - { AOM_CDF4(19557, 31164, 32454) }, - { AOM_CDF4(13381, 26381, 30755) }, - { AOM_CDF4(10101, 21466, 26722) }, - { AOM_CDF4(9209, 19650, 26825) }, - { AOM_CDF4(27107, 31917, 32432) }, - { AOM_CDF4(18056, 28893, 31203) }, - { AOM_CDF4(10200, 21434, 26764) }, - { AOM_CDF4(4660, 12913, 19502) }, - { AOM_CDF4(2368, 6930, 12504) }, - { AOM_CDF4(26960, 32158, 32613) }, - { AOM_CDF4(18628, 30005, 32031) }, - { AOM_CDF4(10233, 22442, 28232) }, - { AOM_CDF4(5471, 14630, 21516) }, - { AOM_CDF4(3235, 10767, 17109) }, - { AOM_CDF4(27696, 32440, 32692) }, - { AOM_CDF4(20032, 31167, 32438) }, - { AOM_CDF4(8700, 21341, 28442) }, - { AOM_CDF4(5662, 14831, 21795) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(9704, 17294, 21132) }, - { AOM_CDF4(26762, 32278, 32633) }, - { AOM_CDF4(18382, 29620, 31819) }, - { AOM_CDF4(10891, 23475, 28723) }, - { AOM_CDF4(6358, 16583, 23309) }, - { AOM_CDF4(3248, 9118, 14141) }, - { AOM_CDF4(27204, 32573, 32699) }, - { AOM_CDF4(19818, 30824, 32329) }, - { AOM_CDF4(11772, 25120, 30041) }, - { AOM_CDF4(6995, 18033, 25039) }, - { AOM_CDF4(3752, 10442, 16098) }, - { AOM_CDF4(27222, 32256, 32559) }, - { AOM_CDF4(15356, 28399, 31475) }, - { AOM_CDF4(8821, 20635, 27057) }, - { AOM_CDF4(5511, 14404, 21239) }, - { AOM_CDF4(2935, 8222, 13051) }, - { AOM_CDF4(24875, 32120, 32529) }, - { AOM_CDF4(15233, 28265, 31445) }, - { AOM_CDF4(8605, 20570, 26932) }, - { AOM_CDF4(5431, 14413, 21196) }, - { AOM_CDF4(2994, 8341, 13223) }, - { AOM_CDF4(28201, 32604, 32700) }, - { AOM_CDF4(21041, 31446, 32456) }, - { AOM_CDF4(13221, 26213, 30475) }, - { AOM_CDF4(8255, 19385, 26037) }, - { AOM_CDF4(4930, 12585, 18830) }, - { AOM_CDF4(28768, 32448, 32627) }, - { AOM_CDF4(19705, 30561, 
32021) }, - { AOM_CDF4(11572, 23589, 28220) }, - { AOM_CDF4(5532, 15034, 21446) }, - { AOM_CDF4(2460, 7150, 11456) }, - { AOM_CDF4(29874, 32619, 32699) }, - { AOM_CDF4(21621, 31071, 32201) }, - { AOM_CDF4(12511, 24747, 28992) }, - { AOM_CDF4(6281, 16395, 22748) }, - { AOM_CDF4(3246, 9278, 14497) }, - { AOM_CDF4(29715, 32625, 32712) }, - { AOM_CDF4(20958, 31011, 32283) }, - { AOM_CDF4(11233, 23671, 28806) }, - { AOM_CDF4(6012, 16128, 22868) }, - { AOM_CDF4(3427, 9851, 15414) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(11016, 22111, 26794) }, - { AOM_CDF4(25946, 32357, 32677) }, - { AOM_CDF4(17890, 30452, 32252) }, - { AOM_CDF4(11678, 25142, 29816) }, - { AOM_CDF4(6720, 17534, 24584) }, - { AOM_CDF4(4230, 11665, 17820) }, - { AOM_CDF4(28400, 32623, 32747) }, - { AOM_CDF4(21164, 31668, 32575) }, - { AOM_CDF4(13572, 27388, 31182) }, - { AOM_CDF4(8234, 20750, 27358) }, - { AOM_CDF4(5065, 14055, 20897) }, - { AOM_CDF4(28981, 32547, 32705) }, - { AOM_CDF4(18681, 30543, 32239) }, - { AOM_CDF4(10919, 24075, 29286) }, - { AOM_CDF4(6431, 17199, 24077) }, - { AOM_CDF4(3819, 10464, 16618) }, - { AOM_CDF4(26870, 32467, 32693) }, - { AOM_CDF4(19041, 30831, 32347) }, - { AOM_CDF4(11794, 25211, 30016) }, - { AOM_CDF4(6888, 18019, 24970) }, - { AOM_CDF4(4370, 12363, 18992) }, - { AOM_CDF4(29578, 32670, 32744) }, - { AOM_CDF4(23159, 32007, 32613) }, - { AOM_CDF4(15315, 28669, 31676) }, - { AOM_CDF4(9298, 22607, 28782) }, - { AOM_CDF4(6144, 15913, 22968) }, - { AOM_CDF4(28110, 32499, 32669) }, - { AOM_CDF4(21574, 30937, 32015) }, - { AOM_CDF4(12759, 24818, 28727) }, - { AOM_CDF4(6545, 16761, 23042) }, - { AOM_CDF4(3649, 10597, 16833) }, - { AOM_CDF4(28163, 32552, 32728) }, - { AOM_CDF4(22101, 31469, 32464) }, - { AOM_CDF4(13160, 25472, 30143) }, - { AOM_CDF4(7303, 18684, 25468) }, - { AOM_CDF4(5241, 13975, 20955) }, - { AOM_CDF4(28400, 32631, 32744) }, - { AOM_CDF4(22104, 31793, 32603) }, - { AOM_CDF4(13557, 26571, 30846) }, - { AOM_CDF4(7749, 19861, 26675) }, - { 
AOM_CDF4(4873, 14030, 21234) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(9800, 17635, 21073) }, - { AOM_CDF4(26153, 31885, 32527) }, - { AOM_CDF4(15038, 27852, 31006) }, - { AOM_CDF4(8718, 20564, 26486) }, - { AOM_CDF4(5128, 14076, 20514) }, - { AOM_CDF4(2636, 7566, 11925) }, - { AOM_CDF4(27551, 32504, 32701) }, - { AOM_CDF4(18310, 30054, 32100) }, - { AOM_CDF4(10211, 23420, 29082) }, - { AOM_CDF4(6222, 16876, 23916) }, - { AOM_CDF4(3462, 9954, 15498) }, - { AOM_CDF4(29991, 32633, 32721) }, - { AOM_CDF4(19883, 30751, 32201) }, - { AOM_CDF4(11141, 24184, 29285) }, - { AOM_CDF4(6420, 16940, 23774) }, - { AOM_CDF4(3392, 9753, 15118) }, - { AOM_CDF4(28465, 32616, 32712) }, - { AOM_CDF4(19850, 30702, 32244) }, - { AOM_CDF4(10983, 24024, 29223) }, - { AOM_CDF4(6294, 16770, 23582) }, - { AOM_CDF4(3244, 9283, 14509) }, - { AOM_CDF4(30023, 32717, 32748) }, - { AOM_CDF4(22940, 32032, 32626) }, - { AOM_CDF4(14282, 27928, 31473) }, - { AOM_CDF4(8562, 21327, 27914) }, - { AOM_CDF4(4846, 13393, 19919) }, - { AOM_CDF4(29981, 32590, 32695) }, - { AOM_CDF4(20465, 30963, 32166) }, - { AOM_CDF4(11479, 23579, 28195) }, - { AOM_CDF4(5916, 15648, 22073) }, - { AOM_CDF4(3031, 8605, 13398) }, - { AOM_CDF4(31146, 32691, 32739) }, - { AOM_CDF4(23106, 31724, 32444) }, - { AOM_CDF4(13783, 26738, 30439) }, - { AOM_CDF4(7852, 19468, 25807) }, - { AOM_CDF4(3860, 11124, 16853) }, - { AOM_CDF4(31014, 32724, 32748) }, - { AOM_CDF4(23629, 32109, 32628) }, - { AOM_CDF4(14747, 28115, 31403) }, - { AOM_CDF4(8545, 21242, 27478) }, - { AOM_CDF4(4574, 12781, 19067) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(9185, 19694, 24688) }, - { AOM_CDF4(26081, 31985, 32621) }, - { AOM_CDF4(16015, 29000, 31787) }, - { AOM_CDF4(10542, 23690, 29206) }, - { AOM_CDF4(6732, 17945, 24677) }, - { AOM_CDF4(3916, 11039, 16722) }, - { AOM_CDF4(28224, 32566, 32744) }, - { AOM_CDF4(19100, 31138, 32485) }, - { AOM_CDF4(12528, 26620, 30879) }, - { AOM_CDF4(7741, 20277, 26885) }, - { AOM_CDF4(4566, 
12845, 18990) }, - { AOM_CDF4(29933, 32593, 32718) }, - { AOM_CDF4(17670, 30333, 32155) }, - { AOM_CDF4(10385, 23600, 28909) }, - { AOM_CDF4(6243, 16236, 22407) }, - { AOM_CDF4(3976, 10389, 16017) }, - { AOM_CDF4(28377, 32561, 32738) }, - { AOM_CDF4(19366, 31175, 32482) }, - { AOM_CDF4(13327, 27175, 31094) }, - { AOM_CDF4(8258, 20769, 27143) }, - { AOM_CDF4(4703, 13198, 19527) }, - { AOM_CDF4(31086, 32706, 32748) }, - { AOM_CDF4(22853, 31902, 32583) }, - { AOM_CDF4(14759, 28186, 31419) }, - { AOM_CDF4(9284, 22382, 28348) }, - { AOM_CDF4(5585, 15192, 21868) }, - { AOM_CDF4(28291, 32652, 32746) }, - { AOM_CDF4(19849, 32107, 32571) }, - { AOM_CDF4(14834, 26818, 29214) }, - { AOM_CDF4(10306, 22594, 28672) }, - { AOM_CDF4(6615, 17384, 23384) }, - { AOM_CDF4(28947, 32604, 32745) }, - { AOM_CDF4(25625, 32289, 32646) }, - { AOM_CDF4(18758, 28672, 31403) }, - { AOM_CDF4(10017, 23430, 28523) }, - { AOM_CDF4(6862, 15269, 22131) }, - { AOM_CDF4(23933, 32509, 32739) }, - { AOM_CDF4(19927, 31495, 32631) }, - { AOM_CDF4(11903, 26023, 30621) }, - { AOM_CDF4(7026, 20094, 27252) }, - { AOM_CDF4(5998, 18106, 24437) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(4456, 11274, 15533) }, - { AOM_CDF4(21219, 29079, 31616) }, - { AOM_CDF4(11173, 23774, 28567) }, - { AOM_CDF4(7282, 18293, 24263) }, - { AOM_CDF4(4890, 13286, 19115) }, - { AOM_CDF4(1890, 5508, 8659) }, - { AOM_CDF4(26651, 32136, 32647) }, - { AOM_CDF4(14630, 28254, 31455) }, - { AOM_CDF4(8716, 21287, 27395) }, - { AOM_CDF4(5615, 15331, 22008) }, - { AOM_CDF4(2675, 7700, 12150) }, - { AOM_CDF4(29954, 32526, 32690) }, - { AOM_CDF4(16126, 28982, 31633) }, - { AOM_CDF4(9030, 21361, 27352) }, - { AOM_CDF4(5411, 14793, 21271) }, - { AOM_CDF4(2943, 8422, 13163) }, - { AOM_CDF4(29539, 32601, 32730) }, - { AOM_CDF4(18125, 30385, 32201) }, - { AOM_CDF4(10422, 24090, 29468) }, - { AOM_CDF4(6468, 17487, 24438) }, - { AOM_CDF4(2970, 8653, 13531) }, - { AOM_CDF4(30912, 32715, 32748) }, - { AOM_CDF4(20666, 31373, 32497) }, - { 
AOM_CDF4(12509, 26640, 30917) }, - { AOM_CDF4(8058, 20629, 27290) }, - { AOM_CDF4(4231, 12006, 18052) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(10202, 20633, 25484) }, - { AOM_CDF4(27336, 31445, 32352) }, - { AOM_CDF4(12420, 24384, 28552) }, - { AOM_CDF4(7648, 18115, 23856) }, - { AOM_CDF4(5662, 14341, 19902) }, - { AOM_CDF4(3611, 10328, 15390) }, - { AOM_CDF4(30945, 32616, 32736) }, - { AOM_CDF4(18682, 30505, 32253) }, - { AOM_CDF4(11513, 25336, 30203) }, - { AOM_CDF4(7449, 19452, 26148) }, - { AOM_CDF4(4482, 13051, 18886) }, - { AOM_CDF4(32022, 32690, 32747) }, - { AOM_CDF4(18578, 30501, 32146) }, - { AOM_CDF4(11249, 23368, 28631) }, - { AOM_CDF4(5645, 16958, 22158) }, - { AOM_CDF4(5009, 11444, 16637) }, - { AOM_CDF4(31357, 32710, 32748) }, - { AOM_CDF4(21552, 31494, 32504) }, - { AOM_CDF4(13891, 27677, 31340) }, - { AOM_CDF4(9051, 22098, 28172) }, - { AOM_CDF4(5190, 13377, 19486) }, - { AOM_CDF4(32364, 32740, 32748) }, - { AOM_CDF4(24839, 31907, 32551) }, - { AOM_CDF4(17160, 28779, 31696) }, - { AOM_CDF4(12452, 24137, 29602) }, - { AOM_CDF4(6165, 15389, 22477) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - 
{ AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(2575, 7281, 11077) }, - { AOM_CDF4(14002, 20866, 25402) }, - { AOM_CDF4(6343, 15056, 19658) }, - { AOM_CDF4(4474, 11858, 17041) }, - { AOM_CDF4(2865, 8299, 12534) }, - { AOM_CDF4(1344, 3949, 6391) }, - { AOM_CDF4(24720, 31239, 32459) }, - { AOM_CDF4(12585, 25356, 29968) }, - { AOM_CDF4(7181, 18246, 24444) }, - { AOM_CDF4(5025, 13667, 19885) }, - { AOM_CDF4(2521, 7304, 11605) }, - { AOM_CDF4(29908, 32252, 32584) }, - { AOM_CDF4(17421, 29156, 31575) }, - { AOM_CDF4(9889, 22188, 27782) }, - { AOM_CDF4(5878, 15647, 22123) }, - { AOM_CDF4(2814, 8665, 13323) }, - { AOM_CDF4(30183, 32568, 32713) }, - { AOM_CDF4(18528, 30195, 32049) }, - { AOM_CDF4(10982, 24606, 29657) }, - { AOM_CDF4(6957, 18165, 25231) }, - { AOM_CDF4(3508, 10118, 15468) }, - { AOM_CDF4(31761, 32736, 32748) }, - { AOM_CDF4(21041, 31328, 32546) }, - { AOM_CDF4(12568, 26732, 31166) }, - { AOM_CDF4(8052, 20720, 27733) }, - { AOM_CDF4(4336, 12192, 18396) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { 
AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } } }, - { { { { AOM_CDF4(7062, 16472, 22319) }, - { AOM_CDF4(24538, 32261, 32674) }, - { AOM_CDF4(13675, 28041, 31779) }, - { AOM_CDF4(8590, 20674, 27631) }, - { AOM_CDF4(5685, 14675, 22013) }, - { AOM_CDF4(3655, 9898, 15731) }, - { AOM_CDF4(26493, 32418, 32658) }, - { AOM_CDF4(16376, 29342, 32090) }, - { AOM_CDF4(10594, 22649, 28970) }, - { AOM_CDF4(8176, 17170, 24303) }, - { AOM_CDF4(5605, 12694, 19139) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { 
AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(23888, 31902, 32542) }, - { AOM_CDF4(18612, 29687, 31987) }, - { AOM_CDF4(16245, 24852, 29249) }, - { AOM_CDF4(15765, 22608, 27559) }, - { AOM_CDF4(19895, 24699, 27510) }, - { AOM_CDF4(28401, 32212, 32457) }, - { AOM_CDF4(15274, 27825, 30980) }, - { AOM_CDF4(9364, 18128, 24332) }, - { AOM_CDF4(2283, 8193, 15082) }, - { AOM_CDF4(1228, 3972, 7881) }, - { AOM_CDF4(29455, 32469, 32620) }, - { AOM_CDF4(17981, 28245, 31388) }, - { AOM_CDF4(10921, 20098, 26240) }, - { AOM_CDF4(3743, 11829, 18657) }, - { AOM_CDF4(2374, 9593, 15715) }, - { AOM_CDF4(31068, 32466, 32635) }, - { AOM_CDF4(20321, 29572, 31971) }, - { AOM_CDF4(10771, 20255, 27119) }, - { AOM_CDF4(2795, 10410, 17361) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(9320, 22102, 27840) }, - { AOM_CDF4(27057, 32464, 32724) }, - { AOM_CDF4(16331, 30268, 32309) }, - { AOM_CDF4(10319, 23935, 29720) }, - { AOM_CDF4(6189, 16448, 24106) }, - { AOM_CDF4(3589, 10884, 18808) }, - { AOM_CDF4(29026, 32624, 32748) }, - { AOM_CDF4(19226, 31507, 32587) }, - { AOM_CDF4(12692, 26921, 31203) }, - { AOM_CDF4(7049, 19532, 27635) }, - { AOM_CDF4(7727, 15669, 23252) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(28056, 32625, 32748) }, - { AOM_CDF4(22383, 32075, 32669) }, - { AOM_CDF4(15417, 27098, 31749) }, - { AOM_CDF4(18127, 26493, 27190) }, - { AOM_CDF4(5461, 16384, 21845) }, - { AOM_CDF4(27982, 32091, 32584) }, - { AOM_CDF4(19045, 29868, 31972) }, - { AOM_CDF4(10397, 22266, 27932) }, - { AOM_CDF4(5990, 13697, 21500) }, - { AOM_CDF4(1792, 6912, 15104) }, - { AOM_CDF4(28198, 32501, 32718) }, - { AOM_CDF4(21534, 31521, 32569) 
}, - { AOM_CDF4(11109, 25217, 30017) }, - { AOM_CDF4(5671, 15124, 26151) }, - { AOM_CDF4(4681, 14043, 18725) }, - { AOM_CDF4(28688, 32580, 32741) }, - { AOM_CDF4(22576, 32079, 32661) }, - { AOM_CDF4(10627, 22141, 28340) }, - { AOM_CDF4(9362, 14043, 28087) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(7754, 16948, 22142) }, - { AOM_CDF4(25670, 32330, 32691) }, - { AOM_CDF4(15663, 29225, 31994) }, - { AOM_CDF4(9878, 23288, 29158) }, - { AOM_CDF4(6419, 17088, 24336) }, - { AOM_CDF4(3859, 11003, 17039) }, - { AOM_CDF4(27562, 32595, 32725) }, - { AOM_CDF4(17575, 30588, 32399) }, - { AOM_CDF4(10819, 24838, 30309) }, - { AOM_CDF4(7124, 18686, 25916) }, - { AOM_CDF4(4479, 12688, 19340) }, - { AOM_CDF4(28385, 32476, 32673) }, - { AOM_CDF4(15306, 29005, 31938) }, - { AOM_CDF4(8937, 21615, 28322) }, - { AOM_CDF4(5982, 15603, 22786) }, - { AOM_CDF4(3620, 10267, 16136) }, - { AOM_CDF4(27280, 32464, 32667) }, - { AOM_CDF4(15607, 29160, 32004) }, - { AOM_CDF4(9091, 22135, 28740) }, - { AOM_CDF4(6232, 16632, 24020) }, - { AOM_CDF4(4047, 11377, 17672) }, - { AOM_CDF4(29220, 32630, 32718) }, - { AOM_CDF4(19650, 31220, 32462) }, - { AOM_CDF4(13050, 26312, 30827) }, - { AOM_CDF4(9228, 20870, 27468) }, - { AOM_CDF4(6146, 15149, 21971) }, - { AOM_CDF4(30169, 32481, 32623) }, - { AOM_CDF4(17212, 29311, 31554) }, - { AOM_CDF4(9911, 21311, 26882) }, - { AOM_CDF4(4487, 13314, 20372) }, - { AOM_CDF4(2570, 7772, 12889) }, - { AOM_CDF4(30924, 32613, 32708) }, - { AOM_CDF4(19490, 30206, 32107) }, - { AOM_CDF4(11232, 23998, 29276) }, - { AOM_CDF4(6769, 17955, 25035) }, - { AOM_CDF4(4398, 12623, 19214) }, - { AOM_CDF4(30609, 32627, 32722) }, - { AOM_CDF4(19370, 30582, 32287) }, - { AOM_CDF4(10457, 23619, 29409) }, - { AOM_CDF4(6443, 17637, 24834) }, - { AOM_CDF4(4645, 13236, 20106) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8626, 20271, 26216) }, - { AOM_CDF4(26707, 32406, 32711) }, - { AOM_CDF4(16999, 30329, 32286) }, - { 
AOM_CDF4(11445, 25123, 30286) }, - { AOM_CDF4(6411, 18828, 25601) }, - { AOM_CDF4(6801, 12458, 20248) }, - { AOM_CDF4(29918, 32682, 32748) }, - { AOM_CDF4(20649, 31739, 32618) }, - { AOM_CDF4(12879, 27773, 31581) }, - { AOM_CDF4(7896, 21751, 28244) }, - { AOM_CDF4(5260, 14870, 23698) }, - { AOM_CDF4(29252, 32593, 32731) }, - { AOM_CDF4(17072, 30460, 32294) }, - { AOM_CDF4(10653, 24143, 29365) }, - { AOM_CDF4(6536, 17490, 23983) }, - { AOM_CDF4(4929, 13170, 20085) }, - { AOM_CDF4(28137, 32518, 32715) }, - { AOM_CDF4(18171, 30784, 32407) }, - { AOM_CDF4(11437, 25436, 30459) }, - { AOM_CDF4(7252, 18534, 26176) }, - { AOM_CDF4(4126, 13353, 20978) }, - { AOM_CDF4(31162, 32726, 32748) }, - { AOM_CDF4(23017, 32222, 32701) }, - { AOM_CDF4(15629, 29233, 32046) }, - { AOM_CDF4(9387, 22621, 29480) }, - { AOM_CDF4(6922, 17616, 25010) }, - { AOM_CDF4(28838, 32265, 32614) }, - { AOM_CDF4(19701, 30206, 31920) }, - { AOM_CDF4(11214, 22410, 27933) }, - { AOM_CDF4(5320, 14177, 23034) }, - { AOM_CDF4(5049, 12881, 17827) }, - { AOM_CDF4(27484, 32471, 32734) }, - { AOM_CDF4(21076, 31526, 32561) }, - { AOM_CDF4(12707, 26303, 31211) }, - { AOM_CDF4(8169, 21722, 28219) }, - { AOM_CDF4(6045, 19406, 27042) }, - { AOM_CDF4(27753, 32572, 32745) }, - { AOM_CDF4(20832, 31878, 32653) }, - { AOM_CDF4(13250, 27356, 31674) }, - { AOM_CDF4(7718, 21508, 29858) }, - { AOM_CDF4(7209, 18350, 25559) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(7876, 16901, 21741) }, - { AOM_CDF4(24001, 31898, 32625) }, - { AOM_CDF4(14529, 27959, 31451) }, - { AOM_CDF4(8273, 20818, 27258) }, - { AOM_CDF4(5278, 14673, 21510) }, - { AOM_CDF4(2983, 8843, 14039) }, - { AOM_CDF4(28016, 32574, 32732) }, - { AOM_CDF4(17471, 30306, 32301) }, - { AOM_CDF4(10224, 24063, 29728) }, - { AOM_CDF4(6602, 17954, 25052) }, - { AOM_CDF4(4002, 11585, 17759) }, - { AOM_CDF4(30190, 32634, 32739) }, - { AOM_CDF4(17497, 30282, 32270) }, - { AOM_CDF4(10229, 23729, 29538) }, - { AOM_CDF4(6344, 17211, 24440) }, - { AOM_CDF4(3849, 
11189, 17108) }, - { AOM_CDF4(28570, 32583, 32726) }, - { AOM_CDF4(17521, 30161, 32238) }, - { AOM_CDF4(10153, 23565, 29378) }, - { AOM_CDF4(6455, 17341, 24443) }, - { AOM_CDF4(3907, 11042, 17024) }, - { AOM_CDF4(30689, 32715, 32748) }, - { AOM_CDF4(21546, 31840, 32610) }, - { AOM_CDF4(13547, 27581, 31459) }, - { AOM_CDF4(8912, 21757, 28309) }, - { AOM_CDF4(5548, 15080, 22046) }, - { AOM_CDF4(30783, 32540, 32685) }, - { AOM_CDF4(17540, 29528, 31668) }, - { AOM_CDF4(10160, 21468, 26783) }, - { AOM_CDF4(4724, 13393, 20054) }, - { AOM_CDF4(2702, 8174, 13102) }, - { AOM_CDF4(31648, 32686, 32742) }, - { AOM_CDF4(20954, 31094, 32337) }, - { AOM_CDF4(12420, 25698, 30179) }, - { AOM_CDF4(7304, 19320, 26248) }, - { AOM_CDF4(4366, 12261, 18864) }, - { AOM_CDF4(31581, 32723, 32748) }, - { AOM_CDF4(21373, 31586, 32525) }, - { AOM_CDF4(12744, 26625, 30885) }, - { AOM_CDF4(7431, 20322, 26950) }, - { AOM_CDF4(4692, 13323, 20111) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(7833, 18369, 24095) }, - { AOM_CDF4(26650, 32273, 32702) }, - { AOM_CDF4(16371, 29961, 32191) }, - { AOM_CDF4(11055, 24082, 29629) }, - { AOM_CDF4(6892, 18644, 25400) }, - { AOM_CDF4(5006, 13057, 19240) }, - { AOM_CDF4(29834, 32666, 32748) }, - { AOM_CDF4(19577, 31335, 32570) }, - { AOM_CDF4(12253, 26509, 31122) }, - { AOM_CDF4(7991, 20772, 27711) }, - { AOM_CDF4(5677, 15910, 23059) }, - { AOM_CDF4(30109, 32532, 32720) }, - { AOM_CDF4(16747, 30166, 32252) }, - { AOM_CDF4(10134, 23542, 29184) }, - { AOM_CDF4(5791, 16176, 23556) }, - { AOM_CDF4(4362, 10414, 17284) }, - { AOM_CDF4(29492, 32626, 32748) }, - { AOM_CDF4(19894, 31402, 32525) }, - { AOM_CDF4(12942, 27071, 30869) }, - { AOM_CDF4(8346, 21216, 27405) }, - { AOM_CDF4(6572, 17087, 23859) }, - { AOM_CDF4(32035, 32735, 32748) }, - { AOM_CDF4(22957, 31838, 32618) }, - { AOM_CDF4(14724, 28572, 31772) }, - { AOM_CDF4(10364, 23999, 29553) }, - { AOM_CDF4(7004, 18433, 25655) }, - { AOM_CDF4(27528, 32277, 32681) }, - { AOM_CDF4(16959, 31171, 32096) }, - 
{ AOM_CDF4(10486, 23593, 27962) }, - { AOM_CDF4(8192, 16384, 23211) }, - { AOM_CDF4(8937, 17873, 20852) }, - { AOM_CDF4(27715, 32002, 32615) }, - { AOM_CDF4(15073, 29491, 31676) }, - { AOM_CDF4(11264, 24576, 28672) }, - { AOM_CDF4(2341, 18725, 23406) }, - { AOM_CDF4(7282, 18204, 25486) }, - { AOM_CDF4(28547, 32213, 32657) }, - { AOM_CDF4(20788, 29773, 32239) }, - { AOM_CDF4(6780, 21469, 30508) }, - { AOM_CDF4(5958, 14895, 23831) }, - { AOM_CDF4(16384, 21845, 27307) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(5992, 14304, 19765) }, - { AOM_CDF4(22612, 31238, 32456) }, - { AOM_CDF4(13456, 27162, 31087) }, - { AOM_CDF4(8001, 20062, 26504) }, - { AOM_CDF4(5168, 14105, 20764) }, - { AOM_CDF4(2632, 7771, 12385) }, - { AOM_CDF4(27034, 32344, 32709) }, - { AOM_CDF4(15850, 29415, 31997) }, - { AOM_CDF4(9494, 22776, 28841) }, - { AOM_CDF4(6151, 16830, 23969) }, - { AOM_CDF4(3461, 10039, 15722) }, - { AOM_CDF4(30134, 32569, 32731) }, - { AOM_CDF4(15638, 29422, 31945) }, - { AOM_CDF4(9150, 21865, 28218) }, - { AOM_CDF4(5647, 15719, 22676) }, - { AOM_CDF4(3402, 9772, 15477) }, - { AOM_CDF4(28530, 32586, 32735) }, - { AOM_CDF4(17139, 30298, 32292) }, - { AOM_CDF4(10200, 24039, 29685) }, - { AOM_CDF4(6419, 17674, 24786) }, - { AOM_CDF4(3544, 10225, 15824) }, - { AOM_CDF4(31333, 32726, 32748) }, - { AOM_CDF4(20618, 31487, 32544) }, - { AOM_CDF4(12901, 27217, 31232) }, - { AOM_CDF4(8624, 21734, 28171) }, - { AOM_CDF4(5104, 14191, 20748) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) 
}, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(11206, 21090, 26561) }, - { AOM_CDF4(28759, 32279, 32671) }, - { AOM_CDF4(14171, 27952, 31569) }, - { AOM_CDF4(9743, 22907, 29141) }, - { AOM_CDF4(6871, 17886, 24868) }, - { AOM_CDF4(4960, 13152, 19315) }, - { AOM_CDF4(31077, 32661, 32748) }, - { AOM_CDF4(19400, 31195, 32515) }, - { AOM_CDF4(12752, 26858, 31040) }, - { AOM_CDF4(8370, 22098, 28591) }, - { AOM_CDF4(5457, 15373, 22298) }, - { AOM_CDF4(31697, 32706, 32748) }, - { AOM_CDF4(17860, 30657, 32333) }, - { AOM_CDF4(12510, 24812, 29261) }, - { AOM_CDF4(6180, 19124, 24722) }, - { AOM_CDF4(5041, 13548, 17959) }, - { AOM_CDF4(31552, 32716, 32748) }, - { AOM_CDF4(21908, 31769, 32623) }, - { AOM_CDF4(14470, 28201, 31565) }, - { AOM_CDF4(9493, 22982, 28608) }, - { AOM_CDF4(6858, 17240, 24137) }, - { AOM_CDF4(32543, 32752, 32756) }, - { AOM_CDF4(24286, 32097, 32666) }, - { AOM_CDF4(15958, 29217, 32024) }, - { AOM_CDF4(10207, 24234, 29958) }, - { AOM_CDF4(6929, 18305, 25652) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } }, - { { { AOM_CDF4(4137, 10847, 15682) }, - { AOM_CDF4(17824, 27001, 30058) }, - { AOM_CDF4(10204, 22796, 28291) }, - { AOM_CDF4(6076, 15935, 22125) }, - { AOM_CDF4(3852, 10937, 16816) }, - { AOM_CDF4(2252, 6324, 10131) }, - { AOM_CDF4(25840, 32016, 32662) }, - { AOM_CDF4(15109, 28268, 31531) }, - { AOM_CDF4(9385, 22231, 28340) }, - { AOM_CDF4(6082, 16672, 23479) }, - { AOM_CDF4(3318, 9427, 14681) }, - { AOM_CDF4(30594, 
32574, 32718) }, - { AOM_CDF4(16836, 29552, 31859) }, - { AOM_CDF4(9556, 22542, 28356) }, - { AOM_CDF4(6305, 16725, 23540) }, - { AOM_CDF4(3376, 9895, 15184) }, - { AOM_CDF4(29383, 32617, 32745) }, - { AOM_CDF4(18891, 30809, 32401) }, - { AOM_CDF4(11688, 25942, 30687) }, - { AOM_CDF4(7468, 19469, 26651) }, - { AOM_CDF4(3909, 11358, 17012) }, - { AOM_CDF4(31564, 32736, 32748) }, - { AOM_CDF4(20906, 31611, 32600) }, - { AOM_CDF4(13191, 27621, 31537) }, - { AOM_CDF4(8768, 22029, 28676) }, - { AOM_CDF4(5079, 14109, 20906) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } }, - { { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 
24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) }, - { AOM_CDF4(8192, 16384, 24576) } } } } }; - -static const aom_cdf_prob av1_default_coeff_base_eob_multi_cdfs - [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB][CDF_SIZE( - NUM_BASE_LEVELS + 1)] = { { { { { AOM_CDF3(17837, 29055) }, - { AOM_CDF3(29600, 31446) }, - { AOM_CDF3(30844, 31878) }, - { AOM_CDF3(24926, 28948) } }, - { { AOM_CDF3(21365, 30026) }, - { AOM_CDF3(30512, 32423) }, - { AOM_CDF3(31658, 32621) }, - { AOM_CDF3(29630, 31881) } } }, - { { { AOM_CDF3(5717, 26477) }, - { AOM_CDF3(30491, 31703) }, - { AOM_CDF3(31550, 32158) }, - { AOM_CDF3(29648, 31491) } }, - { { AOM_CDF3(12608, 27820) }, - { AOM_CDF3(30680, 32225) }, - { AOM_CDF3(30809, 32335) }, - { AOM_CDF3(31299, 32423) } } }, - { { { AOM_CDF3(1786, 12612) }, - { AOM_CDF3(30663, 31625) }, - { AOM_CDF3(32339, 32468) }, - { AOM_CDF3(31148, 31833) } }, - { { AOM_CDF3(18857, 23865) }, - { AOM_CDF3(31428, 32428) }, - { AOM_CDF3(31744, 32373) }, - { AOM_CDF3(31775, 32526) } } }, - { { { AOM_CDF3(1787, 2532) }, - { AOM_CDF3(30832, 31662) }, - { AOM_CDF3(31824, 32682) }, - { AOM_CDF3(32133, 32569) } }, - { { AOM_CDF3(13751, 22235) }, - { AOM_CDF3(32089, 32409) }, - { AOM_CDF3(27084, 27920) }, - { AOM_CDF3(29291, 32594) } } }, - { { { AOM_CDF3(1725, 3449) }, - { AOM_CDF3(31102, 31935) }, - { AOM_CDF3(32457, 32613) }, - { AOM_CDF3(32412, 32649) } }, - { { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) }, - { 
AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) } } } }, - { { { { AOM_CDF3(17560, 29888) }, - { AOM_CDF3(29671, 31549) }, - { AOM_CDF3(31007, 32056) }, - { AOM_CDF3(27286, 30006) } }, - { { AOM_CDF3(26594, 31212) }, - { AOM_CDF3(31208, 32582) }, - { AOM_CDF3(31835, 32637) }, - { AOM_CDF3(30595, 32206) } } }, - { { { AOM_CDF3(15239, 29932) }, - { AOM_CDF3(31315, 32095) }, - { AOM_CDF3(32130, 32434) }, - { AOM_CDF3(30864, 31996) } }, - { { AOM_CDF3(26279, 30968) }, - { AOM_CDF3(31142, 32495) }, - { AOM_CDF3(31713, 32540) }, - { AOM_CDF3(31929, 32594) } } }, - { { { AOM_CDF3(2644, 25198) }, - { AOM_CDF3(32038, 32451) }, - { AOM_CDF3(32639, 32695) }, - { AOM_CDF3(32166, 32518) } }, - { { AOM_CDF3(17187, 27668) }, - { AOM_CDF3(31714, 32550) }, - { AOM_CDF3(32283, 32678) }, - { AOM_CDF3(31930, 32563) } } }, - { { { AOM_CDF3(1044, 2257) }, - { AOM_CDF3(30755, 31923) }, - { AOM_CDF3(32208, 32693) }, - { AOM_CDF3(32244, 32615) } }, - { { AOM_CDF3(21317, 26207) }, - { AOM_CDF3(29133, 30868) }, - { AOM_CDF3(29311, 31231) }, - { AOM_CDF3(29657, 31087) } } }, - { { { AOM_CDF3(478, 1834) }, - { AOM_CDF3(31005, 31987) }, - { AOM_CDF3(32317, 32724) }, - { AOM_CDF3(30865, 32648) } }, - { { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) } } } }, - { { { { AOM_CDF3(20092, 30774) }, - { AOM_CDF3(30695, 32020) }, - { AOM_CDF3(31131, 32103) }, - { AOM_CDF3(28666, 30870) } }, - { { AOM_CDF3(27258, 31095) }, - { AOM_CDF3(31804, 32623) }, - { AOM_CDF3(31763, 32528) }, - { AOM_CDF3(31438, 32506) } } }, - { { { AOM_CDF3(18049, 30489) }, - { AOM_CDF3(31706, 32286) }, - { AOM_CDF3(32163, 32473) }, - { AOM_CDF3(31550, 32184) } }, - { { AOM_CDF3(27116, 30842) }, - { AOM_CDF3(31971, 32598) }, - { AOM_CDF3(32088, 32576) }, - { AOM_CDF3(32067, 32664) } } }, - { { { AOM_CDF3(12854, 29093) }, - { AOM_CDF3(32272, 32558) }, - { AOM_CDF3(32667, 32729) }, - { AOM_CDF3(32306, 32585) } }, - { { AOM_CDF3(25476, 30366) }, - { 
AOM_CDF3(32169, 32687) }, - { AOM_CDF3(32479, 32689) }, - { AOM_CDF3(31673, 32634) } } }, - { { { AOM_CDF3(2809, 19301) }, - { AOM_CDF3(32205, 32622) }, - { AOM_CDF3(32338, 32730) }, - { AOM_CDF3(31786, 32616) } }, - { { AOM_CDF3(22737, 29105) }, - { AOM_CDF3(30810, 32362) }, - { AOM_CDF3(30014, 32627) }, - { AOM_CDF3(30528, 32574) } } }, - { { { AOM_CDF3(935, 3382) }, - { AOM_CDF3(30789, 31909) }, - { AOM_CDF3(32466, 32756) }, - { AOM_CDF3(30860, 32513) } }, - { { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) } } } }, - { { { { AOM_CDF3(22497, 31198) }, - { AOM_CDF3(31715, 32495) }, - { AOM_CDF3(31606, 32337) }, - { AOM_CDF3(30388, 31990) } }, - { { AOM_CDF3(27877, 31584) }, - { AOM_CDF3(32170, 32728) }, - { AOM_CDF3(32155, 32688) }, - { AOM_CDF3(32219, 32702) } } }, - { { { AOM_CDF3(21457, 31043) }, - { AOM_CDF3(31951, 32483) }, - { AOM_CDF3(32153, 32562) }, - { AOM_CDF3(31473, 32215) } }, - { { AOM_CDF3(27558, 31151) }, - { AOM_CDF3(32020, 32640) }, - { AOM_CDF3(32097, 32575) }, - { AOM_CDF3(32242, 32719) } } }, - { { { AOM_CDF3(19980, 30591) }, - { AOM_CDF3(32219, 32597) }, - { AOM_CDF3(32581, 32706) }, - { AOM_CDF3(31803, 32287) } }, - { { AOM_CDF3(26473, 30507) }, - { AOM_CDF3(32431, 32723) }, - { AOM_CDF3(32196, 32611) }, - { AOM_CDF3(31588, 32528) } } }, - { { { AOM_CDF3(24647, 30463) }, - { AOM_CDF3(32412, 32695) }, - { AOM_CDF3(32468, 32720) }, - { AOM_CDF3(31269, 32523) } }, - { { AOM_CDF3(28482, 31505) }, - { AOM_CDF3(32152, 32701) }, - { AOM_CDF3(31732, 32598) }, - { AOM_CDF3(31767, 32712) } } }, - { { { AOM_CDF3(12358, 24977) }, - { AOM_CDF3(31331, 32385) }, - { AOM_CDF3(32634, 32756) }, - { AOM_CDF3(30411, 32548) } }, - { { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) }, - { AOM_CDF3(10923, 21845) } } } } }; - -#endif // AOM_AV1_COMMON_TOKEN_CDFS_H_ diff --git a/third_party/aom/av1/common/txb_common.c b/third_party/aom/av1/common/txb_common.c 
deleted file mode 100644 index c96d37cca..000000000 --- a/third_party/aom/av1/common/txb_common.c +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#include "aom/aom_integer.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/txb_common.h" - -const int8_t av1_coeff_band_4x4[16] = { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15 }; - -const int8_t av1_coeff_band_8x8[64] = { - 0, 1, 2, 2, 3, 3, 4, 4, 5, 6, 2, 2, 3, 3, 4, 4, - 7, 7, 8, 8, 9, 9, 10, 10, 7, 7, 8, 8, 9, 9, 10, 10, - 11, 11, 12, 12, 13, 13, 14, 14, 11, 11, 12, 12, 13, 13, 14, 14, - 15, 15, 16, 16, 17, 17, 18, 18, 15, 15, 16, 16, 17, 17, 18, 18, -}; - -const int8_t av1_coeff_band_16x16[256] = { - 0, 1, 4, 4, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 2, 3, 4, - 4, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 5, 5, 6, 6, 7, 7, - 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 5, 5, 6, 6, 7, 7, 7, 7, 8, - 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, - 13, 13, 13, 13, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, - 13, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 10, 10, - 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, - 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 14, 14, 14, 14, 15, 15, 15, 15, - 16, 16, 16, 16, 17, 17, 17, 17, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, - 16, 17, 17, 17, 17, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, - 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 18, - 18, 
18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 18, 18, 18, 18, 19, 19, 19, - 19, 20, 20, 20, 20, 21, 21, 21, 21, -}; - -const int8_t av1_coeff_band_32x32[1024] = { - 0, 1, 4, 4, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, - 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 2, 3, 4, 4, 7, 7, - 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, - 12, 12, 12, 12, 12, 12, 12, 5, 5, 6, 6, 7, 7, 7, 7, 10, 10, 10, 10, - 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, - 12, 5, 5, 6, 6, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, - 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 8, 8, 8, 8, 9, - 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 12, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, - 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, - 12, 12, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, - 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 8, 8, 8, 8, - 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, - 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, - 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, - 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 13, 13, 13, - 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, - 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13, 14, - 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, - 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, - 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 13, 13, - 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, - 15, 15, 15, 16, 16, 16, 16, 
16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, - 16, 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, - 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, - 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, - 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17, 17, 17, - 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, - 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, - 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, - 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, - 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17, 17, - 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, - 20, 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, - 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, - 20, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, - 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17, - 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, - 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, - 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, - 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, - 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 21, 21, 21, 21, - 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, - 23, 24, 24, 24, 24, 24, 24, 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, - 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, - 24, 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 21, 21, 21, - 
21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, - 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22, - 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, - 24, 24, 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, - 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, -}; - -// The ctx offset table when TX is TX_CLASS_2D. -// TX col and row indices are clamped to 4 - -const int8_t av1_nz_map_ctx_offset_4x4[16] = { - 0, 1, 6, 6, 1, 6, 6, 21, 6, 6, 21, 21, 6, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_8x8[64] = { - 0, 1, 6, 6, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, 21, 21, - 6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_16x16[256] = { - 0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 1, 6, 6, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 6, 6, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - 
-const int8_t av1_nz_map_ctx_offset_32x32[1024] = { - 0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_8x4[32] = { - 0, 16, 6, 6, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21, 21, 21, - 16, 16, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_8x16[128] = { - 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, - 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_16x8[128] = { - 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_16x32[512] = { - 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_32x16[512] = { - 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_32x64[1024] = { - 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 
11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_64x32[1024] = { - 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, - 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, 21, 21, 21, 16, 16, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_4x16[64] = { - 0, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 6, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_16x4[64] = { - 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 16, 16, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_8x32[256] = { - 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, - 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t av1_nz_map_ctx_offset_32x8[256] = { - 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, -}; - -const int8_t *av1_nz_map_ctx_offset[19] = { - av1_nz_map_ctx_offset_4x4, // TX_4x4 - av1_nz_map_ctx_offset_8x8, // TX_8x8 - av1_nz_map_ctx_offset_16x16, // TX_16x16 - av1_nz_map_ctx_offset_32x32, // TX_32x32 - av1_nz_map_ctx_offset_32x32, // TX_32x32 - av1_nz_map_ctx_offset_4x16, // TX_4x8 - av1_nz_map_ctx_offset_8x4, // TX_8x4 - av1_nz_map_ctx_offset_8x32, // TX_8x16 - av1_nz_map_ctx_offset_16x8, // TX_16x8 - av1_nz_map_ctx_offset_16x32, // TX_16x32 - av1_nz_map_ctx_offset_32x16, // TX_32x16 - av1_nz_map_ctx_offset_32x64, // TX_32x64 - av1_nz_map_ctx_offset_64x32, // TX_64x32 - av1_nz_map_ctx_offset_4x16, // TX_4x16 - av1_nz_map_ctx_offset_16x4, // TX_16x4 - av1_nz_map_ctx_offset_8x32, // TX_8x32 - 
av1_nz_map_ctx_offset_32x8, // TX_32x8 - av1_nz_map_ctx_offset_16x32, // TX_16x64 - av1_nz_map_ctx_offset_64x32, // TX_64x16 -}; - -void av1_init_lv_map(AV1_COMMON *cm) { - LV_MAP_CTX_TABLE *coeff_ctx_table = &cm->coeff_ctx_table; - for (int row = 0; row < 2; ++row) { - for (int col = 0; col < 2; ++col) { - for (int sig_mag = 0; sig_mag < 3; ++sig_mag) { - for (int count = 0; count < BASE_CONTEXT_POSITION_NUM + 1; ++count) { - if (row == 0 && col == 0 && count > 5) continue; - if ((row == 0 || col == 0) && count > 8) continue; - - coeff_ctx_table->base_ctx_table[row][col][sig_mag][count] = - get_base_ctx_from_count_mag(row, col, count, sig_mag); - } - } - } - } -} - -const int16_t k_eob_group_start[12] = { 0, 1, 2, 3, 5, 9, - 17, 33, 65, 129, 257, 513 }; -const int16_t k_eob_offset_bits[12] = { 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; diff --git a/third_party/aom/av1/common/txb_common.h b/third_party/aom/av1/common/txb_common.h deleted file mode 100644 index 1dda51f8b..000000000 --- a/third_party/aom/av1/common/txb_common.h +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_TXB_COMMON_H_ -#define AOM_AV1_COMMON_TXB_COMMON_H_ - -extern const int16_t k_eob_group_start[12]; -extern const int16_t k_eob_offset_bits[12]; - -extern const int8_t av1_coeff_band_4x4[16]; - -extern const int8_t av1_coeff_band_8x8[64]; - -extern const int8_t av1_coeff_band_16x16[256]; - -extern const int8_t av1_coeff_band_32x32[1024]; - -extern const int8_t *av1_nz_map_ctx_offset[TX_SIZES_ALL]; - -typedef struct txb_ctx { - int txb_skip_ctx; - int dc_sign_ctx; -} TXB_CTX; - -static const int base_level_count_to_index[13] = { - 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, -}; - -static const TX_CLASS tx_type_to_class[TX_TYPES] = { - TX_CLASS_2D, // DCT_DCT - TX_CLASS_2D, // ADST_DCT - TX_CLASS_2D, // DCT_ADST - TX_CLASS_2D, // ADST_ADST - TX_CLASS_2D, // FLIPADST_DCT - TX_CLASS_2D, // DCT_FLIPADST - TX_CLASS_2D, // FLIPADST_FLIPADST - TX_CLASS_2D, // ADST_FLIPADST - TX_CLASS_2D, // FLIPADST_ADST - TX_CLASS_2D, // IDTX - TX_CLASS_VERT, // V_DCT - TX_CLASS_HORIZ, // H_DCT - TX_CLASS_VERT, // V_ADST - TX_CLASS_HORIZ, // H_ADST - TX_CLASS_VERT, // V_FLIPADST - TX_CLASS_HORIZ, // H_FLIPADST -}; - -static INLINE int get_txb_bwl(TX_SIZE tx_size) { - tx_size = av1_get_adjusted_tx_size(tx_size); - return tx_size_wide_log2[tx_size]; -} - -static INLINE int get_txb_wide(TX_SIZE tx_size) { - tx_size = av1_get_adjusted_tx_size(tx_size); - return tx_size_wide[tx_size]; -} - -static INLINE int get_txb_high(TX_SIZE tx_size) { - tx_size = av1_get_adjusted_tx_size(tx_size); - return tx_size_high[tx_size]; -} - -static INLINE uint8_t *set_levels(uint8_t *const levels_buf, const int width) { - return levels_buf + TX_PAD_TOP * (width + TX_PAD_HOR); -} - -static INLINE int get_padded_idx(const int idx, const int bwl) { - return idx + ((idx >> bwl) << TX_PAD_HOR_LOG2); -} - -static INLINE int get_base_ctx_from_count_mag(int row, int col, int count, - int sig_mag) { - const int ctx = base_level_count_to_index[count]; - int ctx_idx = -1; - - if (row == 0 && col == 
0) { - if (sig_mag >= 2) return ctx_idx = 0; - if (sig_mag == 1) { - if (count >= 2) - ctx_idx = 1; - else - ctx_idx = 2; - - return ctx_idx; - } - - ctx_idx = 3 + ctx; - assert(ctx_idx <= 6); - return ctx_idx; - } else if (row == 0) { - if (sig_mag >= 2) return ctx_idx = 6; - if (sig_mag == 1) { - if (count >= 2) - ctx_idx = 7; - else - ctx_idx = 8; - return ctx_idx; - } - - ctx_idx = 9 + ctx; - assert(ctx_idx <= 11); - return ctx_idx; - } else if (col == 0) { - if (sig_mag >= 2) return ctx_idx = 12; - if (sig_mag == 1) { - if (count >= 2) - ctx_idx = 13; - else - ctx_idx = 14; - - return ctx_idx; - } - - ctx_idx = 15 + ctx; - assert(ctx_idx <= 17); - // TODO(angiebird): turn this on once the optimization is finalized - // assert(ctx_idx < 28); - } else { - if (sig_mag >= 2) return ctx_idx = 18; - if (sig_mag == 1) { - if (count >= 2) - ctx_idx = 19; - else - ctx_idx = 20; - return ctx_idx; - } - - ctx_idx = 21 + ctx; - - assert(ctx_idx <= 24); - } - return ctx_idx; -} - -static INLINE int get_br_ctx_2d(const uint8_t *const levels, - const int c, // raster order - const int bwl) { - assert(c > 0); - const int row = c >> bwl; - const int col = c - (row << bwl); - const int stride = (1 << bwl) + TX_PAD_HOR; - const int pos = row * stride + col; - int mag = AOMMIN(levels[pos + 1], MAX_BASE_BR_RANGE) + - AOMMIN(levels[pos + stride], MAX_BASE_BR_RANGE) + - AOMMIN(levels[pos + 1 + stride], MAX_BASE_BR_RANGE); - mag = AOMMIN((mag + 1) >> 1, 6); - //((row | col) < 2) is equivalent to ((row < 2) && (col < 2)) - if ((row | col) < 2) return mag + 7; - return mag + 14; -} - -static AOM_FORCE_INLINE int get_br_ctx(const uint8_t *const levels, - const int c, // raster order - const int bwl, const TX_CLASS tx_class) { - const int row = c >> bwl; - const int col = c - (row << bwl); - const int stride = (1 << bwl) + TX_PAD_HOR; - const int pos = row * stride + col; - int mag = levels[pos + 1]; - mag += levels[pos + stride]; - switch (tx_class) { - case TX_CLASS_2D: - mag += 
levels[pos + stride + 1]; - mag = AOMMIN((mag + 1) >> 1, 6); - if (c == 0) return mag; - if ((row < 2) && (col < 2)) return mag + 7; - break; - case TX_CLASS_HORIZ: - mag += levels[pos + 2]; - mag = AOMMIN((mag + 1) >> 1, 6); - if (c == 0) return mag; - if (col == 0) return mag + 7; - break; - case TX_CLASS_VERT: - mag += levels[pos + (stride << 1)]; - mag = AOMMIN((mag + 1) >> 1, 6); - if (c == 0) return mag; - if (row == 0) return mag + 7; - break; - default: break; - } - - return mag + 14; -} - -static const uint8_t clip_max3[256] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 -}; - -static AOM_FORCE_INLINE int get_nz_mag(const uint8_t *const levels, - const int bwl, const TX_CLASS tx_class) { - int mag; - - // Note: AOMMIN(level, 3) is useless for decoder since level < 3. 
- mag = clip_max3[levels[1]]; // { 0, 1 } - mag += clip_max3[levels[(1 << bwl) + TX_PAD_HOR]]; // { 1, 0 } - - if (tx_class == TX_CLASS_2D) { - mag += clip_max3[levels[(1 << bwl) + TX_PAD_HOR + 1]]; // { 1, 1 } - mag += clip_max3[levels[2]]; // { 0, 2 } - mag += clip_max3[levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)]]; // { 2, 0 } - } else if (tx_class == TX_CLASS_VERT) { - mag += clip_max3[levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)]]; // { 2, 0 } - mag += clip_max3[levels[(3 << bwl) + (3 << TX_PAD_HOR_LOG2)]]; // { 3, 0 } - mag += clip_max3[levels[(4 << bwl) + (4 << TX_PAD_HOR_LOG2)]]; // { 4, 0 } - } else { - mag += clip_max3[levels[2]]; // { 0, 2 } - mag += clip_max3[levels[3]]; // { 0, 3 } - mag += clip_max3[levels[4]]; // { 0, 4 } - } - - return mag; -} - -#define NZ_MAP_CTX_0 SIG_COEF_CONTEXTS_2D -#define NZ_MAP_CTX_5 (NZ_MAP_CTX_0 + 5) -#define NZ_MAP_CTX_10 (NZ_MAP_CTX_0 + 10) - -static const int nz_map_ctx_offset_1d[32] = { - NZ_MAP_CTX_0, NZ_MAP_CTX_5, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, - NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, - NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, - NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, - NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, - NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, - NZ_MAP_CTX_10, NZ_MAP_CTX_10, -}; - -static AOM_FORCE_INLINE int get_nz_map_ctx_from_stats( - const int stats, - const int coeff_idx, // raster order - const int bwl, const TX_SIZE tx_size, const TX_CLASS tx_class) { - // tx_class == 0(TX_CLASS_2D) - if ((tx_class | coeff_idx) == 0) return 0; - int ctx = (stats + 1) >> 1; - ctx = AOMMIN(ctx, 4); - switch (tx_class) { - case TX_CLASS_2D: { - // This is the algorithm to generate av1_nz_map_ctx_offset[][] - // const int width = tx_size_wide[tx_size]; - // const int height = tx_size_high[tx_size]; - // if (width < height) { - // if (row < 
2) return 11 + ctx; - // } else if (width > height) { - // if (col < 2) return 16 + ctx; - // } - // if (row + col < 2) return ctx + 1; - // if (row + col < 4) return 5 + ctx + 1; - // return 21 + ctx; - return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx]; - } - case TX_CLASS_HORIZ: { - const int row = coeff_idx >> bwl; - const int col = coeff_idx - (row << bwl); - return ctx + nz_map_ctx_offset_1d[col]; - break; - } - case TX_CLASS_VERT: { - const int row = coeff_idx >> bwl; - return ctx + nz_map_ctx_offset_1d[row]; - break; - } - default: break; - } - return 0; -} - -typedef aom_cdf_prob (*base_cdf_arr)[CDF_SIZE(4)]; -typedef aom_cdf_prob (*br_cdf_arr)[CDF_SIZE(BR_CDF_SIZE)]; - -static INLINE int get_lower_levels_ctx_eob(int bwl, int height, int scan_idx) { - if (scan_idx == 0) return 0; - if (scan_idx <= (height << bwl) / 8) return 1; - if (scan_idx <= (height << bwl) / 4) return 2; - return 3; -} - -static INLINE int get_lower_levels_ctx_2d(const uint8_t *levels, int coeff_idx, - int bwl, TX_SIZE tx_size) { - assert(coeff_idx > 0); - int mag; - // Note: AOMMIN(level, 3) is useless for decoder since level < 3. 
- levels = levels + get_padded_idx(coeff_idx, bwl); - mag = AOMMIN(levels[1], 3); // { 0, 1 } - mag += AOMMIN(levels[(1 << bwl) + TX_PAD_HOR], 3); // { 1, 0 } - mag += AOMMIN(levels[(1 << bwl) + TX_PAD_HOR + 1], 3); // { 1, 1 } - mag += AOMMIN(levels[2], 3); // { 0, 2 } - mag += AOMMIN(levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)], 3); // { 2, 0 } - - const int ctx = AOMMIN((mag + 1) >> 1, 4); - return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx]; -} -static AOM_FORCE_INLINE int get_lower_levels_ctx(const uint8_t *levels, - int coeff_idx, int bwl, - TX_SIZE tx_size, - TX_CLASS tx_class) { - const int stats = - get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class); - return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class); -} - -static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx, - int bwl, int height, - const uint8_t *levels, - int coeff_idx, TX_SIZE tx_size, - TX_CLASS tx_class) { - if (is_last) { - if (scan_idx == 0) return 0; - if (scan_idx <= (height << bwl) >> 3) return 1; - if (scan_idx <= (height << bwl) >> 2) return 2; - return 3; - } - return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class); -} - -static INLINE void set_dc_sign(int *cul_level, int dc_val) { - if (dc_val < 0) - *cul_level |= 1 << COEFF_CONTEXT_BITS; - else if (dc_val > 0) - *cul_level += 2 << COEFF_CONTEXT_BITS; -} - -static INLINE void get_txb_ctx(const BLOCK_SIZE plane_bsize, - const TX_SIZE tx_size, const int plane, - const ENTROPY_CONTEXT *const a, - const ENTROPY_CONTEXT *const l, - TXB_CTX *const txb_ctx) { -#define MAX_TX_SIZE_UNIT 16 - static const int8_t signs[3] = { 0, -1, 1 }; - static const int8_t dc_sign_contexts[4 * MAX_TX_SIZE_UNIT + 1] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 - }; - const int txb_w_unit = tx_size_wide_unit[tx_size]; - const int 
txb_h_unit = tx_size_high_unit[tx_size]; - int dc_sign = 0; - int k = 0; - - do { - const unsigned int sign = ((uint8_t)a[k]) >> COEFF_CONTEXT_BITS; - assert(sign <= 2); - dc_sign += signs[sign]; - } while (++k < txb_w_unit); - - k = 0; - do { - const unsigned int sign = ((uint8_t)l[k]) >> COEFF_CONTEXT_BITS; - assert(sign <= 2); - dc_sign += signs[sign]; - } while (++k < txb_h_unit); - - txb_ctx->dc_sign_ctx = dc_sign_contexts[dc_sign + 2 * MAX_TX_SIZE_UNIT]; - - if (plane == 0) { - if (plane_bsize == txsize_to_bsize[tx_size]) { - txb_ctx->txb_skip_ctx = 0; - } else { - // This is the algorithm to generate table skip_contexts[min][max]. - // if (!max) - // txb_skip_ctx = 1; - // else if (!min) - // txb_skip_ctx = 2 + (max > 3); - // else if (max <= 3) - // txb_skip_ctx = 4; - // else if (min <= 3) - // txb_skip_ctx = 5; - // else - // txb_skip_ctx = 6; - static const uint8_t skip_contexts[5][5] = { { 1, 2, 2, 2, 3 }, - { 1, 4, 4, 4, 5 }, - { 1, 4, 4, 4, 5 }, - { 1, 4, 4, 4, 5 }, - { 1, 4, 4, 4, 6 } }; - int top = 0; - int left = 0; - - k = 0; - do { - top |= a[k]; - } while (++k < txb_w_unit); - top &= COEFF_CONTEXT_MASK; - - k = 0; - do { - left |= l[k]; - } while (++k < txb_h_unit); - left &= COEFF_CONTEXT_MASK; - const int max = AOMMIN(top | left, 4); - const int min = AOMMIN(AOMMIN(top, left), 4); - - txb_ctx->txb_skip_ctx = skip_contexts[min][max]; - } - } else { - const int ctx_base = get_entropy_context(tx_size, a, l); - const int ctx_offset = (num_pels_log2_lookup[plane_bsize] > - num_pels_log2_lookup[txsize_to_bsize[tx_size]]) - ? 
10 - : 7; - txb_ctx->txb_skip_ctx = ctx_base + ctx_offset; - } -#undef MAX_TX_SIZE_UNIT -} - -void av1_init_lv_map(AV1_COMMON *cm); - -#endif // AOM_AV1_COMMON_TXB_COMMON_H_ diff --git a/third_party/aom/av1/common/warped_motion.c b/third_party/aom/av1/common/warped_motion.c deleted file mode 100644 index 4144c4389..000000000 --- a/third_party/aom/av1/common/warped_motion.c +++ /dev/null @@ -1,1148 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <memory.h> -#include <math.h> -#include <assert.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/warped_motion.h" -#include "av1/common/scale.h" - -#define WARP_ERROR_BLOCK 32 - -/* clang-format off */ -static const int error_measure_lut[512] = { - // pow 0.7 - 16384, 16339, 16294, 16249, 16204, 16158, 16113, 16068, - 16022, 15977, 15932, 15886, 15840, 15795, 15749, 15703, - 15657, 15612, 15566, 15520, 15474, 15427, 15381, 15335, - 15289, 15242, 15196, 15149, 15103, 15056, 15010, 14963, - 14916, 14869, 14822, 14775, 14728, 14681, 14634, 14587, - 14539, 14492, 14445, 14397, 14350, 14302, 14254, 14206, - 14159, 14111, 14063, 14015, 13967, 13918, 13870, 13822, - 13773, 13725, 13676, 13628, 13579, 13530, 13481, 13432, - 13383, 13334, 13285, 13236, 13187, 13137, 13088, 13038, - 12988, 12939, 12889, 12839, 12789, 12739, 12689, 12639, - 12588, 12538, 12487, 12437, 12386, 12335, 12285, 12234, - 12183, 12132, 12080, 12029, 11978, 11926, 11875, 11823, - 11771, 11719, 11667, 11615, 
11563, 11511, 11458, 11406, - 11353, 11301, 11248, 11195, 11142, 11089, 11036, 10982, - 10929, 10875, 10822, 10768, 10714, 10660, 10606, 10552, - 10497, 10443, 10388, 10333, 10279, 10224, 10168, 10113, - 10058, 10002, 9947, 9891, 9835, 9779, 9723, 9666, - 9610, 9553, 9497, 9440, 9383, 9326, 9268, 9211, - 9153, 9095, 9037, 8979, 8921, 8862, 8804, 8745, - 8686, 8627, 8568, 8508, 8449, 8389, 8329, 8269, - 8208, 8148, 8087, 8026, 7965, 7903, 7842, 7780, - 7718, 7656, 7593, 7531, 7468, 7405, 7341, 7278, - 7214, 7150, 7086, 7021, 6956, 6891, 6826, 6760, - 6695, 6628, 6562, 6495, 6428, 6361, 6293, 6225, - 6157, 6089, 6020, 5950, 5881, 5811, 5741, 5670, - 5599, 5527, 5456, 5383, 5311, 5237, 5164, 5090, - 5015, 4941, 4865, 4789, 4713, 4636, 4558, 4480, - 4401, 4322, 4242, 4162, 4080, 3998, 3916, 3832, - 3748, 3663, 3577, 3490, 3402, 3314, 3224, 3133, - 3041, 2948, 2854, 2758, 2661, 2562, 2461, 2359, - 2255, 2148, 2040, 1929, 1815, 1698, 1577, 1452, - 1323, 1187, 1045, 894, 731, 550, 339, 0, - 339, 550, 731, 894, 1045, 1187, 1323, 1452, - 1577, 1698, 1815, 1929, 2040, 2148, 2255, 2359, - 2461, 2562, 2661, 2758, 2854, 2948, 3041, 3133, - 3224, 3314, 3402, 3490, 3577, 3663, 3748, 3832, - 3916, 3998, 4080, 4162, 4242, 4322, 4401, 4480, - 4558, 4636, 4713, 4789, 4865, 4941, 5015, 5090, - 5164, 5237, 5311, 5383, 5456, 5527, 5599, 5670, - 5741, 5811, 5881, 5950, 6020, 6089, 6157, 6225, - 6293, 6361, 6428, 6495, 6562, 6628, 6695, 6760, - 6826, 6891, 6956, 7021, 7086, 7150, 7214, 7278, - 7341, 7405, 7468, 7531, 7593, 7656, 7718, 7780, - 7842, 7903, 7965, 8026, 8087, 8148, 8208, 8269, - 8329, 8389, 8449, 8508, 8568, 8627, 8686, 8745, - 8804, 8862, 8921, 8979, 9037, 9095, 9153, 9211, - 9268, 9326, 9383, 9440, 9497, 9553, 9610, 9666, - 9723, 9779, 9835, 9891, 9947, 10002, 10058, 10113, - 10168, 10224, 10279, 10333, 10388, 10443, 10497, 10552, - 10606, 10660, 10714, 10768, 10822, 10875, 10929, 10982, - 11036, 11089, 11142, 11195, 11248, 11301, 11353, 11406, - 11458, 11511, 11563, 11615, 
11667, 11719, 11771, 11823, - 11875, 11926, 11978, 12029, 12080, 12132, 12183, 12234, - 12285, 12335, 12386, 12437, 12487, 12538, 12588, 12639, - 12689, 12739, 12789, 12839, 12889, 12939, 12988, 13038, - 13088, 13137, 13187, 13236, 13285, 13334, 13383, 13432, - 13481, 13530, 13579, 13628, 13676, 13725, 13773, 13822, - 13870, 13918, 13967, 14015, 14063, 14111, 14159, 14206, - 14254, 14302, 14350, 14397, 14445, 14492, 14539, 14587, - 14634, 14681, 14728, 14775, 14822, 14869, 14916, 14963, - 15010, 15056, 15103, 15149, 15196, 15242, 15289, 15335, - 15381, 15427, 15474, 15520, 15566, 15612, 15657, 15703, - 15749, 15795, 15840, 15886, 15932, 15977, 16022, 16068, - 16113, 16158, 16204, 16249, 16294, 16339, 16384, 16384, -}; -/* clang-format on */ - -// For warping, we really use a 6-tap filter, but we do blocks of 8 pixels -// at a time. The zoom/rotation/shear in the model are applied to the -// "fractional" position of each pixel, which therefore varies within -// [-1, 2) * WARPEDPIXEL_PREC_SHIFTS. -// We need an extra 2 taps to fit this in, for a total of 8 taps. 
-/* clang-format off */ -const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = { -#if WARPEDPIXEL_PREC_BITS == 6 - // [-1, 0) - { 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, - 1, 127, 2, 0, 0, 0, 0 }, - { 1, - 3, 127, 4, - 1, 0, 0, 0 }, { 1, - 4, 126, 6, - 2, 1, 0, 0 }, - { 1, - 5, 126, 8, - 3, 1, 0, 0 }, { 1, - 6, 125, 11, - 4, 1, 0, 0 }, - { 1, - 7, 124, 13, - 4, 1, 0, 0 }, { 2, - 8, 123, 15, - 5, 1, 0, 0 }, - { 2, - 9, 122, 18, - 6, 1, 0, 0 }, { 2, -10, 121, 20, - 6, 1, 0, 0 }, - { 2, -11, 120, 22, - 7, 2, 0, 0 }, { 2, -12, 119, 25, - 8, 2, 0, 0 }, - { 3, -13, 117, 27, - 8, 2, 0, 0 }, { 3, -13, 116, 29, - 9, 2, 0, 0 }, - { 3, -14, 114, 32, -10, 3, 0, 0 }, { 3, -15, 113, 35, -10, 2, 0, 0 }, - { 3, -15, 111, 37, -11, 3, 0, 0 }, { 3, -16, 109, 40, -11, 3, 0, 0 }, - { 3, -16, 108, 42, -12, 3, 0, 0 }, { 4, -17, 106, 45, -13, 3, 0, 0 }, - { 4, -17, 104, 47, -13, 3, 0, 0 }, { 4, -17, 102, 50, -14, 3, 0, 0 }, - { 4, -17, 100, 52, -14, 3, 0, 0 }, { 4, -18, 98, 55, -15, 4, 0, 0 }, - { 4, -18, 96, 58, -15, 3, 0, 0 }, { 4, -18, 94, 60, -16, 4, 0, 0 }, - { 4, -18, 91, 63, -16, 4, 0, 0 }, { 4, -18, 89, 65, -16, 4, 0, 0 }, - { 4, -18, 87, 68, -17, 4, 0, 0 }, { 4, -18, 85, 70, -17, 4, 0, 0 }, - { 4, -18, 82, 73, -17, 4, 0, 0 }, { 4, -18, 80, 75, -17, 4, 0, 0 }, - { 4, -18, 78, 78, -18, 4, 0, 0 }, { 4, -17, 75, 80, -18, 4, 0, 0 }, - { 4, -17, 73, 82, -18, 4, 0, 0 }, { 4, -17, 70, 85, -18, 4, 0, 0 }, - { 4, -17, 68, 87, -18, 4, 0, 0 }, { 4, -16, 65, 89, -18, 4, 0, 0 }, - { 4, -16, 63, 91, -18, 4, 0, 0 }, { 4, -16, 60, 94, -18, 4, 0, 0 }, - { 3, -15, 58, 96, -18, 4, 0, 0 }, { 4, -15, 55, 98, -18, 4, 0, 0 }, - { 3, -14, 52, 100, -17, 4, 0, 0 }, { 3, -14, 50, 102, -17, 4, 0, 0 }, - { 3, -13, 47, 104, -17, 4, 0, 0 }, { 3, -13, 45, 106, -17, 4, 0, 0 }, - { 3, -12, 42, 108, -16, 3, 0, 0 }, { 3, -11, 40, 109, -16, 3, 0, 0 }, - { 3, -11, 37, 111, -15, 3, 0, 0 }, { 2, -10, 35, 113, -15, 3, 0, 0 }, - { 3, -10, 32, 114, -14, 3, 0, 0 }, { 2, - 9, 29, 116, -13, 3, 0, 0 }, - { 2, - 8, 27, 
117, -13, 3, 0, 0 }, { 2, - 8, 25, 119, -12, 2, 0, 0 }, - { 2, - 7, 22, 120, -11, 2, 0, 0 }, { 1, - 6, 20, 121, -10, 2, 0, 0 }, - { 1, - 6, 18, 122, - 9, 2, 0, 0 }, { 1, - 5, 15, 123, - 8, 2, 0, 0 }, - { 1, - 4, 13, 124, - 7, 1, 0, 0 }, { 1, - 4, 11, 125, - 6, 1, 0, 0 }, - { 1, - 3, 8, 126, - 5, 1, 0, 0 }, { 1, - 2, 6, 126, - 4, 1, 0, 0 }, - { 0, - 1, 4, 127, - 3, 1, 0, 0 }, { 0, 0, 2, 127, - 1, 0, 0, 0 }, - - // [0, 1) - { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 0, -1, 127, 2, 0, 0, 0}, - { 0, 1, -3, 127, 4, -2, 1, 0}, { 0, 1, -5, 127, 6, -2, 1, 0}, - { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 2, -7, 126, 11, -4, 2, -1}, - {-1, 3, -8, 125, 13, -5, 2, -1}, {-1, 3, -10, 124, 16, -6, 3, -1}, - {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -12, 122, 20, -7, 3, -1}, - {-1, 4, -13, 121, 23, -8, 3, -1}, {-2, 5, -14, 120, 25, -9, 4, -1}, - {-1, 5, -15, 119, 27, -10, 4, -1}, {-1, 5, -16, 118, 30, -11, 4, -1}, - {-2, 6, -17, 116, 33, -12, 5, -1}, {-2, 6, -17, 114, 35, -12, 5, -1}, - {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 111, 41, -14, 6, -2}, - {-2, 7, -19, 110, 43, -15, 6, -2}, {-2, 7, -20, 108, 46, -15, 6, -2}, - {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 104, 51, -16, 7, -2}, - {-2, 7, -21, 102, 54, -17, 7, -2}, {-2, 8, -21, 100, 56, -18, 7, -2}, - {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 96, 62, -19, 7, -2}, - {-2, 8, -22, 94, 64, -19, 7, -2}, {-2, 8, -22, 91, 67, -20, 8, -2}, - {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -22, 87, 72, -21, 8, -2}, - {-2, 8, -21, 84, 74, -21, 8, -2}, {-2, 8, -22, 82, 77, -21, 8, -2}, - {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 77, 82, -22, 8, -2}, - {-2, 8, -21, 74, 84, -21, 8, -2}, {-2, 8, -21, 72, 87, -22, 8, -2}, - {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 8, -20, 67, 91, -22, 8, -2}, - {-2, 7, -19, 64, 94, -22, 8, -2}, {-2, 7, -19, 62, 96, -22, 8, -2}, - {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -18, 56, 100, -21, 8, -2}, - {-2, 7, -17, 54, 102, -21, 7, -2}, {-2, 7, -16, 51, 104, -21, 7, -2}, - {-2, 6, -16, 49, 106, -20, 7, -2}, 
{-2, 6, -15, 46, 108, -20, 7, -2}, - {-2, 6, -15, 43, 110, -19, 7, -2}, {-2, 6, -14, 41, 111, -19, 7, -2}, - {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 35, 114, -17, 6, -2}, - {-1, 5, -12, 33, 116, -17, 6, -2}, {-1, 4, -11, 30, 118, -16, 5, -1}, - {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 4, -9, 25, 120, -14, 5, -2}, - {-1, 3, -8, 23, 121, -13, 4, -1}, {-1, 3, -7, 20, 122, -12, 4, -1}, - {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 3, -6, 16, 124, -10, 3, -1}, - {-1, 2, -5, 13, 125, -8, 3, -1}, {-1, 2, -4, 11, 126, -7, 2, -1}, - { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 6, 127, -5, 1, 0}, - { 0, 1, -2, 4, 127, -3, 1, 0}, { 0, 0, 0, 2, 127, -1, 0, 0}, - - // [1, 2) - { 0, 0, 0, 1, 127, 0, 0, 0 }, { 0, 0, 0, - 1, 127, 2, 0, 0 }, - { 0, 0, 1, - 3, 127, 4, - 1, 0 }, { 0, 0, 1, - 4, 126, 6, - 2, 1 }, - { 0, 0, 1, - 5, 126, 8, - 3, 1 }, { 0, 0, 1, - 6, 125, 11, - 4, 1 }, - { 0, 0, 1, - 7, 124, 13, - 4, 1 }, { 0, 0, 2, - 8, 123, 15, - 5, 1 }, - { 0, 0, 2, - 9, 122, 18, - 6, 1 }, { 0, 0, 2, -10, 121, 20, - 6, 1 }, - { 0, 0, 2, -11, 120, 22, - 7, 2 }, { 0, 0, 2, -12, 119, 25, - 8, 2 }, - { 0, 0, 3, -13, 117, 27, - 8, 2 }, { 0, 0, 3, -13, 116, 29, - 9, 2 }, - { 0, 0, 3, -14, 114, 32, -10, 3 }, { 0, 0, 3, -15, 113, 35, -10, 2 }, - { 0, 0, 3, -15, 111, 37, -11, 3 }, { 0, 0, 3, -16, 109, 40, -11, 3 }, - { 0, 0, 3, -16, 108, 42, -12, 3 }, { 0, 0, 4, -17, 106, 45, -13, 3 }, - { 0, 0, 4, -17, 104, 47, -13, 3 }, { 0, 0, 4, -17, 102, 50, -14, 3 }, - { 0, 0, 4, -17, 100, 52, -14, 3 }, { 0, 0, 4, -18, 98, 55, -15, 4 }, - { 0, 0, 4, -18, 96, 58, -15, 3 }, { 0, 0, 4, -18, 94, 60, -16, 4 }, - { 0, 0, 4, -18, 91, 63, -16, 4 }, { 0, 0, 4, -18, 89, 65, -16, 4 }, - { 0, 0, 4, -18, 87, 68, -17, 4 }, { 0, 0, 4, -18, 85, 70, -17, 4 }, - { 0, 0, 4, -18, 82, 73, -17, 4 }, { 0, 0, 4, -18, 80, 75, -17, 4 }, - { 0, 0, 4, -18, 78, 78, -18, 4 }, { 0, 0, 4, -17, 75, 80, -18, 4 }, - { 0, 0, 4, -17, 73, 82, -18, 4 }, { 0, 0, 4, -17, 70, 85, -18, 4 }, - { 0, 0, 4, -17, 68, 87, -18, 4 }, { 0, 0, 4, -16, 65, 
89, -18, 4 }, - { 0, 0, 4, -16, 63, 91, -18, 4 }, { 0, 0, 4, -16, 60, 94, -18, 4 }, - { 0, 0, 3, -15, 58, 96, -18, 4 }, { 0, 0, 4, -15, 55, 98, -18, 4 }, - { 0, 0, 3, -14, 52, 100, -17, 4 }, { 0, 0, 3, -14, 50, 102, -17, 4 }, - { 0, 0, 3, -13, 47, 104, -17, 4 }, { 0, 0, 3, -13, 45, 106, -17, 4 }, - { 0, 0, 3, -12, 42, 108, -16, 3 }, { 0, 0, 3, -11, 40, 109, -16, 3 }, - { 0, 0, 3, -11, 37, 111, -15, 3 }, { 0, 0, 2, -10, 35, 113, -15, 3 }, - { 0, 0, 3, -10, 32, 114, -14, 3 }, { 0, 0, 2, - 9, 29, 116, -13, 3 }, - { 0, 0, 2, - 8, 27, 117, -13, 3 }, { 0, 0, 2, - 8, 25, 119, -12, 2 }, - { 0, 0, 2, - 7, 22, 120, -11, 2 }, { 0, 0, 1, - 6, 20, 121, -10, 2 }, - { 0, 0, 1, - 6, 18, 122, - 9, 2 }, { 0, 0, 1, - 5, 15, 123, - 8, 2 }, - { 0, 0, 1, - 4, 13, 124, - 7, 1 }, { 0, 0, 1, - 4, 11, 125, - 6, 1 }, - { 0, 0, 1, - 3, 8, 126, - 5, 1 }, { 0, 0, 1, - 2, 6, 126, - 4, 1 }, - { 0, 0, 0, - 1, 4, 127, - 3, 1 }, { 0, 0, 0, 0, 2, 127, - 1, 0 }, - // dummy (replicate row index 191) - { 0, 0, 0, 0, 2, 127, - 1, 0 }, - -#elif WARPEDPIXEL_PREC_BITS == 5 - // [-1, 0) - {0, 0, 127, 1, 0, 0, 0, 0}, {1, -3, 127, 4, -1, 0, 0, 0}, - {1, -5, 126, 8, -3, 1, 0, 0}, {1, -7, 124, 13, -4, 1, 0, 0}, - {2, -9, 122, 18, -6, 1, 0, 0}, {2, -11, 120, 22, -7, 2, 0, 0}, - {3, -13, 117, 27, -8, 2, 0, 0}, {3, -14, 114, 32, -10, 3, 0, 0}, - {3, -15, 111, 37, -11, 3, 0, 0}, {3, -16, 108, 42, -12, 3, 0, 0}, - {4, -17, 104, 47, -13, 3, 0, 0}, {4, -17, 100, 52, -14, 3, 0, 0}, - {4, -18, 96, 58, -15, 3, 0, 0}, {4, -18, 91, 63, -16, 4, 0, 0}, - {4, -18, 87, 68, -17, 4, 0, 0}, {4, -18, 82, 73, -17, 4, 0, 0}, - {4, -18, 78, 78, -18, 4, 0, 0}, {4, -17, 73, 82, -18, 4, 0, 0}, - {4, -17, 68, 87, -18, 4, 0, 0}, {4, -16, 63, 91, -18, 4, 0, 0}, - {3, -15, 58, 96, -18, 4, 0, 0}, {3, -14, 52, 100, -17, 4, 0, 0}, - {3, -13, 47, 104, -17, 4, 0, 0}, {3, -12, 42, 108, -16, 3, 0, 0}, - {3, -11, 37, 111, -15, 3, 0, 0}, {3, -10, 32, 114, -14, 3, 0, 0}, - {2, -8, 27, 117, -13, 3, 0, 0}, {2, -7, 22, 120, -11, 2, 0, 0}, - {1, -6, 18, 
122, -9, 2, 0, 0}, {1, -4, 13, 124, -7, 1, 0, 0}, - {1, -3, 8, 126, -5, 1, 0, 0}, {0, -1, 4, 127, -3, 1, 0, 0}, - // [0, 1) - { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 1, -3, 127, 4, -2, 1, 0}, - { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 3, -8, 125, 13, -5, 2, -1}, - {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -13, 121, 23, -8, 3, -1}, - {-1, 5, -15, 119, 27, -10, 4, -1}, {-2, 6, -17, 116, 33, -12, 5, -1}, - {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 110, 43, -15, 6, -2}, - {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 102, 54, -17, 7, -2}, - {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 94, 64, -19, 7, -2}, - {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -21, 84, 74, -21, 8, -2}, - {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 74, 84, -21, 8, -2}, - {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 7, -19, 64, 94, -22, 8, -2}, - {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -17, 54, 102, -21, 7, -2}, - {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 43, 110, -19, 7, -2}, - {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 33, 116, -17, 6, -2}, - {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 3, -8, 23, 121, -13, 4, -1}, - {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 2, -5, 13, 125, -8, 3, -1}, - { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 4, 127, -3, 1, 0}, - // [1, 2) - {0, 0, 0, 1, 127, 0, 0, 0}, {0, 0, 1, -3, 127, 4, -1, 0}, - {0, 0, 1, -5, 126, 8, -3, 1}, {0, 0, 1, -7, 124, 13, -4, 1}, - {0, 0, 2, -9, 122, 18, -6, 1}, {0, 0, 2, -11, 120, 22, -7, 2}, - {0, 0, 3, -13, 117, 27, -8, 2}, {0, 0, 3, -14, 114, 32, -10, 3}, - {0, 0, 3, -15, 111, 37, -11, 3}, {0, 0, 3, -16, 108, 42, -12, 3}, - {0, 0, 4, -17, 104, 47, -13, 3}, {0, 0, 4, -17, 100, 52, -14, 3}, - {0, 0, 4, -18, 96, 58, -15, 3}, {0, 0, 4, -18, 91, 63, -16, 4}, - {0, 0, 4, -18, 87, 68, -17, 4}, {0, 0, 4, -18, 82, 73, -17, 4}, - {0, 0, 4, -18, 78, 78, -18, 4}, {0, 0, 4, -17, 73, 82, -18, 4}, - {0, 0, 4, -17, 68, 87, -18, 4}, {0, 0, 4, -16, 63, 91, -18, 4}, - {0, 0, 3, -15, 58, 96, -18, 4}, {0, 0, 3, -14, 52, 100, -17, 4}, - {0, 0, 3, -13, 47, 104, 
-17, 4}, {0, 0, 3, -12, 42, 108, -16, 3}, - {0, 0, 3, -11, 37, 111, -15, 3}, {0, 0, 3, -10, 32, 114, -14, 3}, - {0, 0, 2, -8, 27, 117, -13, 3}, {0, 0, 2, -7, 22, 120, -11, 2}, - {0, 0, 1, -6, 18, 122, -9, 2}, {0, 0, 1, -4, 13, 124, -7, 1}, - {0, 0, 1, -3, 8, 126, -5, 1}, {0, 0, 0, -1, 4, 127, -3, 1}, - // dummy (replicate row index 95) - {0, 0, 0, -1, 4, 127, -3, 1}, - -#endif // WARPEDPIXEL_PREC_BITS == 6 -}; - -/* clang-format on */ - -#define DIV_LUT_PREC_BITS 14 -#define DIV_LUT_BITS 8 -#define DIV_LUT_NUM (1 << DIV_LUT_BITS) - -static const uint16_t div_lut[DIV_LUT_NUM + 1] = { - 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768, - 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142, - 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564, - 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028, - 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530, - 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066, - 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633, - 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228, - 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848, - 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491, - 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155, - 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838, - 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538, - 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255, - 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986, - 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732, - 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489, - 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259, - 9239, 9218, 9198, 
9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039, - 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830, - 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630, - 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439, - 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257, - 8240, 8224, 8208, 8192, -}; - -// Decomposes a divisor D such that 1/D = y/2^shift, where y is returned -// at precision of DIV_LUT_PREC_BITS along with the shift. -static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) { - int64_t f; - *shift = (int16_t)((D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32 - : get_msb((unsigned int)D)); - // e is obtained from D after resetting the most significant 1 bit. - const int64_t e = D - ((uint64_t)1 << *shift); - // Get the most significant DIV_LUT_BITS (8) bits of e into f - if (*shift > DIV_LUT_BITS) - f = ROUND_POWER_OF_TWO_64(e, *shift - DIV_LUT_BITS); - else - f = e << (DIV_LUT_BITS - *shift); - assert(f <= DIV_LUT_NUM); - *shift += DIV_LUT_PREC_BITS; - // Use f as lookup into the precomputed table of multipliers - return div_lut[f]; -} - -static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) { - int32_t f; - *shift = get_msb(D); - // e is obtained from D after resetting the most significant 1 bit. 
- const int32_t e = D - ((uint32_t)1 << *shift); - // Get the most significant DIV_LUT_BITS (8) bits of e into f - if (*shift > DIV_LUT_BITS) - f = ROUND_POWER_OF_TWO(e, *shift - DIV_LUT_BITS); - else - f = e << (DIV_LUT_BITS - *shift); - assert(f <= DIV_LUT_NUM); - *shift += DIV_LUT_PREC_BITS; - // Use f as lookup into the precomputed table of multipliers - return div_lut[f]; -} - -static int is_affine_valid(const WarpedMotionParams *const wm) { - const int32_t *mat = wm->wmmat; - return (mat[2] > 0); -} - -static int is_affine_shear_allowed(int16_t alpha, int16_t beta, int16_t gamma, - int16_t delta) { - if ((4 * abs(alpha) + 7 * abs(beta) >= (1 << WARPEDMODEL_PREC_BITS)) || - (4 * abs(gamma) + 4 * abs(delta) >= (1 << WARPEDMODEL_PREC_BITS))) - return 0; - else - return 1; -} - -// Returns 1 on success or 0 on an invalid affine set -int get_shear_params(WarpedMotionParams *wm) { - const int32_t *mat = wm->wmmat; - if (!is_affine_valid(wm)) return 0; - wm->alpha = - clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX); - wm->beta = clamp(mat[3], INT16_MIN, INT16_MAX); - int16_t shift; - int16_t y = resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? 
-1 : 1); - int64_t v = ((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y; - wm->gamma = - clamp((int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift), INT16_MIN, INT16_MAX); - v = ((int64_t)mat[3] * mat[4]) * y; - wm->delta = clamp(mat[5] - (int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift) - - (1 << WARPEDMODEL_PREC_BITS), - INT16_MIN, INT16_MAX); - - wm->alpha = ROUND_POWER_OF_TWO_SIGNED(wm->alpha, WARP_PARAM_REDUCE_BITS) * - (1 << WARP_PARAM_REDUCE_BITS); - wm->beta = ROUND_POWER_OF_TWO_SIGNED(wm->beta, WARP_PARAM_REDUCE_BITS) * - (1 << WARP_PARAM_REDUCE_BITS); - wm->gamma = ROUND_POWER_OF_TWO_SIGNED(wm->gamma, WARP_PARAM_REDUCE_BITS) * - (1 << WARP_PARAM_REDUCE_BITS); - wm->delta = ROUND_POWER_OF_TWO_SIGNED(wm->delta, WARP_PARAM_REDUCE_BITS) * - (1 << WARP_PARAM_REDUCE_BITS); - - if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta)) - return 0; - - return 1; -} - -static INLINE int highbd_error_measure(int err, int bd) { - const int b = bd - 8; - const int bmask = (1 << b) - 1; - const int v = (1 << b); - err = abs(err); - const int e1 = err >> b; - const int e2 = err & bmask; - return error_measure_lut[255 + e1] * (v - e2) + - error_measure_lut[256 + e1] * e2; -} - -/* Note: For an explanation of the warp algorithm, and some notes on bit widths - for hardware implementations, see the comments above av1_warp_affine_c -*/ -void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, - int width, int height, int stride, uint16_t *pred, - int p_col, int p_row, int p_width, int p_height, - int p_stride, int subsampling_x, - int subsampling_y, int bd, - ConvolveParams *conv_params, int16_t alpha, - int16_t beta, int16_t gamma, int16_t delta) { - int32_t tmp[15 * 8]; - const int reduce_bits_horiz = - conv_params->round_0 + - AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0); - const int reduce_bits_vert = conv_params->is_compound - ? 
conv_params->round_1 - : 2 * FILTER_BITS - reduce_bits_horiz; - const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz; - const int offset_bits_horiz = bd + FILTER_BITS - 1; - const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz; - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - (void)max_bits_horiz; - assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL)); - - for (int i = p_row; i < p_row + p_height; i += 8) { - for (int j = p_col; j < p_col + p_width; j += 8) { - // Calculate the center of this 8x8 block, - // project to luma coordinates (if in a subsampled chroma plane), - // apply the affine transformation, - // then convert back to the original coordinates (if necessary) - const int32_t src_x = (j + 4) << subsampling_x; - const int32_t src_y = (i + 4) << subsampling_y; - const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0]; - const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1]; - const int32_t x4 = dst_x >> subsampling_x; - const int32_t y4 = dst_y >> subsampling_y; - - const int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS; - int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - const int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS; - int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - - sx4 += alpha * (-4) + beta * (-4); - sy4 += gamma * (-4) + delta * (-4); - - sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - - // Horizontal filter - for (int k = -7; k < 8; ++k) { - const int iy = clamp(iy4 + k, 0, height - 1); - - int sx = sx4 + beta * (k + 4); - for (int l = -4; l < 4; ++l) { - int ix = ix4 + l - 3; - const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) + - WARPEDPIXEL_PREC_SHIFTS; - assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); - const int16_t *coeffs = warped_filter[offs]; - - int32_t sum = 1 << offset_bits_horiz; - 
for (int m = 0; m < 8; ++m) { - const int sample_x = clamp(ix + m, 0, width - 1); - sum += ref[iy * stride + sample_x] * coeffs[m]; - } - sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz); - assert(0 <= sum && sum < (1 << max_bits_horiz)); - tmp[(k + 7) * 8 + (l + 4)] = sum; - sx += alpha; - } - } - - // Vertical filter - for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) { - int sy = sy4 + delta * (k + 4); - for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) { - const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) + - WARPEDPIXEL_PREC_SHIFTS; - assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); - const int16_t *coeffs = warped_filter[offs]; - - int32_t sum = 1 << offset_bits_vert; - for (int m = 0; m < 8; ++m) { - sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m]; - } - - if (conv_params->is_compound) { - CONV_BUF_TYPE *p = - &conv_params - ->dst[(i - p_row + k + 4) * conv_params->dst_stride + - (j - p_col + l + 4)]; - sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert); - if (conv_params->do_average) { - uint16_t *dst16 = - &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)]; - int32_t tmp32 = *p; - if (conv_params->use_jnt_comp_avg) { - tmp32 = tmp32 * conv_params->fwd_offset + - sum * conv_params->bck_offset; - tmp32 = tmp32 >> DIST_PRECISION_BITS; - } else { - tmp32 += sum; - tmp32 = tmp32 >> 1; - } - tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) - - (1 << (offset_bits - conv_params->round_1 - 1)); - *dst16 = - clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp32, round_bits), bd); - } else { - *p = sum; - } - } else { - uint16_t *p = - &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)]; - sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert); - assert(0 <= sum && sum < (1 << (bd + 2))); - *p = clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd); - } - sy += gamma; - } - } - } - } -} - -static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8, - int width, int height, int stride, - 
const uint8_t *const pred8, int p_col, int p_row, - int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, int bd, - ConvolveParams *conv_params) { - assert(wm->wmtype <= AFFINE); - if (wm->wmtype == ROTZOOM) { - wm->wmmat[5] = wm->wmmat[2]; - wm->wmmat[4] = -wm->wmmat[3]; - } - const int32_t *const mat = wm->wmmat; - const int16_t alpha = wm->alpha; - const int16_t beta = wm->beta; - const int16_t gamma = wm->gamma; - const int16_t delta = wm->delta; - - const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8); - uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, - p_width, p_height, p_stride, subsampling_x, - subsampling_y, bd, conv_params, alpha, beta, gamma, - delta); -} - -static int64_t highbd_frame_error(const uint16_t *const ref, int stride, - const uint16_t *const dst, int p_width, - int p_height, int p_stride, int bd) { - int64_t sum_error = 0; - for (int i = 0; i < p_height; ++i) { - for (int j = 0; j < p_width; ++j) { - sum_error += - highbd_error_measure(dst[j + i * p_stride] - ref[j + i * stride], bd); - } - } - return sum_error; -} - -static int64_t highbd_warp_error( - WarpedMotionParams *wm, const uint8_t *const ref8, int width, int height, - int stride, const uint8_t *const dst8, int p_col, int p_row, int p_width, - int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, - int64_t best_error) { - int64_t gm_sumerr = 0; - const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK); - const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK); - uint16_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK]; - - ConvolveParams conv_params = get_conv_params(0, 0, bd); - conv_params.use_jnt_comp_avg = 0; - for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) { - for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) { - // avoid warping extra 8x8 blocks in the padded region of the frame - // when p_width and p_height are not 
multiples of WARP_ERROR_BLOCK - const int warp_w = AOMMIN(error_bsize_w, p_col + p_width - j); - const int warp_h = AOMMIN(error_bsize_h, p_row + p_height - i); - highbd_warp_plane(wm, ref8, width, height, stride, - CONVERT_TO_BYTEPTR(tmp), j, i, warp_w, warp_h, - WARP_ERROR_BLOCK, subsampling_x, subsampling_y, bd, - &conv_params); - - gm_sumerr += highbd_frame_error( - tmp, WARP_ERROR_BLOCK, CONVERT_TO_SHORTPTR(dst8) + j + i * p_stride, - warp_w, warp_h, p_stride, bd); - if (gm_sumerr > best_error) return gm_sumerr; - } - } - return gm_sumerr; -} - -static INLINE int error_measure(int err) { - return error_measure_lut[255 + err]; -} - -/* The warp filter for ROTZOOM and AFFINE models works as follows: - * Split the input into 8x8 blocks - * For each block, project the point (4, 4) within the block, to get the - overall block position. Split into integer and fractional coordinates, - maintaining full WARPEDMODEL precision - * Filter horizontally: Generate 15 rows of 8 pixels each. Each pixel gets a - variable horizontal offset. This means that, while the rows of the - intermediate buffer align with the rows of the *reference* image, the - columns align with the columns of the *destination* image. - * Filter vertically: Generate the output block (up to 8x8 pixels, but if the - destination is too small we crop the output at this stage). Each pixel has - a variable vertical offset, so that the resulting rows are aligned with - the rows of the destination image. - - To accomplish these alignments, we factor the warp matrix as a - product of two shear / asymmetric zoom matrices: - / a b \ = / 1 0 \ * / 1+alpha beta \ - \ c d / \ gamma 1+delta / \ 0 1 / - where a, b, c, d are wmmat[2], wmmat[3], wmmat[4], wmmat[5] respectively. - The horizontal shear (with alpha and beta) is applied first, - then the vertical shear (with gamma and delta) is applied second. 
- - The only limitation is that, to fit this in a fixed 8-tap filter size, - the fractional pixel offsets must be at most +-1. Since the horizontal filter - generates 15 rows of 8 columns, and the initial point we project is at (4, 4) - within the block, the parameters must satisfy - 4 * |alpha| + 7 * |beta| <= 1 and 4 * |gamma| + 4 * |delta| <= 1 - for this filter to be applicable. - - Note: This function assumes that the caller has done all of the relevant - checks, ie. that we have a ROTZOOM or AFFINE model, that wm[4] and wm[5] - are set appropriately (if using a ROTZOOM model), and that alpha, beta, - gamma, delta are all in range. - - TODO(david.barker): Maybe support scaled references? -*/ -/* A note on hardware implementation: - The warp filter is intended to be implementable using the same hardware as - the high-precision convolve filters from the loop-restoration and - convolve-round experiments. - - For a single filter stage, considering all of the coefficient sets for the - warp filter and the regular convolution filter, an input in the range - [0, 2^k - 1] is mapped into the range [-56 * (2^k - 1), 184 * (2^k - 1)] - before rounding. - - Allowing for some changes to the filter coefficient sets, call the range - [-64 * 2^k, 192 * 2^k]. Then, if we initialize the accumulator to 64 * 2^k, - we can replace this by the range [0, 256 * 2^k], which can be stored in an - unsigned value with 8 + k bits. - - This allows the derivation of the appropriate bit widths and offsets for - the various intermediate values: If - - F := FILTER_BITS = 7 (or else the above ranges need adjusting) - So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit - intermediate value. 
- H := ROUND0_BITS - V := VERSHEAR_REDUCE_PREC_BITS - (and note that we must have H + V = 2*F for the output to have the same - scale as the input) - - then we end up with the following offsets and ranges: - Horizontal filter: Apply an offset of 1 << (bd + F - 1), sum fits into a - uint{bd + F + 1} - After rounding: The values stored in 'tmp' fit into a uint{bd + F + 1 - H}. - Vertical filter: Apply an offset of 1 << (bd + 2*F - H), sum fits into a - uint{bd + 2*F + 2 - H} - After rounding: The final value, before undoing the offset, fits into a - uint{bd + 2}. - - Then we need to undo the offsets before clamping to a pixel. Note that, - if we do this at the end, the amount to subtract is actually independent - of H and V: - - offset to subtract = (1 << ((bd + F - 1) - H + F - V)) + - (1 << ((bd + 2*F - H) - V)) - == (1 << (bd - 1)) + (1 << bd) - - This allows us to entirely avoid clamping in both the warp filter and - the convolve-round experiment. As of the time of writing, the Wiener filter - from loop-restoration can encode a central coefficient up to 216, which - leads to a maximum value of about 282 * 2^k after applying the offset. - So in that case we still need to clamp. -*/ -void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, - int height, int stride, uint8_t *pred, int p_col, - int p_row, int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, - ConvolveParams *conv_params, int16_t alpha, int16_t beta, - int16_t gamma, int16_t delta) { - int32_t tmp[15 * 8]; - const int bd = 8; - const int reduce_bits_horiz = conv_params->round_0; - const int reduce_bits_vert = conv_params->is_compound - ? 
conv_params->round_1 - : 2 * FILTER_BITS - reduce_bits_horiz; - const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz; - const int offset_bits_horiz = bd + FILTER_BITS - 1; - const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz; - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - (void)max_bits_horiz; - assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL)); - assert(IMPLIES(conv_params->do_average, conv_params->is_compound)); - - for (int i = p_row; i < p_row + p_height; i += 8) { - for (int j = p_col; j < p_col + p_width; j += 8) { - // Calculate the center of this 8x8 block, - // project to luma coordinates (if in a subsampled chroma plane), - // apply the affine transformation, - // then convert back to the original coordinates (if necessary) - const int32_t src_x = (j + 4) << subsampling_x; - const int32_t src_y = (i + 4) << subsampling_y; - const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0]; - const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1]; - const int32_t x4 = dst_x >> subsampling_x; - const int32_t y4 = dst_y >> subsampling_y; - - int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS; - int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS; - int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - - sx4 += alpha * (-4) + beta * (-4); - sy4 += gamma * (-4) + delta * (-4); - - sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - - // Horizontal filter - for (int k = -7; k < 8; ++k) { - // Clamp to top/bottom edge of the frame - const int iy = clamp(iy4 + k, 0, height - 1); - - int sx = sx4 + beta * (k + 4); - - for (int l = -4; l < 4; ++l) { - int ix = ix4 + l - 3; - // At this point, sx = sx4 + alpha * l + beta * k - const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) + - 
WARPEDPIXEL_PREC_SHIFTS; - assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); - const int16_t *coeffs = warped_filter[offs]; - - int32_t sum = 1 << offset_bits_horiz; - for (int m = 0; m < 8; ++m) { - // Clamp to left/right edge of the frame - const int sample_x = clamp(ix + m, 0, width - 1); - - sum += ref[iy * stride + sample_x] * coeffs[m]; - } - sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz); - assert(0 <= sum && sum < (1 << max_bits_horiz)); - tmp[(k + 7) * 8 + (l + 4)] = sum; - sx += alpha; - } - } - - // Vertical filter - for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) { - int sy = sy4 + delta * (k + 4); - for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) { - // At this point, sy = sy4 + gamma * l + delta * k - const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) + - WARPEDPIXEL_PREC_SHIFTS; - assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); - const int16_t *coeffs = warped_filter[offs]; - - int32_t sum = 1 << offset_bits_vert; - for (int m = 0; m < 8; ++m) { - sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m]; - } - - if (conv_params->is_compound) { - CONV_BUF_TYPE *p = - &conv_params - ->dst[(i - p_row + k + 4) * conv_params->dst_stride + - (j - p_col + l + 4)]; - sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert); - if (conv_params->do_average) { - uint8_t *dst8 = - &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)]; - int32_t tmp32 = *p; - if (conv_params->use_jnt_comp_avg) { - tmp32 = tmp32 * conv_params->fwd_offset + - sum * conv_params->bck_offset; - tmp32 = tmp32 >> DIST_PRECISION_BITS; - } else { - tmp32 += sum; - tmp32 = tmp32 >> 1; - } - tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) - - (1 << (offset_bits - conv_params->round_1 - 1)); - *dst8 = clip_pixel(ROUND_POWER_OF_TWO(tmp32, round_bits)); - } else { - *p = sum; - } - } else { - uint8_t *p = - &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)]; - sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert); - assert(0 <= 
sum && sum < (1 << (bd + 2))); - *p = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd)); - } - sy += gamma; - } - } - } - } -} - -static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref, - int width, int height, int stride, uint8_t *pred, - int p_col, int p_row, int p_width, int p_height, - int p_stride, int subsampling_x, int subsampling_y, - ConvolveParams *conv_params) { - assert(wm->wmtype <= AFFINE); - if (wm->wmtype == ROTZOOM) { - wm->wmmat[5] = wm->wmmat[2]; - wm->wmmat[4] = -wm->wmmat[3]; - } - const int32_t *const mat = wm->wmmat; - const int16_t alpha = wm->alpha; - const int16_t beta = wm->beta; - const int16_t gamma = wm->gamma; - const int16_t delta = wm->delta; - av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width, - p_height, p_stride, subsampling_x, subsampling_y, conv_params, - alpha, beta, gamma, delta); -} - -static int64_t frame_error(const uint8_t *const ref, int stride, - const uint8_t *const dst, int p_width, int p_height, - int p_stride) { - int64_t sum_error = 0; - for (int i = 0; i < p_height; ++i) { - for (int j = 0; j < p_width; ++j) { - sum_error += - (int64_t)error_measure(dst[j + i * p_stride] - ref[j + i * stride]); - } - } - return sum_error; -} - -static int64_t warp_error(WarpedMotionParams *wm, const uint8_t *const ref, - int width, int height, int stride, - const uint8_t *const dst, int p_col, int p_row, - int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, - int64_t best_error) { - int64_t gm_sumerr = 0; - int warp_w, warp_h; - int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK); - int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK); - uint8_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK]; - ConvolveParams conv_params = get_conv_params(0, 0, 8); - conv_params.use_jnt_comp_avg = 0; - - for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) { - for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) { - // avoid warping extra 8x8 blocks in the 
padded region of the frame - // when p_width and p_height are not multiples of WARP_ERROR_BLOCK - warp_w = AOMMIN(error_bsize_w, p_col + p_width - j); - warp_h = AOMMIN(error_bsize_h, p_row + p_height - i); - warp_plane(wm, ref, width, height, stride, tmp, j, i, warp_w, warp_h, - WARP_ERROR_BLOCK, subsampling_x, subsampling_y, &conv_params); - - gm_sumerr += frame_error(tmp, WARP_ERROR_BLOCK, dst + j + i * p_stride, - warp_w, warp_h, p_stride); - if (gm_sumerr > best_error) return gm_sumerr; - } - } - return gm_sumerr; -} - -int64_t av1_frame_error(int use_hbd, int bd, const uint8_t *ref, int stride, - uint8_t *dst, int p_width, int p_height, int p_stride) { - if (use_hbd) { - return highbd_frame_error(CONVERT_TO_SHORTPTR(ref), stride, - CONVERT_TO_SHORTPTR(dst), p_width, p_height, - p_stride, bd); - } - return frame_error(ref, stride, dst, p_width, p_height, p_stride); -} - -int64_t av1_warp_error(WarpedMotionParams *wm, int use_hbd, int bd, - const uint8_t *ref, int width, int height, int stride, - uint8_t *dst, int p_col, int p_row, int p_width, - int p_height, int p_stride, int subsampling_x, - int subsampling_y, int64_t best_error) { - if (wm->wmtype <= AFFINE) - if (!get_shear_params(wm)) return 1; - if (use_hbd) - return highbd_warp_error(wm, ref, width, height, stride, dst, p_col, p_row, - p_width, p_height, p_stride, subsampling_x, - subsampling_y, bd, best_error); - return warp_error(wm, ref, width, height, stride, dst, p_col, p_row, p_width, - p_height, p_stride, subsampling_x, subsampling_y, - best_error); -} - -void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd, - const uint8_t *ref, int width, int height, int stride, - uint8_t *pred, int p_col, int p_row, int p_width, - int p_height, int p_stride, int subsampling_x, - int subsampling_y, ConvolveParams *conv_params) { - if (use_hbd) - highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, - p_width, p_height, p_stride, subsampling_x, subsampling_y, - bd, conv_params); - else 
- warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width, - p_height, p_stride, subsampling_x, subsampling_y, conv_params); -} - -#define LS_MV_MAX 256 // max mv in 1/8-pel -// Use LS_STEP = 8 so that 2 less bits needed for A, Bx, By. -#define LS_STEP 8 - -// Assuming LS_MV_MAX is < MAX_SB_SIZE * 8, -// the precision needed is: -// (MAX_SB_SIZE_LOG2 + 3) [for sx * sx magnitude] + -// (MAX_SB_SIZE_LOG2 + 4) [for sx * dx magnitude] + -// 1 [for sign] + -// LEAST_SQUARES_SAMPLES_MAX_BITS -// [for adding up to LEAST_SQUARES_SAMPLES_MAX samples] -// The value is 23 -#define LS_MAT_RANGE_BITS \ - ((MAX_SB_SIZE_LOG2 + 4) * 2 + LEAST_SQUARES_SAMPLES_MAX_BITS) - -// Bit-depth reduction from the full-range -#define LS_MAT_DOWN_BITS 2 - -// bits range of A, Bx and By after downshifting -#define LS_MAT_BITS (LS_MAT_RANGE_BITS - LS_MAT_DOWN_BITS) -#define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1))) -#define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1) - -// By setting LS_STEP = 8, the least 2 bits of every elements in A, Bx, By are -// 0. So, we can reduce LS_MAT_RANGE_BITS(2) bits here. -#define LS_SQUARE(a) \ - (((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> \ - (2 + LS_MAT_DOWN_BITS)) -#define LS_PRODUCT1(a, b) \ - (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP) >> \ - (2 + LS_MAT_DOWN_BITS)) -#define LS_PRODUCT2(a, b) \ - (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP * 2) >> \ - (2 + LS_MAT_DOWN_BITS)) - -#define USE_LIMITED_PREC_MULT 0 - -#if USE_LIMITED_PREC_MULT - -#define MUL_PREC_BITS 16 -static uint16_t resolve_multiplier_64(uint64_t D, int16_t *shift) { - int msb = 0; - uint16_t mult = 0; - *shift = 0; - if (D != 0) { - msb = (int16_t)((D >> 32) ? 
get_msb((unsigned int)(D >> 32)) + 32 - : get_msb((unsigned int)D)); - if (msb >= MUL_PREC_BITS) { - mult = (uint16_t)ROUND_POWER_OF_TWO_64(D, msb + 1 - MUL_PREC_BITS); - *shift = msb + 1 - MUL_PREC_BITS; - } else { - mult = (uint16_t)D; - *shift = 0; - } - } - return mult; -} - -static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) { - int32_t ret; - int16_t mshift; - uint16_t Mul = resolve_multiplier_64(llabs(Px), &mshift); - int32_t v = (int32_t)Mul * (int32_t)iDet * (Px < 0 ? -1 : 1); - shift -= mshift; - if (shift > 0) { - return (int32_t)clamp(ROUND_POWER_OF_TWO_SIGNED(v, shift), - -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1, - WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1); - } else { - return (int32_t)clamp(v * (1 << (-shift)), - -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1, - WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1); - } - return ret; -} - -static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) { - int16_t mshift; - uint16_t Mul = resolve_multiplier_64(llabs(Px), &mshift); - int32_t v = (int32_t)Mul * (int32_t)iDet * (Px < 0 ? 
-1 : 1); - shift -= mshift; - if (shift > 0) { - return (int32_t)clamp( - ROUND_POWER_OF_TWO_SIGNED(v, shift), - (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1, - (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1); - } else { - return (int32_t)clamp( - v * (1 << (-shift)), - (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1, - (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1); - } -} - -#else - -static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) { - int64_t v = Px * (int64_t)iDet; - return (int32_t)clamp64(ROUND_POWER_OF_TWO_SIGNED_64(v, shift), - -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1, - WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1); -} - -static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) { - int64_t v = Px * (int64_t)iDet; - return (int32_t)clamp64( - ROUND_POWER_OF_TWO_SIGNED_64(v, shift), - (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1, - (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1); -} -#endif // USE_LIMITED_PREC_MULT - -static int find_affine_int(int np, const int *pts1, const int *pts2, - BLOCK_SIZE bsize, int mvy, int mvx, - WarpedMotionParams *wm, int mi_row, int mi_col) { - int32_t A[2][2] = { { 0, 0 }, { 0, 0 } }; - int32_t Bx[2] = { 0, 0 }; - int32_t By[2] = { 0, 0 }; - int i; - - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - const int rsuy = (AOMMAX(bh, MI_SIZE) / 2 - 1); - const int rsux = (AOMMAX(bw, MI_SIZE) / 2 - 1); - const int suy = rsuy * 8; - const int sux = rsux * 8; - const int duy = suy + mvy; - const int dux = sux + mvx; - const int isuy = (mi_row * MI_SIZE + rsuy); - const int isux = (mi_col * MI_SIZE + rsux); - - // Assume the center pixel of the block has exactly the same motion vector - // as transmitted for the block. 
First shift the origin of the source - // points to the block center, and the origin of the destination points to - // the block center added to the motion vector transmitted. - // Let (xi, yi) denote the source points and (xi', yi') denote destination - // points after origin shfifting, for i = 0, 1, 2, .... n-1. - // Then if P = [x0, y0, - // x1, y1 - // x2, y1, - // .... - // ] - // q = [x0', x1', x2', ... ]' - // r = [y0', y1', y2', ... ]' - // the least squares problems that need to be solved are: - // [h1, h2]' = inv(P'P)P'q and - // [h3, h4]' = inv(P'P)P'r - // where the affine transformation is given by: - // x' = h1.x + h2.y - // y' = h3.x + h4.y - // - // The loop below computes: A = P'P, Bx = P'q, By = P'r - // We need to just compute inv(A).Bx and inv(A).By for the solutions. - // Contribution from neighbor block - for (i = 0; i < np; i++) { - const int dx = pts2[i * 2] - dux; - const int dy = pts2[i * 2 + 1] - duy; - const int sx = pts1[i * 2] - sux; - const int sy = pts1[i * 2 + 1] - suy; - // (TODO)yunqing: This comparison wouldn't be necessary if the sample - // selection is done in find_samples(). Also, global offset can be removed - // while collecting samples. - if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) { - A[0][0] += LS_SQUARE(sx); - A[0][1] += LS_PRODUCT1(sx, sy); - A[1][1] += LS_SQUARE(sy); - Bx[0] += LS_PRODUCT2(sx, dx); - Bx[1] += LS_PRODUCT1(sy, dx); - By[0] += LS_PRODUCT1(sx, dy); - By[1] += LS_PRODUCT2(sy, dy); - } - } - - // Just for debugging, and can be removed later. 
- assert(A[0][0] >= LS_MAT_MIN && A[0][0] <= LS_MAT_MAX); - assert(A[0][1] >= LS_MAT_MIN && A[0][1] <= LS_MAT_MAX); - assert(A[1][1] >= LS_MAT_MIN && A[1][1] <= LS_MAT_MAX); - assert(Bx[0] >= LS_MAT_MIN && Bx[0] <= LS_MAT_MAX); - assert(Bx[1] >= LS_MAT_MIN && Bx[1] <= LS_MAT_MAX); - assert(By[0] >= LS_MAT_MIN && By[0] <= LS_MAT_MAX); - assert(By[1] >= LS_MAT_MIN && By[1] <= LS_MAT_MAX); - - int64_t Det; - int16_t iDet, shift; - - // Compute Determinant of A - Det = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1]; - if (Det == 0) return 1; - iDet = resolve_divisor_64(llabs(Det), &shift) * (Det < 0 ? -1 : 1); - shift -= WARPEDMODEL_PREC_BITS; - if (shift < 0) { - iDet <<= (-shift); - shift = 0; - } - - int64_t Px[2], Py[2]; - - // These divided by the Det, are the least squares solutions - Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1]; - Px[1] = -(int64_t)A[0][1] * Bx[0] + (int64_t)A[0][0] * Bx[1]; - Py[0] = (int64_t)A[1][1] * By[0] - (int64_t)A[0][1] * By[1]; - Py[1] = -(int64_t)A[0][1] * By[0] + (int64_t)A[0][0] * By[1]; - - wm->wmmat[2] = get_mult_shift_diag(Px[0], iDet, shift); - wm->wmmat[3] = get_mult_shift_ndiag(Px[1], iDet, shift); - wm->wmmat[4] = get_mult_shift_ndiag(Py[0], iDet, shift); - wm->wmmat[5] = get_mult_shift_diag(Py[1], iDet, shift); - - // Note: In the vx, vy expressions below, the max value of each of the - // 2nd and 3rd terms are (2^16 - 1) * (2^13 - 1). That leaves enough room - // for the first term so that the overall sum in the worst case fits - // within 32 bits overall. 
- int32_t vx = mvx * (1 << (WARPEDMODEL_PREC_BITS - 3)) - - (isux * (wm->wmmat[2] - (1 << WARPEDMODEL_PREC_BITS)) + - isuy * wm->wmmat[3]); - int32_t vy = mvy * (1 << (WARPEDMODEL_PREC_BITS - 3)) - - (isux * wm->wmmat[4] + - isuy * (wm->wmmat[5] - (1 << WARPEDMODEL_PREC_BITS))); - wm->wmmat[0] = - clamp(vx, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1); - wm->wmmat[1] = - clamp(vy, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1); - - wm->wmmat[6] = wm->wmmat[7] = 0; - return 0; -} - -int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy, - int mvx, WarpedMotionParams *wm_params, int mi_row, - int mi_col) { - assert(wm_params->wmtype == AFFINE); - - if (find_affine_int(np, pts1, pts2, bsize, mvy, mvx, wm_params, mi_row, - mi_col)) - return 1; - - // check compatibility with the fast warp filter - if (!get_shear_params(wm_params)) return 1; - - return 0; -} diff --git a/third_party/aom/av1/common/warped_motion.h b/third_party/aom/av1/common/warped_motion.h deleted file mode 100644 index a1a4f067d..000000000 --- a/third_party/aom/av1/common/warped_motion.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_WARPED_MOTION_H_ -#define AOM_AV1_COMMON_WARPED_MOTION_H_ - -#include <stdio.h> -#include <stdlib.h> -#include <memory.h> -#include <math.h> -#include <assert.h> - -#include "config/aom_config.h" - -#include "aom_ports/mem.h" -#include "aom_dsp/aom_dsp_common.h" -#include "av1/common/mv.h" -#include "av1/common/convolve.h" - -#define MAX_PARAMDIM 9 -#define LEAST_SQUARES_SAMPLES_MAX_BITS 3 -#define LEAST_SQUARES_SAMPLES_MAX (1 << LEAST_SQUARES_SAMPLES_MAX_BITS) -#define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2) -#define WARPED_MOTION_DEBUG 0 -#define DEFAULT_WMTYPE AFFINE - -extern const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8]; - -static const uint8_t warp_pad_left[14][16] = { - { 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 5, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 6, 6, 6, 6, 6, 6, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 7, 7, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 11, 12, 13, 14, 15 }, - { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 12, 13, 14, 15 }, - { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 13, 14, 15 }, - { 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15 }, - { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 15 }, - { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15 }, -}; - -static const uint8_t warp_pad_right[14][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 13, 13 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 12 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 11 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 10, 10 }, - { 0, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9, 9, 9, 9 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 }, - { 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 }, - { 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, - { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, - { 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, - { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, - { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } -}; - -// Returns the error between the result of applying motion 'wm' to the frame -// described by 'ref' and the frame described by 'dst'. -int64_t av1_warp_error(WarpedMotionParams *wm, int use_hbd, int bd, - const uint8_t *ref, int width, int height, int stride, - uint8_t *dst, int p_col, int p_row, int p_width, - int p_height, int p_stride, int subsampling_x, - int subsampling_y, int64_t best_error); - -// Returns the error between the frame described by 'ref' and the frame -// described by 'dst'. -int64_t av1_frame_error(int use_hbd, int bd, const uint8_t *ref, int stride, - uint8_t *dst, int p_width, int p_height, int p_stride); - -void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd, - const uint8_t *ref, int width, int height, int stride, - uint8_t *pred, int p_col, int p_row, int p_width, - int p_height, int p_stride, int subsampling_x, - int subsampling_y, ConvolveParams *conv_params); - -int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy, - int mvx, WarpedMotionParams *wm_params, int mi_row, - int mi_col); - -int get_shear_params(WarpedMotionParams *wm); -#endif // AOM_AV1_COMMON_WARPED_MOTION_H_ diff --git a/third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c b/third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c deleted file mode 100644 index 8aa14696f..000000000 --- a/third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <assert.h> -#include <smmintrin.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/convolve.h" -#include "av1/common/resize.h" -#include "aom_dsp/x86/synonyms.h" - -// Note: If the crop width is not a multiple of 4, then, unlike the C version, -// this function will overwrite some of the padding on the right hand side of -// the frame. This padding appears to be trashed anyway, so this should not -// affect the running of the decoder. -void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int w, int h, - const int16_t *x_filters, int x0_qn, - int x_step_qn) { - assert(UPSCALE_NORMATIVE_TAPS == 8); - - src -= UPSCALE_NORMATIVE_TAPS / 2 - 1; - - const __m128i round_add = _mm_set1_epi32((1 << FILTER_BITS) >> 1); - const __m128i zero = _mm_setzero_si128(); - - const uint8_t *src_y; - uint8_t *dst_y; - int x_qn = x0_qn; - for (int x = 0; x < w; x += 4, x_qn += 4 * x_step_qn) { - const int x_filter_idx0 = - ((x_qn + 0 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - const int x_filter_idx1 = - ((x_qn + 1 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - const int x_filter_idx2 = - ((x_qn + 2 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - const int x_filter_idx3 = - ((x_qn + 3 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - - assert(x_filter_idx0 <= RS_SUBPEL_MASK); - assert(x_filter_idx1 <= RS_SUBPEL_MASK); - assert(x_filter_idx2 <= RS_SUBPEL_MASK); - 
assert(x_filter_idx3 <= RS_SUBPEL_MASK); - - const int16_t *const x_filter0 = - &x_filters[x_filter_idx0 * UPSCALE_NORMATIVE_TAPS]; - const int16_t *const x_filter1 = - &x_filters[x_filter_idx1 * UPSCALE_NORMATIVE_TAPS]; - const int16_t *const x_filter2 = - &x_filters[x_filter_idx2 * UPSCALE_NORMATIVE_TAPS]; - const int16_t *const x_filter3 = - &x_filters[x_filter_idx3 * UPSCALE_NORMATIVE_TAPS]; - - const __m128i fil0_16 = xx_loadu_128(x_filter0); - const __m128i fil1_16 = xx_loadu_128(x_filter1); - const __m128i fil2_16 = xx_loadu_128(x_filter2); - const __m128i fil3_16 = xx_loadu_128(x_filter3); - - src_y = src; - dst_y = dst; - for (int y = 0; y < h; y++, src_y += src_stride, dst_y += dst_stride) { - const uint8_t *const src_x0 = - &src_y[(x_qn + 0 * x_step_qn) >> RS_SCALE_SUBPEL_BITS]; - const uint8_t *const src_x1 = - &src_y[(x_qn + 1 * x_step_qn) >> RS_SCALE_SUBPEL_BITS]; - const uint8_t *const src_x2 = - &src_y[(x_qn + 2 * x_step_qn) >> RS_SCALE_SUBPEL_BITS]; - const uint8_t *const src_x3 = - &src_y[(x_qn + 3 * x_step_qn) >> RS_SCALE_SUBPEL_BITS]; - - // Load up the source data. This is 8-bit input data, so each load - // gets 8 pixels. - const __m128i src0_8 = xx_loadl_64(src_x0); - const __m128i src1_8 = xx_loadl_64(src_x1); - const __m128i src2_8 = xx_loadl_64(src_x2); - const __m128i src3_8 = xx_loadl_64(src_x3); - - // Now zero-extend up to 16-bit precision, i.e. - // [ 00 00 00 00 hg fe dc ba ] -> [ 0h 0g 0f 0e 0d 0c 0b 0a ] - const __m128i src0_16 = _mm_cvtepu8_epi16(src0_8); - const __m128i src1_16 = _mm_cvtepu8_epi16(src1_8); - const __m128i src2_16 = _mm_cvtepu8_epi16(src2_8); - const __m128i src3_16 = _mm_cvtepu8_epi16(src3_8); - - // Multiply by filter coefficients (results in a 32-bit value), - // and add adjacent pairs, i.e. 
- // ([ s7 s6 s5 s4 s3 s2 s1 s0], [ f7 f6 f5 f4 f3 f2 f1 f0 ]) - // -> [ {s7*f7+s6*f6} {s5*f5+s4*f4} {s3*f3+s2*f2} {s1*f1+s0*f0} ] - const __m128i conv0_32 = _mm_madd_epi16(src0_16, fil0_16); - const __m128i conv1_32 = _mm_madd_epi16(src1_16, fil1_16); - const __m128i conv2_32 = _mm_madd_epi16(src2_16, fil2_16); - const __m128i conv3_32 = _mm_madd_epi16(src3_16, fil3_16); - - // Reduce horizontally and add, i.e. - // ([ D C B A ], [ S R Q P ]) -> [ S+R Q+P D+C B+A ] - const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32); - const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32); - - const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32); - - // Divide down by (1 << FILTER_BITS), rounding to nearest. - const __m128i shifted_32 = - _mm_srai_epi32(_mm_add_epi32(conv0123_32, round_add), FILTER_BITS); - - // Pack 32-bit values into 16-bit values, i.e. - // ([ D C B A ], [ 0 0 0 0 ]) -> [ 0 0 0 0 D C B A ] - const __m128i shifted_16 = _mm_packus_epi32(shifted_32, zero); - - // Pack 16-bit values into 8-bit values, i.e. - // ([ 0 0 0 0 D C B A ], [ 0 0 0 0 0 0 0 0 ]) - // -> [ 0 0 0 0 0 0 DC BA ] - const __m128i shifted_8 = _mm_packus_epi16(shifted_16, zero); - - // Write to the output - xx_storel_32(&dst_y[x], shifted_8); - } - } -} - -// Note: If the crop width is not a multiple of 4, then, unlike the C version, -// this function will overwrite some of the padding on the right hand side of -// the frame. This padding appears to be trashed anyway, so this should not -// affect the running of the decoder. 
-void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, int w, - int h, const int16_t *x_filters, - int x0_qn, int x_step_qn, int bd) { - assert(UPSCALE_NORMATIVE_TAPS == 8); - assert(bd == 8 || bd == 10 || bd == 12); - - src -= UPSCALE_NORMATIVE_TAPS / 2 - 1; - - const __m128i round_add = _mm_set1_epi32((1 << FILTER_BITS) >> 1); - const __m128i zero = _mm_setzero_si128(); - const __m128i clip_maximum = _mm_set1_epi16((1 << bd) - 1); - - const uint16_t *src_y; - uint16_t *dst_y; - int x_qn = x0_qn; - for (int x = 0; x < w; x += 4, x_qn += 4 * x_step_qn) { - const int x_filter_idx0 = - ((x_qn + 0 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - const int x_filter_idx1 = - ((x_qn + 1 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - const int x_filter_idx2 = - ((x_qn + 2 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - const int x_filter_idx3 = - ((x_qn + 3 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; - - assert(x_filter_idx0 <= RS_SUBPEL_MASK); - assert(x_filter_idx1 <= RS_SUBPEL_MASK); - assert(x_filter_idx2 <= RS_SUBPEL_MASK); - assert(x_filter_idx3 <= RS_SUBPEL_MASK); - - const int16_t *const x_filter0 = - &x_filters[x_filter_idx0 * UPSCALE_NORMATIVE_TAPS]; - const int16_t *const x_filter1 = - &x_filters[x_filter_idx1 * UPSCALE_NORMATIVE_TAPS]; - const int16_t *const x_filter2 = - &x_filters[x_filter_idx2 * UPSCALE_NORMATIVE_TAPS]; - const int16_t *const x_filter3 = - &x_filters[x_filter_idx3 * UPSCALE_NORMATIVE_TAPS]; - - const __m128i fil0_16 = xx_loadu_128(x_filter0); - const __m128i fil1_16 = xx_loadu_128(x_filter1); - const __m128i fil2_16 = xx_loadu_128(x_filter2); - const __m128i fil3_16 = xx_loadu_128(x_filter3); - - src_y = src; - dst_y = dst; - for (int y = 0; y < h; y++, src_y += src_stride, dst_y += dst_stride) { - const uint16_t *const src_x0 = - &src_y[(x_qn + 0 * x_step_qn) >> RS_SCALE_SUBPEL_BITS]; - const uint16_t *const src_x1 = - 
&src_y[(x_qn + 1 * x_step_qn) >> RS_SCALE_SUBPEL_BITS]; - const uint16_t *const src_x2 = - &src_y[(x_qn + 2 * x_step_qn) >> RS_SCALE_SUBPEL_BITS]; - const uint16_t *const src_x3 = - &src_y[(x_qn + 3 * x_step_qn) >> RS_SCALE_SUBPEL_BITS]; - - // Load up the source data. This is 16-bit input data, so each load - // gets 8 pixels. - const __m128i src0_16 = xx_loadu_128(src_x0); - const __m128i src1_16 = xx_loadu_128(src_x1); - const __m128i src2_16 = xx_loadu_128(src_x2); - const __m128i src3_16 = xx_loadu_128(src_x3); - - // Multiply by filter coefficients (results in a 32-bit value), - // and add adjacent pairs, i.e. - // ([ s7 s6 s5 s4 s3 s2 s1 s0], [ f7 f6 f5 f4 f3 f2 f1 f0 ]) - // -> [ {s7*f7+s6*f6} {s5*f5+s4*f4} {s3*f3+s2*f2} {s1*f1+s0*f0} ] - const __m128i conv0_32 = _mm_madd_epi16(src0_16, fil0_16); - const __m128i conv1_32 = _mm_madd_epi16(src1_16, fil1_16); - const __m128i conv2_32 = _mm_madd_epi16(src2_16, fil2_16); - const __m128i conv3_32 = _mm_madd_epi16(src3_16, fil3_16); - - // Reduce horizontally and add, i.e. - // ([ D C B A ], [ S R Q P ]) -> [ S+R Q+P D+C B+A ] - const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32); - const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32); - - const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32); - - // Divide down by (1 << FILTER_BITS), rounding to nearest. - const __m128i shifted_32 = - _mm_srai_epi32(_mm_add_epi32(conv0123_32, round_add), FILTER_BITS); - - // Pack 32-bit values into 16-bit values, i.e. 
- // ([ D C B A ], [ 0 0 0 0 ]) -> [ 0 0 0 0 D C B A ] - const __m128i shifted_16 = _mm_packus_epi32(shifted_32, zero); - - // Clip the values at (1 << bd) - 1 - const __m128i clipped_16 = _mm_min_epi16(shifted_16, clip_maximum); - - // Write to the output - xx_storel_64(&dst_y[x], clipped_16); - } - } -} diff --git a/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c b/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c deleted file mode 100644 index d9fb53785..000000000 --- a/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c +++ /dev/null @@ -1,499 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <assert.h> -#include <smmintrin.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "av1/common/convolve.h" - -// A specialised version of hfilter, the horizontal filter for -// av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters. 
-static void hfilter8(const uint8_t *src, int src_stride, int16_t *dst, int w, - int h, int subpel_x_qn, int x_step_qn, - const InterpFilterParams *filter_params, unsigned round) { - const int bd = 8; - const int ntaps = 8; - - src -= ntaps / 2 - 1; - - int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1)); - const __m128i round_add = _mm_set1_epi32(round_add32); - const __m128i round_shift = _mm_cvtsi32_si128(round); - - int x_qn = subpel_x_qn; - for (int x = 0; x < w; ++x, x_qn += x_step_qn) { - const uint8_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS); - const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; - assert(filter_idx < SUBPEL_SHIFTS); - const int16_t *filter = - av1_get_interp_filter_subpel_kernel(filter_params, filter_idx); - - // Load the filter coefficients - const __m128i coefflo = _mm_loadu_si128((__m128i *)filter); - const __m128i zero = _mm_castps_si128(_mm_setzero_ps()); - - int y; - for (y = 0; y <= h - 4; y += 4) { - const uint8_t *const src0 = src_col + y * src_stride; - const uint8_t *const src1 = src0 + 1 * src_stride; - const uint8_t *const src2 = src0 + 2 * src_stride; - const uint8_t *const src3 = src0 + 3 * src_stride; - - // Load up source data. This is 8-bit input data; each load is just - // loading the lower half of the register and gets 8 pixels - const __m128i data08 = _mm_loadl_epi64((__m128i *)src0); - const __m128i data18 = _mm_loadl_epi64((__m128i *)src1); - const __m128i data28 = _mm_loadl_epi64((__m128i *)src2); - const __m128i data38 = _mm_loadl_epi64((__m128i *)src3); - - // Now zero-extend up to 16-bit precision by interleaving with - // zeros. 
Drop the upper half of each register (which just had zeros) - const __m128i data0lo = _mm_unpacklo_epi8(data08, zero); - const __m128i data1lo = _mm_unpacklo_epi8(data18, zero); - const __m128i data2lo = _mm_unpacklo_epi8(data28, zero); - const __m128i data3lo = _mm_unpacklo_epi8(data38, zero); - - // Multiply by coefficients - const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo); - const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo); - const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo); - const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo); - - // Reduce horizontally and add - const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo); - const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo); - const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo); - - // Divide down by (1 << round), rounding to nearest. - __m128i shifted = - _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift); - - shifted = _mm_packus_epi32(shifted, shifted); - // Write transposed to the output - _mm_storel_epi64((__m128i *)(dst + y + x * h), shifted); - } - for (; y < h; ++y) { - const uint8_t *const src_row = src_col + y * src_stride; - - int32_t sum = (1 << (bd + FILTER_BITS - 1)); - for (int k = 0; k < ntaps; ++k) { - sum += filter[k] * src_row[k]; - } - - dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round); - } - } -} - -static __m128i convolve_16_8(const int16_t *src, __m128i coeff) { - __m128i data = _mm_loadu_si128((__m128i *)src); - return _mm_madd_epi16(data, coeff); -} - -// A specialised version of vfilter, the vertical filter for -// av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters. 
-static void vfilter8(const int16_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, int subpel_y_qn, - int y_step_qn, const InterpFilterParams *filter_params, - const ConvolveParams *conv_params, int bd) { - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int ntaps = 8; - - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1); - - const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - const __m128i sub = _mm_set1_epi16(sub32); - - CONV_BUF_TYPE *dst16 = conv_params->dst; - const int dst16_stride = conv_params->dst_stride; - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - const __m128i bits_shift = _mm_cvtsi32_si128(bits); - const __m128i bits_const = _mm_set1_epi16(((1 << bits) >> 1)); - const __m128i round_shift_add = - _mm_set1_epi32(((1 << conv_params->round_1) >> 1)); - const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits); - - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi16(w0); - const __m128i wt1 = _mm_set1_epi16(w1); - const __m128i wt = _mm_unpacklo_epi16(wt0, wt1); - - int y_qn = subpel_y_qn; - for (int y = 0; y < h; ++y, y_qn += y_step_qn) { - const int16_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS); - const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; - assert(filter_idx < SUBPEL_SHIFTS); - const int16_t *filter = - av1_get_interp_filter_subpel_kernel(filter_params, filter_idx); - - const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter); - int x; - for (x = 0; x <= w - 4; x += 4) { - const int16_t *const src0 = src_y + x * src_stride; - const int16_t *const src1 = src0 + 1 * src_stride; - const int16_t *const src2 = src0 + 2 * src_stride; - const int16_t *const src3 = src0 + 3 * src_stride; - - // Load the source data for the three rows, adding the three registers of - // convolved 
products to one as we go (conv0..conv3) to avoid the - // register pressure getting too high. - const __m128i conv0 = convolve_16_8(src0, coeff0716); - const __m128i conv1 = convolve_16_8(src1, coeff0716); - const __m128i conv2 = convolve_16_8(src2, coeff0716); - const __m128i conv3 = convolve_16_8(src3, coeff0716); - - // Now reduce horizontally to get one lane for each result - const __m128i conv01 = _mm_hadd_epi32(conv0, conv1); - const __m128i conv23 = _mm_hadd_epi32(conv2, conv3); - __m128i conv = _mm_hadd_epi32(conv01, conv23); - - conv = _mm_add_epi32(conv, res_add_const); - // Divide down by (1 << round_1), rounding to nearest and subtract sub32. - __m128i shifted = - _mm_sra_epi32(_mm_add_epi32(conv, round_shift_add), round_shift); - - uint8_t *dst_x = dst + y * dst_stride + x; - CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x; - __m128i result; - __m128i shifted_16 = _mm_packus_epi32(shifted, shifted); - - if (conv_params->is_compound) { - if (conv_params->do_average) { - const __m128i p_16 = _mm_loadl_epi64((__m128i *)dst_16_x); - if (conv_params->use_jnt_comp_avg) { - const __m128i p_16_lo = _mm_unpacklo_epi16(p_16, shifted_16); - const __m128i wt_res_lo = _mm_madd_epi16(p_16_lo, wt); - const __m128i shifted_32 = - _mm_srai_epi32(wt_res_lo, DIST_PRECISION_BITS); - shifted_16 = _mm_packus_epi32(shifted_32, shifted_32); - } else { - shifted_16 = _mm_srai_epi16(_mm_add_epi16(p_16, shifted_16), 1); - } - const __m128i subbed = _mm_sub_epi16(shifted_16, sub); - result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift); - const __m128i result_8 = _mm_packus_epi16(result, result); - *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8); - } else { - _mm_storel_epi64((__m128i *)dst_16_x, shifted_16); - } - } else { - const __m128i subbed = _mm_sub_epi16(shifted_16, sub); - result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift); - const __m128i result_8 = _mm_packus_epi16(result, result); - *(uint32_t *)dst_x = 
_mm_cvtsi128_si32(result_8); - } - } - for (; x < w; ++x) { - const int16_t *src_x = src_y + x * src_stride; - int32_t sum = 1 << offset_bits; - for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k]; - CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); - - if (conv_params->is_compound) { - if (conv_params->do_average) { - int32_t tmp = dst16[y * dst16_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - /* Subtract round offset and convolve round */ - tmp = tmp - sub32; - dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits)); - } else { - dst16[y * dst16_stride + x] = res; - } - } else { - /* Subtract round offset and convolve round */ - int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits)); - } - } - } -} -void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, - uint8_t *dst8, int dst8_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_qn, const int x_step_qn, - const int subpel_y_qn, const int y_step_qn, - ConvolveParams *conv_params) { - // TODO(yaowu): remove unnecessary initializations - int16_t tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE] = { 0 }; - int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) + - filter_params_y->taps; - - const int xtaps = filter_params_x->taps; - const int ytaps = filter_params_y->taps; - const int fo_vert = ytaps / 2 - 1; - assert((xtaps == 8) && (ytaps == 8)); - (void)xtaps; - - // horizontal filter - hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h, subpel_x_qn, - x_step_qn, filter_params_x, conv_params->round_0); - - // vertical filter (input is transposed) - vfilter8(tmp, 
im_h, dst8, dst8_stride, w, h, subpel_y_qn, y_step_qn, - filter_params_y, conv_params, 8); -} - -// A specialised version of hfilter, the horizontal filter for -// av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap -// filters. -static void highbd_hfilter8(const uint16_t *src, int src_stride, int16_t *dst, - int w, int h, int subpel_x_qn, int x_step_qn, - const InterpFilterParams *filter_params, - unsigned round, int bd) { - const int ntaps = 8; - - src -= ntaps / 2 - 1; - - int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1)); - const __m128i round_add = _mm_set1_epi32(round_add32); - const __m128i round_shift = _mm_cvtsi32_si128(round); - - int x_qn = subpel_x_qn; - for (int x = 0; x < w; ++x, x_qn += x_step_qn) { - const uint16_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS); - const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; - assert(filter_idx < SUBPEL_SHIFTS); - const int16_t *filter = - av1_get_interp_filter_subpel_kernel(filter_params, filter_idx); - - // Load the filter coefficients - const __m128i coefflo = _mm_loadu_si128((__m128i *)filter); - - int y; - for (y = 0; y <= h - 4; y += 4) { - const uint16_t *const src0 = src_col + y * src_stride; - const uint16_t *const src1 = src0 + 1 * src_stride; - const uint16_t *const src2 = src0 + 2 * src_stride; - const uint16_t *const src3 = src0 + 3 * src_stride; - - // Load up source data. This is 16-bit input data, so each load gets the 8 - // pixels we need. 
- const __m128i data0lo = _mm_loadu_si128((__m128i *)src0); - const __m128i data1lo = _mm_loadu_si128((__m128i *)src1); - const __m128i data2lo = _mm_loadu_si128((__m128i *)src2); - const __m128i data3lo = _mm_loadu_si128((__m128i *)src3); - - // Multiply by coefficients - const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo); - const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo); - const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo); - const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo); - - // Reduce horizontally and add - const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo); - const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo); - const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo); - - // Divide down by (1 << round), rounding to nearest. - __m128i shifted = - _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift); - - shifted = _mm_packus_epi32(shifted, shifted); - // Write transposed to the output - _mm_storel_epi64((__m128i *)(dst + y + x * h), shifted); - } - for (; y < h; ++y) { - const uint16_t *const src_row = src_col + y * src_stride; - - int32_t sum = (1 << (bd + FILTER_BITS - 1)); - for (int k = 0; k < ntaps; ++k) { - sum += filter[k] * src_row[k]; - } - - dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round); - } - } -} -// A specialised version of vfilter, the vertical filter for -// av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap -// filters. 
-static void highbd_vfilter8(const int16_t *src, int src_stride, uint16_t *dst, - int dst_stride, int w, int h, int subpel_y_qn, - int y_step_qn, - const InterpFilterParams *filter_params, - const ConvolveParams *conv_params, int bd) { - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const int ntaps = 8; - - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1); - - const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - const __m128i sub = _mm_set1_epi32(sub32); - - CONV_BUF_TYPE *dst16 = conv_params->dst; - const int dst16_stride = conv_params->dst_stride; - const __m128i clip_pixel_ = - _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255)); - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - const __m128i bits_shift = _mm_cvtsi32_si128(bits); - const __m128i bits_const = _mm_set1_epi32(((1 << bits) >> 1)); - const __m128i round_shift_add = - _mm_set1_epi32(((1 << conv_params->round_1) >> 1)); - const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - __m128i round_bits_shift = _mm_cvtsi32_si128(round_bits); - __m128i round_bits_const = _mm_set1_epi32(((1 << round_bits) >> 1)); - - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi32(w0); - const __m128i wt1 = _mm_set1_epi32(w1); - - int y_qn = subpel_y_qn; - for (int y = 0; y < h; ++y, y_qn += y_step_qn) { - const int16_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS); - const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; - assert(filter_idx < SUBPEL_SHIFTS); - const int16_t *filter = - av1_get_interp_filter_subpel_kernel(filter_params, filter_idx); - - const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter); - int x; - for (x = 0; x <= w - 4; x += 4) { - const int16_t *const src0 = src_y + 
x * src_stride; - const int16_t *const src1 = src0 + 1 * src_stride; - const int16_t *const src2 = src0 + 2 * src_stride; - const int16_t *const src3 = src0 + 3 * src_stride; - - // Load the source data for the three rows, adding the three registers of - // convolved products to one as we go (conv0..conv3) to avoid the - // register pressure getting too high. - const __m128i conv0 = convolve_16_8(src0, coeff0716); - const __m128i conv1 = convolve_16_8(src1, coeff0716); - const __m128i conv2 = convolve_16_8(src2, coeff0716); - const __m128i conv3 = convolve_16_8(src3, coeff0716); - - // Now reduce horizontally to get one lane for each result - const __m128i conv01 = _mm_hadd_epi32(conv0, conv1); - const __m128i conv23 = _mm_hadd_epi32(conv2, conv3); - __m128i conv = _mm_hadd_epi32(conv01, conv23); - conv = _mm_add_epi32(conv, res_add_const); - - // Divide down by (1 << round_1), rounding to nearest and subtract sub32. - __m128i shifted = - _mm_sra_epi32(_mm_add_epi32(conv, round_shift_add), round_shift); - - uint16_t *dst_x = dst + y * dst_stride + x; - CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x; - - __m128i result; - if (conv_params->is_compound) { - if (conv_params->do_average) { - __m128i p_32 = - _mm_cvtepu16_epi32(_mm_loadl_epi64((__m128i *)dst_16_x)); - - if (conv_params->use_jnt_comp_avg) { - shifted = _mm_add_epi32(_mm_mullo_epi32(p_32, wt0), - _mm_mullo_epi32(shifted, wt1)); - shifted = _mm_srai_epi32(shifted, DIST_PRECISION_BITS); - } else { - shifted = _mm_srai_epi32(_mm_add_epi32(p_32, shifted), 1); - } - __m128i res32 = _mm_sub_epi32(shifted, sub); - res32 = _mm_sra_epi32(_mm_add_epi32(res32, round_bits_const), - round_bits_shift); - - __m128i res16 = _mm_packus_epi32(res32, res32); - res16 = _mm_min_epi16(res16, clip_pixel_); - _mm_storel_epi64((__m128i *)dst_x, res16); - } else { - __m128i shifted_16 = _mm_packus_epi32(shifted, shifted); - _mm_storel_epi64((__m128i *)dst_16_x, shifted_16); - } - } else { - const __m128i subbed = 
_mm_sub_epi32(shifted, sub); - result = _mm_sra_epi16(_mm_add_epi32(subbed, bits_const), bits_shift); - result = _mm_packus_epi32(result, result); - result = _mm_min_epi16(result, clip_pixel_); - _mm_storel_epi64((__m128i *)dst_x, result); - } - } - - for (; x < w; ++x) { - const int16_t *src_x = src_y + x * src_stride; - int32_t sum = 1 << offset_bits; - for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k]; - CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); - if (conv_params->is_compound) { - if (conv_params->do_average) { - int32_t tmp = dst16[y * dst16_stride + x]; - if (conv_params->use_jnt_comp_avg) { - tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset; - tmp = tmp >> DIST_PRECISION_BITS; - } else { - tmp += res; - tmp = tmp >> 1; - } - /* Subtract round offset and convolve round */ - tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - dst[y * dst_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd); - } else { - dst16[y * dst16_stride + x] = res; - } - } else { - /* Subtract round offset and convolve round */ - int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) + - (1 << (offset_bits - conv_params->round_1 - 1))); - dst[y * dst_stride + x] = - clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd); - } - } - } -} - -void av1_highbd_convolve_2d_scale_sse4_1( - const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_qn, - const int x_step_qn, const int subpel_y_qn, const int y_step_qn, - ConvolveParams *conv_params, int bd) { - // TODO(yaowu): Move this out of stack - DECLARE_ALIGNED(16, int16_t, - tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]); - int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) + - filter_params_y->taps; - const int xtaps = filter_params_x->taps; - 
const int ytaps = filter_params_y->taps; - const int fo_vert = ytaps / 2 - 1; - - memset(tmp, 0, sizeof(tmp)); - assert((xtaps == 8) && (ytaps == 8)); - (void)xtaps; - - // horizontal filter - highbd_hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h, - subpel_x_qn, x_step_qn, filter_params_x, conv_params->round_0, - bd); - - // vertical filter (input is transposed) - highbd_vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn, - filter_params_y, conv_params, bd); -} diff --git a/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c b/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c deleted file mode 100644 index 212d3bd72..000000000 --- a/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <assert.h> -#include <smmintrin.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/filter.h" - -typedef void (*TransposeSave)(int width, int pixelsNum, uint32_t *src, - int src_stride, uint16_t *dst, int dst_stride, - int bd); - -// pixelsNum 0: write all 4 pixels -// 1/2/3: residual pixels 1/2/3 -static void writePixel(__m128i *u, int width, int pixelsNum, uint16_t *dst, - int dst_stride) { - if (2 == width) { - if (0 == pixelsNum) { - *(int *)dst = _mm_cvtsi128_si32(u[0]); - *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]); - *(int *)(dst + 2 * dst_stride) = _mm_cvtsi128_si32(u[2]); - *(int *)(dst + 3 * dst_stride) = _mm_cvtsi128_si32(u[3]); - } else if (1 == pixelsNum) { - *(int *)dst = _mm_cvtsi128_si32(u[0]); - } else if (2 == pixelsNum) { - *(int *)dst = _mm_cvtsi128_si32(u[0]); - *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]); - } else if (3 == pixelsNum) { - *(int *)dst = _mm_cvtsi128_si32(u[0]); - *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]); - *(int *)(dst + 2 * dst_stride) = _mm_cvtsi128_si32(u[2]); - } - } else { - if (0 == pixelsNum) { - _mm_storel_epi64((__m128i *)dst, u[0]); - _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]); - _mm_storel_epi64((__m128i *)(dst + 2 * dst_stride), u[2]); - _mm_storel_epi64((__m128i *)(dst + 3 * dst_stride), u[3]); - } else if (1 == pixelsNum) { - _mm_storel_epi64((__m128i *)dst, u[0]); - } else if (2 == pixelsNum) { - _mm_storel_epi64((__m128i *)dst, u[0]); - _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]); - } else if (3 == pixelsNum) { - _mm_storel_epi64((__m128i *)dst, u[0]); - _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]); - _mm_storel_epi64((__m128i *)(dst + 2 * dst_stride), u[2]); - } - } -} - -// 16-bit pixels clip with bd (10/12) -static void highbd_clip(__m128i *p, int numVecs, int bd) { - const __m128i zero = _mm_setzero_si128(); - const __m128i one = _mm_set1_epi16(1); - const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one); - __m128i 
clamped, mask; - int i; - - for (i = 0; i < numVecs; i++) { - mask = _mm_cmpgt_epi16(p[i], max); - clamped = _mm_andnot_si128(mask, p[i]); - mask = _mm_and_si128(mask, max); - clamped = _mm_or_si128(mask, clamped); - mask = _mm_cmpgt_epi16(clamped, zero); - p[i] = _mm_and_si128(clamped, mask); - } -} - -static void transClipPixel(uint32_t *src, int src_stride, __m128i *u, int bd) { - __m128i v0, v1; - __m128i rnd = _mm_set1_epi32(1 << (FILTER_BITS - 1)); - - u[0] = _mm_loadu_si128((__m128i const *)src); - u[1] = _mm_loadu_si128((__m128i const *)(src + src_stride)); - u[2] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride)); - u[3] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride)); - - u[0] = _mm_add_epi32(u[0], rnd); - u[1] = _mm_add_epi32(u[1], rnd); - u[2] = _mm_add_epi32(u[2], rnd); - u[3] = _mm_add_epi32(u[3], rnd); - - u[0] = _mm_srai_epi32(u[0], FILTER_BITS); - u[1] = _mm_srai_epi32(u[1], FILTER_BITS); - u[2] = _mm_srai_epi32(u[2], FILTER_BITS); - u[3] = _mm_srai_epi32(u[3], FILTER_BITS); - - u[0] = _mm_packus_epi32(u[0], u[1]); - u[1] = _mm_packus_epi32(u[2], u[3]); - - highbd_clip(u, 2, bd); - - v0 = _mm_unpacklo_epi16(u[0], u[1]); - v1 = _mm_unpackhi_epi16(u[0], u[1]); - - u[0] = _mm_unpacklo_epi16(v0, v1); - u[2] = _mm_unpackhi_epi16(v0, v1); - - u[1] = _mm_srli_si128(u[0], 8); - u[3] = _mm_srli_si128(u[2], 8); -} - -// pixelsNum = 0 : all 4 rows of pixels will be saved. -// pixelsNum = 1/2/3 : residual 1/2/4 rows of pixels will be saved. 
-void trans_save_4x4(int width, int pixelsNum, uint32_t *src, int src_stride, - uint16_t *dst, int dst_stride, int bd) { - __m128i u[4]; - transClipPixel(src, src_stride, u, bd); - writePixel(u, width, pixelsNum, dst, dst_stride); -} - -void trans_accum_save_4x4(int width, int pixelsNum, uint32_t *src, - int src_stride, uint16_t *dst, int dst_stride, - int bd) { - __m128i u[4], v[4]; - const __m128i ones = _mm_set1_epi16(1); - - transClipPixel(src, src_stride, u, bd); - - v[0] = _mm_loadl_epi64((__m128i const *)dst); - v[1] = _mm_loadl_epi64((__m128i const *)(dst + dst_stride)); - v[2] = _mm_loadl_epi64((__m128i const *)(dst + 2 * dst_stride)); - v[3] = _mm_loadl_epi64((__m128i const *)(dst + 3 * dst_stride)); - - u[0] = _mm_add_epi16(u[0], v[0]); - u[1] = _mm_add_epi16(u[1], v[1]); - u[2] = _mm_add_epi16(u[2], v[2]); - u[3] = _mm_add_epi16(u[3], v[3]); - - u[0] = _mm_add_epi16(u[0], ones); - u[1] = _mm_add_epi16(u[1], ones); - u[2] = _mm_add_epi16(u[2], ones); - u[3] = _mm_add_epi16(u[3], ones); - - u[0] = _mm_srai_epi16(u[0], 1); - u[1] = _mm_srai_epi16(u[1], 1); - u[2] = _mm_srai_epi16(u[2], 1); - u[3] = _mm_srai_epi16(u[3], 1); - - writePixel(u, width, pixelsNum, dst, dst_stride); -} - -// Vertical convolutional filter - -typedef void (*WritePixels)(__m128i *u, int bd, uint16_t *dst); - -static void highbdRndingPacks(__m128i *u) { - __m128i rnd = _mm_set1_epi32(1 << (FILTER_BITS - 1)); - u[0] = _mm_add_epi32(u[0], rnd); - u[0] = _mm_srai_epi32(u[0], FILTER_BITS); - u[0] = _mm_packus_epi32(u[0], u[0]); -} - -static void write2pixelsOnly(__m128i *u, int bd, uint16_t *dst) { - highbdRndingPacks(u); - highbd_clip(u, 1, bd); - *(uint32_t *)dst = _mm_cvtsi128_si32(u[0]); -} - -static void write2pixelsAccum(__m128i *u, int bd, uint16_t *dst) { - __m128i v = _mm_loadl_epi64((__m128i const *)dst); - const __m128i ones = _mm_set1_epi16(1); - - highbdRndingPacks(u); - highbd_clip(u, 1, bd); - - v = _mm_add_epi16(v, u[0]); - v = _mm_add_epi16(v, ones); - v = 
_mm_srai_epi16(v, 1); - *(uint32_t *)dst = _mm_cvtsi128_si32(v); -} - -WritePixels write2pixelsTab[2] = { write2pixelsOnly, write2pixelsAccum }; - -static void write4pixelsOnly(__m128i *u, int bd, uint16_t *dst) { - highbdRndingPacks(u); - highbd_clip(u, 1, bd); - _mm_storel_epi64((__m128i *)dst, u[0]); -} - -static void write4pixelsAccum(__m128i *u, int bd, uint16_t *dst) { - __m128i v = _mm_loadl_epi64((__m128i const *)dst); - const __m128i ones = _mm_set1_epi16(1); - - highbdRndingPacks(u); - highbd_clip(u, 1, bd); - - v = _mm_add_epi16(v, u[0]); - v = _mm_add_epi16(v, ones); - v = _mm_srai_epi16(v, 1); - _mm_storel_epi64((__m128i *)dst, v); -} - -WritePixels write4pixelsTab[2] = { write4pixelsOnly, write4pixelsAccum }; diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c deleted file mode 100644 index 5db2ccf6c..000000000 --- a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c +++ /dev/null @@ -1,1945 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "config/aom_config.h" - -#include "config/av1_rtcd.h" - -#include "av1/common/av1_inv_txfm1d_cfg.h" -#include "av1/common/x86/av1_txfm_sse2.h" -#include "av1/common/x86/av1_inv_txfm_avx2.h" -#include "av1/common/x86/av1_inv_txfm_ssse3.h" - -// TODO(venkatsanampudi@ittiam.com): move this to header file - -// Sqrt2, Sqrt2^2, Sqrt2^3, Sqrt2^4, Sqrt2^5 -static int32_t NewSqrt2list[TX_SIZES] = { 5793, 2 * 4096, 2 * 5793, 4 * 4096, - 4 * 5793 }; - -static INLINE void idct16_stage5_avx2(__m256i *x1, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_avx2(&x1[0], &x1[3]); - btf_16_adds_subs_avx2(&x1[1], &x1[2]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit); - - btf_16_adds_subs_avx2(&x1[8], &x1[11]); - btf_16_adds_subs_avx2(&x1[9], &x1[10]); - btf_16_adds_subs_avx2(&x1[15], &x1[12]); - btf_16_adds_subs_avx2(&x1[14], &x1[13]); -} - -static INLINE void idct16_stage6_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_avx2(&x[0], &x[7]); - btf_16_adds_subs_avx2(&x[1], &x[6]); - btf_16_adds_subs_avx2(&x[2], &x[5]); - btf_16_adds_subs_avx2(&x[3], &x[4]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit); -} - -static INLINE void idct16_stage7_avx2(__m256i *output, __m256i *x1) { - btf_16_adds_subs_out_avx2(&output[0], &output[15], x1[0], x1[15]); - btf_16_adds_subs_out_avx2(&output[1], &output[14], x1[1], x1[14]); - btf_16_adds_subs_out_avx2(&output[2], &output[13], x1[2], x1[13]); - btf_16_adds_subs_out_avx2(&output[3], &output[12], x1[3], x1[12]); - 
btf_16_adds_subs_out_avx2(&output[4], &output[11], x1[4], x1[11]); - btf_16_adds_subs_out_avx2(&output[5], &output[10], x1[5], x1[10]); - btf_16_adds_subs_out_avx2(&output[6], &output[9], x1[6], x1[9]); - btf_16_adds_subs_out_avx2(&output[7], &output[8], x1[7], x1[8]); -} - -static void idct16_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)(cos_bit); - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - __m256i cospi_p60_m04 = pair_set_w16_epi16(cospi[60], -cospi[4]); - __m256i cospi_p04_p60 = pair_set_w16_epi16(cospi[4], cospi[60]); - __m256i cospi_p28_m36 = pair_set_w16_epi16(cospi[28], -cospi[36]); - __m256i cospi_p36_p28 = pair_set_w16_epi16(cospi[36], cospi[28]); - __m256i cospi_p44_m20 = pair_set_w16_epi16(cospi[44], -cospi[20]); - __m256i cospi_p20_p44 = pair_set_w16_epi16(cospi[20], cospi[44]); - __m256i cospi_p12_m52 = pair_set_w16_epi16(cospi[12], -cospi[52]); - __m256i cospi_p52_p12 = pair_set_w16_epi16(cospi[52], cospi[12]); - __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]); - __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]); - __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]); - __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]); - __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]); - __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]); - __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]); - __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - - // stage 1 - __m256i x1[16]; - x1[0] = input[0]; - x1[1] = input[8]; - x1[2] = input[4]; - x1[3] = input[12]; - x1[4] = input[2]; - x1[5] = input[10]; - x1[6] = input[6]; - x1[7] = input[14]; - x1[8] 
= input[1]; - x1[9] = input[9]; - x1[10] = input[5]; - x1[11] = input[13]; - x1[12] = input[3]; - x1[13] = input[11]; - x1[14] = input[7]; - x1[15] = input[15]; - - // stage 2 - btf_16_w16_avx2(cospi_p60_m04, cospi_p04_p60, &x1[8], &x1[15], _r, cos_bit); - btf_16_w16_avx2(cospi_p28_m36, cospi_p36_p28, &x1[9], &x1[14], _r, cos_bit); - btf_16_w16_avx2(cospi_p44_m20, cospi_p20_p44, &x1[10], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_p12_m52, cospi_p52_p12, &x1[11], &x1[12], _r, cos_bit); - - // stage 3 - btf_16_w16_avx2(cospi_p56_m08, cospi_p08_p56, &x1[4], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p24_m40, cospi_p40_p24, &x1[5], &x1[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[8], &x1[9]); - btf_16_adds_subs_avx2(&x1[11], &x1[10]); - btf_16_adds_subs_avx2(&x1[12], &x1[13]); - btf_16_adds_subs_avx2(&x1[15], &x1[14]); - - // stage 4 - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit); - btf_16_w16_avx2(cospi_p48_m16, cospi_p16_p48, &x1[2], &x1[3], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[4], &x1[5]); - btf_16_adds_subs_avx2(&x1[7], &x1[6]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit); - - idct16_stage5_avx2(x1, cospi, _r, cos_bit); - idct16_stage6_avx2(x1, cospi, _r, cos_bit); - idct16_stage7_avx2(output, x1); -} - -static void idct16_low8_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)(cos_bit); - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - - // stage 1 - __m256i x1[16]; - x1[0] = input[0]; - x1[2] = input[4]; - x1[4] = input[2]; - x1[6] = input[6]; - x1[8] = input[1]; - x1[10] = input[5]; - x1[12] = 
input[3]; - x1[14] = input[7]; - - // stage 2 - btf_16_w16_0_avx2(cospi[60], cospi[4], x1[8], x1[8], x1[15]); - btf_16_w16_0_avx2(-cospi[36], cospi[28], x1[14], x1[9], x1[14]); - btf_16_w16_0_avx2(cospi[44], cospi[20], x1[10], x1[10], x1[13]); - btf_16_w16_0_avx2(-cospi[52], cospi[12], x1[12], x1[11], x1[12]); - - // stage 3 - btf_16_w16_0_avx2(cospi[56], cospi[8], x1[4], x1[4], x1[7]); - btf_16_w16_0_avx2(-cospi[40], cospi[24], x1[6], x1[5], x1[6]); - btf_16_adds_subs_avx2(&x1[8], &x1[9]); - btf_16_adds_subs_avx2(&x1[11], &x1[10]); - btf_16_adds_subs_avx2(&x1[12], &x1[13]); - btf_16_adds_subs_avx2(&x1[15], &x1[14]); - - // stage 4 - btf_16_w16_0_avx2(cospi[32], cospi[32], x1[0], x1[0], x1[1]); - btf_16_w16_0_avx2(cospi[48], cospi[16], x1[2], x1[2], x1[3]); - btf_16_adds_subs_avx2(&x1[4], &x1[5]); - btf_16_adds_subs_avx2(&x1[7], &x1[6]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit); - - idct16_stage5_avx2(x1, cospi, _r, cos_bit); - idct16_stage6_avx2(x1, cospi, _r, cos_bit); - idct16_stage7_avx2(output, x1); -} - -static void idct16_low1_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)(cos_bit); - const int32_t *cospi = cospi_arr(INV_COS_BIT); - - // stage 1 - __m256i x1[2]; - x1[0] = input[0]; - - // stage 2 - // stage 3 - // stage 4 - btf_16_w16_0_avx2(cospi[32], cospi[32], x1[0], x1[0], x1[1]); - - // stage 5 - // stage 6 - output[0] = x1[0]; - output[1] = x1[1]; - output[2] = x1[1]; - output[3] = x1[0]; - output[4] = x1[0]; - output[5] = x1[1]; - output[6] = x1[1]; - output[7] = x1[0]; - output[8] = x1[0]; - output[9] = x1[1]; - output[10] = x1[1]; - output[11] = x1[0]; - output[12] = x1[0]; - output[13] = x1[1]; - output[14] = x1[1]; - output[15] = x1[0]; -} - -static INLINE void iadst16_stage3_avx2(__m256i *x) { - btf_16_adds_subs_avx2(&x[0], &x[8]); - btf_16_adds_subs_avx2(&x[1], &x[9]); - btf_16_adds_subs_avx2(&x[2], 
&x[10]); - btf_16_adds_subs_avx2(&x[3], &x[11]); - btf_16_adds_subs_avx2(&x[4], &x[12]); - btf_16_adds_subs_avx2(&x[5], &x[13]); - btf_16_adds_subs_avx2(&x[6], &x[14]); - btf_16_adds_subs_avx2(&x[7], &x[15]); -} - -static INLINE void iadst16_stage4_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]); - const __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]); - const __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]); - const __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]); - const __m256i cospi_m56_p08 = pair_set_w16_epi16(-cospi[56], cospi[8]); - const __m256i cospi_m24_p40 = pair_set_w16_epi16(-cospi[24], cospi[40]); - btf_16_w16_avx2(cospi_p08_p56, cospi_p56_m08, &x[8], &x[9], _r, cos_bit); - btf_16_w16_avx2(cospi_p40_p24, cospi_p24_m40, &x[10], &x[11], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_p08, cospi_p08_p56, &x[12], &x[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_p40, cospi_p40_p24, &x[14], &x[15], _r, cos_bit); -} - -static INLINE void iadst16_stage5_avx2(__m256i *x) { - btf_16_adds_subs_avx2(&x[0], &x[4]); - btf_16_adds_subs_avx2(&x[1], &x[5]); - btf_16_adds_subs_avx2(&x[2], &x[6]); - btf_16_adds_subs_avx2(&x[3], &x[7]); - btf_16_adds_subs_avx2(&x[8], &x[12]); - btf_16_adds_subs_avx2(&x[9], &x[13]); - btf_16_adds_subs_avx2(&x[10], &x[14]); - btf_16_adds_subs_avx2(&x[11], &x[15]); -} - -static INLINE void iadst16_stage6_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]); - const __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]); - const __m256i cospi_m48_p16 = pair_set_w16_epi16(-cospi[48], cospi[16]); - btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x[4], &x[5], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x[6], &x[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, 
&x[12], &x[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x[14], &x[15], _r, cos_bit); -} - -static INLINE void iadst16_stage7_avx2(__m256i *x) { - btf_16_adds_subs_avx2(&x[0], &x[2]); - btf_16_adds_subs_avx2(&x[1], &x[3]); - btf_16_adds_subs_avx2(&x[4], &x[6]); - btf_16_adds_subs_avx2(&x[5], &x[7]); - btf_16_adds_subs_avx2(&x[8], &x[10]); - btf_16_adds_subs_avx2(&x[9], &x[11]); - btf_16_adds_subs_avx2(&x[12], &x[14]); - btf_16_adds_subs_avx2(&x[13], &x[15]); -} - -static INLINE void iadst16_stage8_avx2(__m256i *x1, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - const __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]); - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[2], &x1[3], _r, cos_bit); - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[6], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[10], &x1[11], _r, cos_bit); - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[14], &x1[15], _r, cos_bit); -} - -static INLINE void iadst16_stage9_avx2(__m256i *output, __m256i *x1) { - const __m256i __zero = _mm256_setzero_si256(); - output[0] = x1[0]; - output[1] = _mm256_subs_epi16(__zero, x1[8]); - output[2] = x1[12]; - output[3] = _mm256_subs_epi16(__zero, x1[4]); - output[4] = x1[6]; - output[5] = _mm256_subs_epi16(__zero, x1[14]); - output[6] = x1[10]; - output[7] = _mm256_subs_epi16(__zero, x1[2]); - output[8] = x1[3]; - output[9] = _mm256_subs_epi16(__zero, x1[11]); - output[10] = x1[15]; - output[11] = _mm256_subs_epi16(__zero, x1[7]); - output[12] = x1[5]; - output[13] = _mm256_subs_epi16(__zero, x1[13]); - output[14] = x1[9]; - output[15] = _mm256_subs_epi16(__zero, x1[1]); -} - -static void iadst16_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)(cos_bit); - const int32_t *cospi = cospi_arr(INV_COS_BIT); - - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - __m256i 
cospi_p02_p62 = pair_set_w16_epi16(cospi[2], cospi[62]); - __m256i cospi_p62_m02 = pair_set_w16_epi16(cospi[62], -cospi[2]); - __m256i cospi_p10_p54 = pair_set_w16_epi16(cospi[10], cospi[54]); - __m256i cospi_p54_m10 = pair_set_w16_epi16(cospi[54], -cospi[10]); - __m256i cospi_p18_p46 = pair_set_w16_epi16(cospi[18], cospi[46]); - __m256i cospi_p46_m18 = pair_set_w16_epi16(cospi[46], -cospi[18]); - __m256i cospi_p26_p38 = pair_set_w16_epi16(cospi[26], cospi[38]); - __m256i cospi_p38_m26 = pair_set_w16_epi16(cospi[38], -cospi[26]); - __m256i cospi_p34_p30 = pair_set_w16_epi16(cospi[34], cospi[30]); - __m256i cospi_p30_m34 = pair_set_w16_epi16(cospi[30], -cospi[34]); - __m256i cospi_p42_p22 = pair_set_w16_epi16(cospi[42], cospi[22]); - __m256i cospi_p22_m42 = pair_set_w16_epi16(cospi[22], -cospi[42]); - __m256i cospi_p50_p14 = pair_set_w16_epi16(cospi[50], cospi[14]); - __m256i cospi_p14_m50 = pair_set_w16_epi16(cospi[14], -cospi[50]); - __m256i cospi_p58_p06 = pair_set_w16_epi16(cospi[58], cospi[6]); - __m256i cospi_p06_m58 = pair_set_w16_epi16(cospi[6], -cospi[58]); - - // stage 1 - __m256i x1[16]; - x1[0] = input[15]; - x1[1] = input[0]; - x1[2] = input[13]; - x1[3] = input[2]; - x1[4] = input[11]; - x1[5] = input[4]; - x1[6] = input[9]; - x1[7] = input[6]; - x1[8] = input[7]; - x1[9] = input[8]; - x1[10] = input[5]; - x1[11] = input[10]; - x1[12] = input[3]; - x1[13] = input[12]; - x1[14] = input[1]; - x1[15] = input[14]; - - // stage 2 - btf_16_w16_avx2(cospi_p02_p62, cospi_p62_m02, &x1[0], &x1[1], _r, cos_bit); - btf_16_w16_avx2(cospi_p10_p54, cospi_p54_m10, &x1[2], &x1[3], _r, cos_bit); - btf_16_w16_avx2(cospi_p18_p46, cospi_p46_m18, &x1[4], &x1[5], _r, cos_bit); - btf_16_w16_avx2(cospi_p26_p38, cospi_p38_m26, &x1[6], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p34_p30, cospi_p30_m34, &x1[8], &x1[9], _r, cos_bit); - btf_16_w16_avx2(cospi_p42_p22, cospi_p22_m42, &x1[10], &x1[11], _r, cos_bit); - btf_16_w16_avx2(cospi_p50_p14, cospi_p14_m50, &x1[12], &x1[13], 
_r, cos_bit); - btf_16_w16_avx2(cospi_p58_p06, cospi_p06_m58, &x1[14], &x1[15], _r, cos_bit); - - iadst16_stage3_avx2(x1); - iadst16_stage4_avx2(x1, cospi, _r, cos_bit); - iadst16_stage5_avx2(x1); - iadst16_stage6_avx2(x1, cospi, _r, cos_bit); - iadst16_stage7_avx2(x1); - iadst16_stage8_avx2(x1, cospi, _r, cos_bit); - iadst16_stage9_avx2(output, x1); -} - -static void iadst16_low8_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)(cos_bit); - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - // stage 1 - __m256i x1[16]; - x1[1] = input[0]; - x1[3] = input[2]; - x1[5] = input[4]; - x1[7] = input[6]; - x1[8] = input[7]; - x1[10] = input[5]; - x1[12] = input[3]; - x1[14] = input[1]; - - // stage 2 - btf_16_w16_0_avx2(cospi[62], -cospi[2], x1[1], x1[0], x1[1]); - btf_16_w16_0_avx2(cospi[54], -cospi[10], x1[3], x1[2], x1[3]); - btf_16_w16_0_avx2(cospi[46], -cospi[18], x1[5], x1[4], x1[5]); - btf_16_w16_0_avx2(cospi[38], -cospi[26], x1[7], x1[6], x1[7]); - btf_16_w16_0_avx2(cospi[34], cospi[30], x1[8], x1[8], x1[9]); - btf_16_w16_0_avx2(cospi[42], cospi[22], x1[10], x1[10], x1[11]); - btf_16_w16_0_avx2(cospi[50], cospi[14], x1[12], x1[12], x1[13]); - btf_16_w16_0_avx2(cospi[58], cospi[06], x1[14], x1[14], x1[15]); - - iadst16_stage3_avx2(x1); - iadst16_stage4_avx2(x1, cospi, _r, cos_bit); - iadst16_stage5_avx2(x1); - iadst16_stage6_avx2(x1, cospi, _r, cos_bit); - iadst16_stage7_avx2(x1); - iadst16_stage8_avx2(x1, cospi, _r, cos_bit); - iadst16_stage9_avx2(output, x1); -} - -static void iadst16_low1_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)(cos_bit); - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]); - const __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]); - const __m256i cospi_p16_p48 = 
pair_set_w16_epi16(cospi[16], cospi[48]); - const __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]); - - // stage 1 - __m256i x1[16]; - x1[1] = input[0]; - - // stage 2 - btf_16_w16_0_avx2(cospi[62], -cospi[2], x1[1], x1[0], x1[1]); - - // stage 3 - x1[8] = x1[0]; - x1[9] = x1[1]; - - // stage 4 - btf_16_w16_avx2(cospi_p08_p56, cospi_p56_m08, &x1[8], &x1[9], _r, cos_bit); - - // stage 5 - x1[4] = x1[0]; - x1[5] = x1[1]; - - x1[12] = x1[8]; - x1[13] = x1[9]; - - // stage 6 - btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[4], &x1[5], _r, cos_bit); - btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[12], &x1[13], _r, cos_bit); - - // stage 7 - x1[2] = x1[0]; - x1[3] = x1[1]; - x1[6] = x1[4]; - x1[7] = x1[5]; - x1[10] = x1[8]; - x1[11] = x1[9]; - x1[14] = x1[12]; - x1[15] = x1[13]; - - iadst16_stage8_avx2(x1, cospi, _r, cos_bit); - iadst16_stage9_avx2(output, x1); -} - -static INLINE void idct32_high16_stage3_avx2(__m256i *x) { - btf_16_adds_subs_avx2(&x[16], &x[17]); - btf_16_adds_subs_avx2(&x[19], &x[18]); - btf_16_adds_subs_avx2(&x[20], &x[21]); - btf_16_adds_subs_avx2(&x[23], &x[22]); - btf_16_adds_subs_avx2(&x[24], &x[25]); - btf_16_adds_subs_avx2(&x[27], &x[26]); - btf_16_adds_subs_avx2(&x[28], &x[29]); - btf_16_adds_subs_avx2(&x[31], &x[30]); -} - -static INLINE void idct32_high16_stage4_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]); - const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]); - const __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]); - const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]); - const __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]); - const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]); - btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x[17], &x[30], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x[18], &x[29], 
_r, cos_bit); - btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x[21], &x[26], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x[22], &x[25], _r, cos_bit); -} - -static INLINE void idct32_high24_stage5_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[9], &x[14], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[10], &x[13], _r, cos_bit); - btf_16_adds_subs_avx2(&x[16], &x[19]); - btf_16_adds_subs_avx2(&x[17], &x[18]); - btf_16_adds_subs_avx2(&x[23], &x[20]); - btf_16_adds_subs_avx2(&x[22], &x[21]); - btf_16_adds_subs_avx2(&x[24], &x[27]); - btf_16_adds_subs_avx2(&x[25], &x[26]); - btf_16_adds_subs_avx2(&x[31], &x[28]); - btf_16_adds_subs_avx2(&x[30], &x[29]); -} - -static INLINE void idct32_high28_stage6_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[5], &x[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x[8], &x[11]); - btf_16_adds_subs_avx2(&x[9], &x[10]); - btf_16_adds_subs_avx2(&x[15], &x[12]); - btf_16_adds_subs_avx2(&x[14], &x[13]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[18], &x[29], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[19], &x[28], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[20], &x[27], _r, cos_bit); - 
btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[21], &x[26], _r, cos_bit); -} - -static INLINE void idct32_stage7_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_avx2(&x[0], &x[7]); - btf_16_adds_subs_avx2(&x[1], &x[6]); - btf_16_adds_subs_avx2(&x[2], &x[5]); - btf_16_adds_subs_avx2(&x[3], &x[4]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit); - btf_16_adds_subs_avx2(&x[16], &x[23]); - btf_16_adds_subs_avx2(&x[17], &x[22]); - btf_16_adds_subs_avx2(&x[18], &x[21]); - btf_16_adds_subs_avx2(&x[19], &x[20]); - btf_16_adds_subs_avx2(&x[31], &x[24]); - btf_16_adds_subs_avx2(&x[30], &x[25]); - btf_16_adds_subs_avx2(&x[29], &x[26]); - btf_16_adds_subs_avx2(&x[28], &x[27]); -} - -static INLINE void idct32_stage8_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_avx2(&x[0], &x[15]); - btf_16_adds_subs_avx2(&x[1], &x[14]); - btf_16_adds_subs_avx2(&x[2], &x[13]); - btf_16_adds_subs_avx2(&x[3], &x[12]); - btf_16_adds_subs_avx2(&x[4], &x[11]); - btf_16_adds_subs_avx2(&x[5], &x[10]); - btf_16_adds_subs_avx2(&x[6], &x[9]); - btf_16_adds_subs_avx2(&x[7], &x[8]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[20], &x[27], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[21], &x[26], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[22], &x[25], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[23], &x[24], _r, cos_bit); -} - -static INLINE void idct32_stage9_avx2(__m256i *output, __m256i *x) { - btf_16_adds_subs_out_avx2(&output[0], 
&output[31], x[0], x[31]); - btf_16_adds_subs_out_avx2(&output[1], &output[30], x[1], x[30]); - btf_16_adds_subs_out_avx2(&output[2], &output[29], x[2], x[29]); - btf_16_adds_subs_out_avx2(&output[3], &output[28], x[3], x[28]); - btf_16_adds_subs_out_avx2(&output[4], &output[27], x[4], x[27]); - btf_16_adds_subs_out_avx2(&output[5], &output[26], x[5], x[26]); - btf_16_adds_subs_out_avx2(&output[6], &output[25], x[6], x[25]); - btf_16_adds_subs_out_avx2(&output[7], &output[24], x[7], x[24]); - btf_16_adds_subs_out_avx2(&output[8], &output[23], x[8], x[23]); - btf_16_adds_subs_out_avx2(&output[9], &output[22], x[9], x[22]); - btf_16_adds_subs_out_avx2(&output[10], &output[21], x[10], x[21]); - btf_16_adds_subs_out_avx2(&output[11], &output[20], x[11], x[20]); - btf_16_adds_subs_out_avx2(&output[12], &output[19], x[12], x[19]); - btf_16_adds_subs_out_avx2(&output[13], &output[18], x[13], x[18]); - btf_16_adds_subs_out_avx2(&output[14], &output[17], x[14], x[17]); - btf_16_adds_subs_out_avx2(&output[15], &output[16], x[15], x[16]); -} - -static void idct32_low1_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - - // stage 1 - __m256i x[2]; - x[0] = input[0]; - - // stage 2 - // stage 3 - // stage 4 - // stage 5 - btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]); - - // stage 6 - // stage 7 - // stage 8 - // stage 9 - output[0] = x[0]; - output[31] = x[0]; - output[1] = x[1]; - output[30] = x[1]; - output[2] = x[1]; - output[29] = x[1]; - output[3] = x[0]; - output[28] = x[0]; - output[4] = x[0]; - output[27] = x[0]; - output[5] = x[1]; - output[26] = x[1]; - output[6] = x[1]; - output[25] = x[1]; - output[7] = x[0]; - output[24] = x[0]; - output[8] = x[0]; - output[23] = x[0]; - output[9] = x[1]; - output[22] = x[1]; - output[10] = x[1]; - output[21] = x[1]; - output[11] = x[0]; - output[20] = x[0]; - output[12] = x[0]; - output[19] = x[0]; - output[13] = x[1]; - output[18] = 
x[1]; - output[14] = x[1]; - output[17] = x[1]; - output[15] = x[0]; - output[16] = x[0]; -} - -static void idct32_low8_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - // stage 1 - __m256i x[32]; - x[0] = input[0]; - x[4] = input[4]; - x[8] = input[2]; - x[12] = input[6]; - x[16] = input[1]; - x[20] = input[5]; - x[24] = input[3]; - x[28] = input[7]; - - // stage 2 - btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]); - btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]); - btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]); - btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]); - - // stage 3 - btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]); - btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]); - x[17] = x[16]; - x[18] = x[19]; - x[21] = x[20]; - x[22] = x[23]; - x[25] = x[24]; - x[26] = x[27]; - x[29] = x[28]; - x[30] = x[31]; - - // stage 4 - btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]); - x[9] = x[8]; - x[10] = x[11]; - x[13] = x[12]; - x[14] = x[15]; - idct32_high16_stage4_avx2(x, cospi, _r, cos_bit); - - // stage 5 - btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]); - x[5] = x[4]; - x[6] = x[7]; - idct32_high24_stage5_avx2(x, cospi, _r, cos_bit); - // stage 6 - x[3] = x[0]; - x[2] = x[1]; - idct32_high28_stage6_avx2(x, cospi, _r, cos_bit); - - idct32_stage7_avx2(x, cospi, _r, cos_bit); - idct32_stage8_avx2(x, cospi, _r, cos_bit); - idct32_stage9_avx2(output, x); -} - -static void idct32_low16_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - // stage 1 - __m256i x[32]; - x[0] = input[0]; - x[2] = input[8]; - x[4] = input[4]; - x[6] = input[12]; - x[8] = input[2]; - x[10] = 
input[10]; - x[12] = input[6]; - x[14] = input[14]; - x[16] = input[1]; - x[18] = input[9]; - x[20] = input[5]; - x[22] = input[13]; - x[24] = input[3]; - x[26] = input[11]; - x[28] = input[7]; - x[30] = input[15]; - - // stage 2 - btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]); - btf_16_w16_0_avx2(-cospi[34], cospi[30], x[30], x[17], x[30]); - btf_16_w16_0_avx2(cospi[46], cospi[18], x[18], x[18], x[29]); - btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]); - btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]); - btf_16_w16_0_avx2(-cospi[42], cospi[22], x[26], x[21], x[26]); - btf_16_w16_0_avx2(cospi[38], cospi[26], x[22], x[22], x[25]); - btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]); - - // stage 3 - btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]); - btf_16_w16_0_avx2(-cospi[36], cospi[28], x[14], x[9], x[14]); - btf_16_w16_0_avx2(cospi[44], cospi[20], x[10], x[10], x[13]); - btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]); - idct32_high16_stage3_avx2(x); - - // stage 4 - btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]); - btf_16_w16_0_avx2(-cospi[40], cospi[24], x[6], x[5], x[6]); - btf_16_adds_subs_avx2(&x[8], &x[9]); - btf_16_adds_subs_avx2(&x[11], &x[10]); - btf_16_adds_subs_avx2(&x[12], &x[13]); - btf_16_adds_subs_avx2(&x[15], &x[14]); - idct32_high16_stage4_avx2(x, cospi, _r, cos_bit); - - // stage 5 - btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]); - btf_16_w16_0_avx2(cospi[48], cospi[16], x[2], x[2], x[3]); - btf_16_adds_subs_avx2(&x[4], &x[5]); - btf_16_adds_subs_avx2(&x[7], &x[6]); - idct32_high24_stage5_avx2(x, cospi, _r, cos_bit); - - btf_16_adds_subs_avx2(&x[0], &x[3]); - btf_16_adds_subs_avx2(&x[1], &x[2]); - idct32_high28_stage6_avx2(x, cospi, _r, cos_bit); - - idct32_stage7_avx2(x, cospi, _r, cos_bit); - idct32_stage8_avx2(x, cospi, _r, cos_bit); - idct32_stage9_avx2(output, x); -} - -static void idct32_new_avx2(const __m256i *input, __m256i *output, - 
int8_t cos_bit) { - (void)(cos_bit); - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - __m256i cospi_p62_m02 = pair_set_w16_epi16(cospi[62], -cospi[2]); - __m256i cospi_p02_p62 = pair_set_w16_epi16(cospi[2], cospi[62]); - __m256i cospi_p30_m34 = pair_set_w16_epi16(cospi[30], -cospi[34]); - __m256i cospi_p34_p30 = pair_set_w16_epi16(cospi[34], cospi[30]); - __m256i cospi_p46_m18 = pair_set_w16_epi16(cospi[46], -cospi[18]); - __m256i cospi_p18_p46 = pair_set_w16_epi16(cospi[18], cospi[46]); - __m256i cospi_p14_m50 = pair_set_w16_epi16(cospi[14], -cospi[50]); - __m256i cospi_p50_p14 = pair_set_w16_epi16(cospi[50], cospi[14]); - __m256i cospi_p54_m10 = pair_set_w16_epi16(cospi[54], -cospi[10]); - __m256i cospi_p10_p54 = pair_set_w16_epi16(cospi[10], cospi[54]); - __m256i cospi_p22_m42 = pair_set_w16_epi16(cospi[22], -cospi[42]); - __m256i cospi_p42_p22 = pair_set_w16_epi16(cospi[42], cospi[22]); - __m256i cospi_p38_m26 = pair_set_w16_epi16(cospi[38], -cospi[26]); - __m256i cospi_p26_p38 = pair_set_w16_epi16(cospi[26], cospi[38]); - __m256i cospi_p06_m58 = pair_set_w16_epi16(cospi[6], -cospi[58]); - __m256i cospi_p58_p06 = pair_set_w16_epi16(cospi[58], cospi[6]); - __m256i cospi_p60_m04 = pair_set_w16_epi16(cospi[60], -cospi[4]); - __m256i cospi_p04_p60 = pair_set_w16_epi16(cospi[4], cospi[60]); - __m256i cospi_p28_m36 = pair_set_w16_epi16(cospi[28], -cospi[36]); - __m256i cospi_p36_p28 = pair_set_w16_epi16(cospi[36], cospi[28]); - __m256i cospi_p44_m20 = pair_set_w16_epi16(cospi[44], -cospi[20]); - __m256i cospi_p20_p44 = pair_set_w16_epi16(cospi[20], cospi[44]); - __m256i cospi_p12_m52 = pair_set_w16_epi16(cospi[12], -cospi[52]); - __m256i cospi_p52_p12 = pair_set_w16_epi16(cospi[52], cospi[12]); - __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]); - __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]); - __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]); - 
__m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]); - __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]); - __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]); - __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]); - - // stage 1 - __m256i x1[32]; - x1[0] = input[0]; - x1[1] = input[16]; - x1[2] = input[8]; - x1[3] = input[24]; - x1[4] = input[4]; - x1[5] = input[20]; - x1[6] = input[12]; - x1[7] = input[28]; - x1[8] = input[2]; - x1[9] = input[18]; - x1[10] = input[10]; - x1[11] = input[26]; - x1[12] = input[6]; - x1[13] = input[22]; - x1[14] = input[14]; - x1[15] = input[30]; - x1[16] = input[1]; - x1[17] = input[17]; - x1[18] = input[9]; - x1[19] = input[25]; - x1[20] = input[5]; - x1[21] = input[21]; - x1[22] = input[13]; - x1[23] = input[29]; - x1[24] = input[3]; - x1[25] = input[19]; - x1[26] = input[11]; - x1[27] = input[27]; - x1[28] = input[7]; - x1[29] = input[23]; - x1[30] = input[15]; - x1[31] = input[31]; - - // stage 2 - btf_16_w16_avx2(cospi_p62_m02, cospi_p02_p62, &x1[16], &x1[31], _r, cos_bit); - btf_16_w16_avx2(cospi_p30_m34, cospi_p34_p30, &x1[17], &x1[30], _r, cos_bit); - btf_16_w16_avx2(cospi_p46_m18, cospi_p18_p46, &x1[18], &x1[29], _r, cos_bit); - btf_16_w16_avx2(cospi_p14_m50, cospi_p50_p14, &x1[19], &x1[28], _r, cos_bit); - btf_16_w16_avx2(cospi_p54_m10, cospi_p10_p54, &x1[20], &x1[27], _r, cos_bit); - btf_16_w16_avx2(cospi_p22_m42, cospi_p42_p22, &x1[21], &x1[26], _r, cos_bit); - btf_16_w16_avx2(cospi_p38_m26, cospi_p26_p38, &x1[22], &x1[25], _r, cos_bit); - btf_16_w16_avx2(cospi_p06_m58, cospi_p58_p06, &x1[23], &x1[24], _r, cos_bit); - - // stage 3 - btf_16_w16_avx2(cospi_p60_m04, cospi_p04_p60, &x1[8], &x1[15], _r, cos_bit); - btf_16_w16_avx2(cospi_p28_m36, cospi_p36_p28, &x1[9], &x1[14], _r, cos_bit); - btf_16_w16_avx2(cospi_p44_m20, cospi_p20_p44, &x1[10], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_p12_m52, 
cospi_p52_p12, &x1[11], &x1[12], _r, cos_bit); - idct32_high16_stage3_avx2(x1); - - // stage 4 - btf_16_w16_avx2(cospi_p56_m08, cospi_p08_p56, &x1[4], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p24_m40, cospi_p40_p24, &x1[5], &x1[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[8], &x1[9]); - btf_16_adds_subs_avx2(&x1[11], &x1[10]); - btf_16_adds_subs_avx2(&x1[12], &x1[13]); - btf_16_adds_subs_avx2(&x1[15], &x1[14]); - idct32_high16_stage4_avx2(x1, cospi, _r, cos_bit); - - // stage 5 - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit); - btf_16_w16_avx2(cospi_p48_m16, cospi_p16_p48, &x1[2], &x1[3], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[4], &x1[5]); - btf_16_adds_subs_avx2(&x1[7], &x1[6]); - idct32_high24_stage5_avx2(x1, cospi, _r, cos_bit); - - // stage 6 - btf_16_adds_subs_avx2(&x1[0], &x1[3]); - btf_16_adds_subs_avx2(&x1[1], &x1[2]); - idct32_high28_stage6_avx2(x1, cospi, _r, cos_bit); - - idct32_stage7_avx2(x1, cospi, _r, cos_bit); - idct32_stage8_avx2(x1, cospi, _r, cos_bit); - idct32_stage9_avx2(output, x1); -} - -static INLINE void idct64_stage4_high32_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - (void)cos_bit; - const __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]); - const __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]); - const __m256i cospi_m60_m04 = pair_set_w16_epi16(-cospi[60], -cospi[4]); - const __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]); - const __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]); - const __m256i cospi_m28_m36 = pair_set_w16_epi16(-cospi[28], -cospi[36]); - const __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]); - const __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]); - const __m256i cospi_m44_m20 = pair_set_w16_epi16(-cospi[44], -cospi[20]); - const __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]); - const __m256i cospi_p12_p52 = 
pair_set_w16_epi16(cospi[12], cospi[52]); - const __m256i cospi_m12_m52 = pair_set_w16_epi16(-cospi[12], -cospi[52]); - btf_16_w16_avx2(cospi_m04_p60, cospi_p60_p04, &x[33], &x[62], _r, cos_bit); - btf_16_w16_avx2(cospi_m60_m04, cospi_m04_p60, &x[34], &x[61], _r, cos_bit); - btf_16_w16_avx2(cospi_m36_p28, cospi_p28_p36, &x[37], &x[58], _r, cos_bit); - btf_16_w16_avx2(cospi_m28_m36, cospi_m36_p28, &x[38], &x[57], _r, cos_bit); - btf_16_w16_avx2(cospi_m20_p44, cospi_p44_p20, &x[41], &x[54], _r, cos_bit); - btf_16_w16_avx2(cospi_m44_m20, cospi_m20_p44, &x[42], &x[53], _r, cos_bit); - btf_16_w16_avx2(cospi_m52_p12, cospi_p12_p52, &x[45], &x[50], _r, cos_bit); - btf_16_w16_avx2(cospi_m12_m52, cospi_m52_p12, &x[46], &x[49], _r, cos_bit); -} - -static INLINE void idct64_stage5_high48_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - (void)cos_bit; - const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]); - const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]); - const __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]); - const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]); - const __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]); - const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]); - btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x[17], &x[30], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x[18], &x[29], _r, cos_bit); - btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x[21], &x[26], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x[22], &x[25], _r, cos_bit); - btf_16_adds_subs_avx2(&x[32], &x[35]); - btf_16_adds_subs_avx2(&x[33], &x[34]); - btf_16_adds_subs_avx2(&x[39], &x[36]); - btf_16_adds_subs_avx2(&x[38], &x[37]); - btf_16_adds_subs_avx2(&x[40], &x[43]); - btf_16_adds_subs_avx2(&x[41], &x[42]); - btf_16_adds_subs_avx2(&x[47], &x[44]); - btf_16_adds_subs_avx2(&x[46], &x[45]); - 
btf_16_adds_subs_avx2(&x[48], &x[51]); - btf_16_adds_subs_avx2(&x[49], &x[50]); - btf_16_adds_subs_avx2(&x[55], &x[52]); - btf_16_adds_subs_avx2(&x[54], &x[53]); - btf_16_adds_subs_avx2(&x[56], &x[59]); - btf_16_adds_subs_avx2(&x[57], &x[58]); - btf_16_adds_subs_avx2(&x[63], &x[60]); - btf_16_adds_subs_avx2(&x[62], &x[61]); -} - -static INLINE void idct64_stage6_high32_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - (void)cos_bit; - const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]); - const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]); - const __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]); - const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]); - const __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]); - const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]); - btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x[34], &x[61], _r, cos_bit); - btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x[35], &x[60], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x[36], &x[59], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x[37], &x[58], _r, cos_bit); - btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x[42], &x[53], _r, cos_bit); - btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x[43], &x[52], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x[44], &x[51], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x[45], &x[50], _r, cos_bit); -} - -static INLINE void idct64_stage6_high48_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - btf_16_adds_subs_avx2(&x[16], &x[19]); - btf_16_adds_subs_avx2(&x[17], &x[18]); - btf_16_adds_subs_avx2(&x[23], &x[20]); - btf_16_adds_subs_avx2(&x[22], &x[21]); - btf_16_adds_subs_avx2(&x[24], &x[27]); - btf_16_adds_subs_avx2(&x[25], &x[26]); - btf_16_adds_subs_avx2(&x[31], &x[28]); - btf_16_adds_subs_avx2(&x[30], &x[29]); - 
idct64_stage6_high32_avx2(x, cospi, _r, cos_bit); -} - -static INLINE void idct64_stage7_high48_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - (void)cos_bit; - const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[18], &x[29], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[19], &x[28], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[20], &x[27], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[21], &x[26], _r, cos_bit); - btf_16_adds_subs_avx2(&x[32], &x[39]); - btf_16_adds_subs_avx2(&x[33], &x[38]); - btf_16_adds_subs_avx2(&x[34], &x[37]); - btf_16_adds_subs_avx2(&x[35], &x[36]); - btf_16_adds_subs_avx2(&x[47], &x[40]); - btf_16_adds_subs_avx2(&x[46], &x[41]); - btf_16_adds_subs_avx2(&x[45], &x[42]); - btf_16_adds_subs_avx2(&x[44], &x[43]); - btf_16_adds_subs_avx2(&x[48], &x[55]); - btf_16_adds_subs_avx2(&x[49], &x[54]); - btf_16_adds_subs_avx2(&x[50], &x[53]); - btf_16_adds_subs_avx2(&x[51], &x[52]); - btf_16_adds_subs_avx2(&x[63], &x[56]); - btf_16_adds_subs_avx2(&x[62], &x[57]); - btf_16_adds_subs_avx2(&x[61], &x[58]); - btf_16_adds_subs_avx2(&x[60], &x[59]); -} - -static INLINE void idct64_stage8_high48_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - (void)cos_bit; - const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - btf_16_adds_subs_avx2(&x[16], &x[23]); - btf_16_adds_subs_avx2(&x[17], &x[22]); - btf_16_adds_subs_avx2(&x[18], &x[21]); - btf_16_adds_subs_avx2(&x[19], &x[20]); - btf_16_adds_subs_avx2(&x[31], &x[24]); - btf_16_adds_subs_avx2(&x[30], &x[25]); - 
btf_16_adds_subs_avx2(&x[29], &x[26]); - btf_16_adds_subs_avx2(&x[28], &x[27]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[36], &x[59], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[37], &x[58], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[38], &x[57], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[39], &x[56], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[40], &x[55], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[41], &x[54], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[42], &x[53], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[43], &x[52], _r, cos_bit); -} - -static INLINE void idct64_stage9_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - (void)cos_bit; - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_avx2(&x[0], &x[15]); - btf_16_adds_subs_avx2(&x[1], &x[14]); - btf_16_adds_subs_avx2(&x[2], &x[13]); - btf_16_adds_subs_avx2(&x[3], &x[12]); - btf_16_adds_subs_avx2(&x[4], &x[11]); - btf_16_adds_subs_avx2(&x[5], &x[10]); - btf_16_adds_subs_avx2(&x[6], &x[9]); - btf_16_adds_subs_avx2(&x[7], &x[8]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[20], &x[27], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[21], &x[26], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[22], &x[25], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[23], &x[24], _r, cos_bit); - btf_16_adds_subs_avx2(&x[32], &x[47]); - btf_16_adds_subs_avx2(&x[33], &x[46]); - btf_16_adds_subs_avx2(&x[34], &x[45]); - btf_16_adds_subs_avx2(&x[35], &x[44]); - btf_16_adds_subs_avx2(&x[36], &x[43]); - btf_16_adds_subs_avx2(&x[37], &x[42]); - btf_16_adds_subs_avx2(&x[38], &x[41]); - btf_16_adds_subs_avx2(&x[39], &x[40]); - btf_16_adds_subs_avx2(&x[63], &x[48]); - 
btf_16_adds_subs_avx2(&x[62], &x[49]); - btf_16_adds_subs_avx2(&x[61], &x[50]); - btf_16_adds_subs_avx2(&x[60], &x[51]); - btf_16_adds_subs_avx2(&x[59], &x[52]); - btf_16_adds_subs_avx2(&x[58], &x[53]); - btf_16_adds_subs_avx2(&x[57], &x[54]); - btf_16_adds_subs_avx2(&x[56], &x[55]); -} - -static INLINE void idct64_stage10_avx2(__m256i *x, const int32_t *cospi, - const __m256i _r, int8_t cos_bit) { - (void)cos_bit; - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_avx2(&x[0], &x[31]); - btf_16_adds_subs_avx2(&x[1], &x[30]); - btf_16_adds_subs_avx2(&x[2], &x[29]); - btf_16_adds_subs_avx2(&x[3], &x[28]); - btf_16_adds_subs_avx2(&x[4], &x[27]); - btf_16_adds_subs_avx2(&x[5], &x[26]); - btf_16_adds_subs_avx2(&x[6], &x[25]); - btf_16_adds_subs_avx2(&x[7], &x[24]); - btf_16_adds_subs_avx2(&x[8], &x[23]); - btf_16_adds_subs_avx2(&x[9], &x[22]); - btf_16_adds_subs_avx2(&x[10], &x[21]); - btf_16_adds_subs_avx2(&x[11], &x[20]); - btf_16_adds_subs_avx2(&x[12], &x[19]); - btf_16_adds_subs_avx2(&x[13], &x[18]); - btf_16_adds_subs_avx2(&x[14], &x[17]); - btf_16_adds_subs_avx2(&x[15], &x[16]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[40], &x[55], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[41], &x[54], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[42], &x[53], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[43], &x[52], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[44], &x[51], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[45], &x[50], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[46], &x[49], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[47], &x[48], _r, cos_bit); -} - -static INLINE void idct64_stage11_avx2(__m256i *output, __m256i *x) { - btf_16_adds_subs_out_avx2(&output[0], &output[63], x[0], x[63]); - 
btf_16_adds_subs_out_avx2(&output[1], &output[62], x[1], x[62]); - btf_16_adds_subs_out_avx2(&output[2], &output[61], x[2], x[61]); - btf_16_adds_subs_out_avx2(&output[3], &output[60], x[3], x[60]); - btf_16_adds_subs_out_avx2(&output[4], &output[59], x[4], x[59]); - btf_16_adds_subs_out_avx2(&output[5], &output[58], x[5], x[58]); - btf_16_adds_subs_out_avx2(&output[6], &output[57], x[6], x[57]); - btf_16_adds_subs_out_avx2(&output[7], &output[56], x[7], x[56]); - btf_16_adds_subs_out_avx2(&output[8], &output[55], x[8], x[55]); - btf_16_adds_subs_out_avx2(&output[9], &output[54], x[9], x[54]); - btf_16_adds_subs_out_avx2(&output[10], &output[53], x[10], x[53]); - btf_16_adds_subs_out_avx2(&output[11], &output[52], x[11], x[52]); - btf_16_adds_subs_out_avx2(&output[12], &output[51], x[12], x[51]); - btf_16_adds_subs_out_avx2(&output[13], &output[50], x[13], x[50]); - btf_16_adds_subs_out_avx2(&output[14], &output[49], x[14], x[49]); - btf_16_adds_subs_out_avx2(&output[15], &output[48], x[15], x[48]); - btf_16_adds_subs_out_avx2(&output[16], &output[47], x[16], x[47]); - btf_16_adds_subs_out_avx2(&output[17], &output[46], x[17], x[46]); - btf_16_adds_subs_out_avx2(&output[18], &output[45], x[18], x[45]); - btf_16_adds_subs_out_avx2(&output[19], &output[44], x[19], x[44]); - btf_16_adds_subs_out_avx2(&output[20], &output[43], x[20], x[43]); - btf_16_adds_subs_out_avx2(&output[21], &output[42], x[21], x[42]); - btf_16_adds_subs_out_avx2(&output[22], &output[41], x[22], x[41]); - btf_16_adds_subs_out_avx2(&output[23], &output[40], x[23], x[40]); - btf_16_adds_subs_out_avx2(&output[24], &output[39], x[24], x[39]); - btf_16_adds_subs_out_avx2(&output[25], &output[38], x[25], x[38]); - btf_16_adds_subs_out_avx2(&output[26], &output[37], x[26], x[37]); - btf_16_adds_subs_out_avx2(&output[27], &output[36], x[27], x[36]); - btf_16_adds_subs_out_avx2(&output[28], &output[35], x[28], x[35]); - btf_16_adds_subs_out_avx2(&output[29], &output[34], x[29], x[34]); - 
btf_16_adds_subs_out_avx2(&output[30], &output[33], x[30], x[33]); - btf_16_adds_subs_out_avx2(&output[31], &output[32], x[31], x[32]); -} - -static void idct64_low1_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - - // stage 1 - __m256i x[32]; - x[0] = input[0]; - - // stage 2 - // stage 3 - // stage 4 - // stage 5 - // stage 6 - btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]); - - // stage 7 - // stage 8 - // stage 9 - // stage 10 - // stage 11 - output[0] = x[0]; - output[63] = x[0]; - output[1] = x[1]; - output[62] = x[1]; - output[2] = x[1]; - output[61] = x[1]; - output[3] = x[0]; - output[60] = x[0]; - output[4] = x[0]; - output[59] = x[0]; - output[5] = x[1]; - output[58] = x[1]; - output[6] = x[1]; - output[57] = x[1]; - output[7] = x[0]; - output[56] = x[0]; - output[8] = x[0]; - output[55] = x[0]; - output[9] = x[1]; - output[54] = x[1]; - output[10] = x[1]; - output[53] = x[1]; - output[11] = x[0]; - output[52] = x[0]; - output[12] = x[0]; - output[51] = x[0]; - output[13] = x[1]; - output[50] = x[1]; - output[14] = x[1]; - output[49] = x[1]; - output[15] = x[0]; - output[48] = x[0]; - output[16] = x[0]; - output[47] = x[0]; - output[17] = x[1]; - output[46] = x[1]; - output[18] = x[1]; - output[45] = x[1]; - output[19] = x[0]; - output[44] = x[0]; - output[20] = x[0]; - output[43] = x[0]; - output[21] = x[1]; - output[42] = x[1]; - output[22] = x[1]; - output[41] = x[1]; - output[23] = x[0]; - output[40] = x[0]; - output[24] = x[0]; - output[39] = x[0]; - output[25] = x[1]; - output[38] = x[1]; - output[26] = x[1]; - output[37] = x[1]; - output[27] = x[0]; - output[36] = x[0]; - output[28] = x[0]; - output[35] = x[0]; - output[29] = x[1]; - output[34] = x[1]; - output[30] = x[1]; - output[33] = x[1]; - output[31] = x[0]; - output[32] = x[0]; -} - -static void idct64_low8_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)cos_bit; - 
const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - const __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]); - const __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]); - const __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]); - const __m256i cospi_m28_m36 = pair_set_w16_epi16(-cospi[28], -cospi[36]); - const __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]); - const __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]); - const __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]); - const __m256i cospi_m12_m52 = pair_set_w16_epi16(-cospi[12], -cospi[52]); - const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]); - const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]); - const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]); - const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]); - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - - // stage 1 - __m256i x[64]; - x[0] = input[0]; - x[8] = input[4]; - x[16] = input[2]; - x[24] = input[6]; - x[32] = input[1]; - x[40] = input[5]; - x[48] = input[3]; - x[56] = input[7]; - - // stage 2 - btf_16_w16_0_avx2(cospi[63], cospi[1], x[32], x[32], x[63]); - btf_16_w16_0_avx2(-cospi[57], cospi[7], x[56], x[39], x[56]); - btf_16_w16_0_avx2(cospi[59], cospi[5], x[40], x[40], x[55]); - btf_16_w16_0_avx2(-cospi[61], cospi[3], x[48], x[47], x[48]); - - // stage 3 - btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]); - btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]); - x[33] = x[32]; - x[38] = x[39]; - x[41] = x[40]; - x[46] = x[47]; - x[49] = x[48]; - x[54] 
= x[55]; - x[57] = x[56]; - x[62] = x[63]; - - // stage 4 - btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]); - x[17] = x[16]; - x[22] = x[23]; - x[25] = x[24]; - x[30] = x[31]; - btf_16_w16_avx2(cospi_m04_p60, cospi_p60_p04, &x[33], &x[62], _r, cos_bit); - btf_16_w16_avx2(cospi_m28_m36, cospi_m36_p28, &x[38], &x[57], _r, cos_bit); - btf_16_w16_avx2(cospi_m20_p44, cospi_p44_p20, &x[41], &x[54], _r, cos_bit); - btf_16_w16_avx2(cospi_m12_m52, cospi_m52_p12, &x[46], &x[49], _r, cos_bit); - - // stage 5 - x[9] = x[8]; - x[14] = x[15]; - btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x[17], &x[30], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x[22], &x[25], _r, cos_bit); - x[35] = x[32]; - x[34] = x[33]; - x[36] = x[39]; - x[37] = x[38]; - x[43] = x[40]; - x[42] = x[41]; - x[44] = x[47]; - x[45] = x[46]; - x[51] = x[48]; - x[50] = x[49]; - x[52] = x[55]; - x[53] = x[54]; - x[59] = x[56]; - x[58] = x[57]; - x[60] = x[63]; - x[61] = x[62]; - - // stage 6 - btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[9], &x[14], _r, cos_bit); - x[19] = x[16]; - x[18] = x[17]; - x[20] = x[23]; - x[21] = x[22]; - x[27] = x[24]; - x[26] = x[25]; - x[28] = x[31]; - x[29] = x[30]; - idct64_stage6_high32_avx2(x, cospi, _r, cos_bit); - - // stage 7 - x[3] = x[0]; - x[2] = x[1]; - x[11] = x[8]; - x[10] = x[9]; - x[12] = x[15]; - x[13] = x[14]; - idct64_stage7_high48_avx2(x, cospi, _r, cos_bit); - - // stage 8 - x[7] = x[0]; - x[6] = x[1]; - x[5] = x[2]; - x[4] = x[3]; - x[9] = x[9]; - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit); - idct64_stage8_high48_avx2(x, cospi, _r, cos_bit); - - idct64_stage9_avx2(x, cospi, _r, cos_bit); - idct64_stage10_avx2(x, cospi, _r, cos_bit); - idct64_stage11_avx2(output, x); -} - -static void idct64_low16_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) 
{ - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - - // stage 1 - __m256i x[64]; - x[0] = input[0]; - x[4] = input[8]; - x[8] = input[4]; - x[12] = input[12]; - x[16] = input[2]; - x[20] = input[10]; - x[24] = input[6]; - x[28] = input[14]; - x[32] = input[1]; - x[36] = input[9]; - x[40] = input[5]; - x[44] = input[13]; - x[48] = input[3]; - x[52] = input[11]; - x[56] = input[7]; - x[60] = input[15]; - - // stage 2 - btf_16_w16_0_avx2(cospi[63], cospi[1], x[32], x[32], x[63]); - btf_16_w16_0_avx2(-cospi[49], cospi[15], x[60], x[35], x[60]); - btf_16_w16_0_avx2(cospi[55], cospi[9], x[36], x[36], x[59]); - btf_16_w16_0_avx2(-cospi[57], cospi[7], x[56], x[39], x[56]); - btf_16_w16_0_avx2(cospi[59], cospi[5], x[40], x[40], x[55]); - btf_16_w16_0_avx2(-cospi[53], cospi[11], x[52], x[43], x[52]); - btf_16_w16_0_avx2(cospi[51], cospi[13], x[44], x[44], x[51]); - btf_16_w16_0_avx2(-cospi[61], cospi[3], x[48], x[47], x[48]); - - // stage 3 - btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]); - btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]); - btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]); - btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]); - x[33] = x[32]; - x[34] = x[35]; - x[37] = x[36]; - x[38] = x[39]; - x[41] = x[40]; - x[42] = x[43]; - x[45] = x[44]; - x[46] = x[47]; - x[49] = x[48]; - x[50] = x[51]; - x[53] = x[52]; - x[54] = x[55]; - x[57] = x[56]; - x[58] = x[59]; - x[61] = x[60]; - x[62] = x[63]; - - // stage 4 - btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], 
x[15]); - btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]); - x[17] = x[16]; - x[18] = x[19]; - x[21] = x[20]; - x[22] = x[23]; - x[25] = x[24]; - x[26] = x[27]; - x[29] = x[28]; - x[30] = x[31]; - idct64_stage4_high32_avx2(x, cospi, _r, cos_bit); - - // stage 5 - btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]); - x[9] = x[8]; - x[10] = x[11]; - x[13] = x[12]; - x[14] = x[15]; - idct64_stage5_high48_avx2(x, cospi, _r, cos_bit); - - // stage 6 - btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]); - x[5] = x[4]; - x[6] = x[7]; - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[9], &x[14], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[10], &x[13], _r, cos_bit); - idct64_stage6_high48_avx2(x, cospi, _r, cos_bit); - - // stage 7 - x[3] = x[0]; - x[2] = x[1]; - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[5], &x[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x[8], &x[11]); - btf_16_adds_subs_avx2(&x[9], &x[10]); - btf_16_adds_subs_avx2(&x[15], &x[12]); - btf_16_adds_subs_avx2(&x[14], &x[13]); - idct64_stage7_high48_avx2(x, cospi, _r, cos_bit); - - // stage 8 - btf_16_adds_subs_avx2(&x[0], &x[7]); - btf_16_adds_subs_avx2(&x[1], &x[6]); - btf_16_adds_subs_avx2(&x[2], &x[5]); - btf_16_adds_subs_avx2(&x[3], &x[4]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit); - idct64_stage8_high48_avx2(x, cospi, _r, cos_bit); - - idct64_stage9_avx2(x, cospi, _r, cos_bit); - idct64_stage10_avx2(x, cospi, _r, cos_bit); - idct64_stage11_avx2(output, x); -} - -static void idct64_low32_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - const 
__m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - - // stage 1 - __m256i x[64]; - x[0] = input[0]; - x[2] = input[16]; - x[4] = input[8]; - x[6] = input[24]; - x[8] = input[4]; - x[10] = input[20]; - x[12] = input[12]; - x[14] = input[28]; - x[16] = input[2]; - x[18] = input[18]; - x[20] = input[10]; - x[22] = input[26]; - x[24] = input[6]; - x[26] = input[22]; - x[28] = input[14]; - x[30] = input[30]; - x[32] = input[1]; - x[34] = input[17]; - x[36] = input[9]; - x[38] = input[25]; - x[40] = input[5]; - x[42] = input[21]; - x[44] = input[13]; - x[46] = input[29]; - x[48] = input[3]; - x[50] = input[19]; - x[52] = input[11]; - x[54] = input[27]; - x[56] = input[7]; - x[58] = input[23]; - x[60] = input[15]; - x[62] = input[31]; - - // stage 2 - btf_16_w16_0_avx2(cospi[63], cospi[1], x[32], x[32], x[63]); - btf_16_w16_0_avx2(-cospi[33], cospi[31], x[62], x[33], x[62]); - btf_16_w16_0_avx2(cospi[47], cospi[17], x[34], x[34], x[61]); - btf_16_w16_0_avx2(-cospi[49], cospi[15], x[60], x[35], x[60]); - btf_16_w16_0_avx2(cospi[55], cospi[9], x[36], x[36], x[59]); - btf_16_w16_0_avx2(-cospi[41], cospi[23], x[58], x[37], x[58]); - btf_16_w16_0_avx2(cospi[39], cospi[25], x[38], x[38], x[57]); - btf_16_w16_0_avx2(-cospi[57], cospi[7], x[56], x[39], x[56]); - btf_16_w16_0_avx2(cospi[59], cospi[5], x[40], x[40], x[55]); - btf_16_w16_0_avx2(-cospi[37], cospi[27], x[54], x[41], x[54]); - btf_16_w16_0_avx2(cospi[43], cospi[21], x[42], x[42], x[53]); - btf_16_w16_0_avx2(-cospi[53], cospi[11], x[52], x[43], x[52]); - btf_16_w16_0_avx2(cospi[51], cospi[13], x[44], x[44], x[51]); - btf_16_w16_0_avx2(-cospi[45], cospi[19], x[50], x[45], x[50]); - btf_16_w16_0_avx2(cospi[35], cospi[29], x[46], x[46], x[49]); - btf_16_w16_0_avx2(-cospi[61], cospi[3], x[48], x[47], x[48]); - - // stage 3 - btf_16_w16_0_avx2(cospi[62], 
cospi[2], x[16], x[16], x[31]); - btf_16_w16_0_avx2(-cospi[34], cospi[30], x[30], x[17], x[30]); - btf_16_w16_0_avx2(cospi[46], cospi[18], x[18], x[18], x[29]); - btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]); - btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]); - btf_16_w16_0_avx2(-cospi[42], cospi[22], x[26], x[21], x[26]); - btf_16_w16_0_avx2(cospi[38], cospi[26], x[22], x[22], x[25]); - btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]); - btf_16_adds_subs_avx2(&x[32], &x[33]); - btf_16_adds_subs_avx2(&x[35], &x[34]); - btf_16_adds_subs_avx2(&x[36], &x[37]); - btf_16_adds_subs_avx2(&x[39], &x[38]); - btf_16_adds_subs_avx2(&x[40], &x[41]); - btf_16_adds_subs_avx2(&x[43], &x[42]); - btf_16_adds_subs_avx2(&x[44], &x[45]); - btf_16_adds_subs_avx2(&x[47], &x[46]); - btf_16_adds_subs_avx2(&x[48], &x[49]); - btf_16_adds_subs_avx2(&x[51], &x[50]); - btf_16_adds_subs_avx2(&x[52], &x[53]); - btf_16_adds_subs_avx2(&x[55], &x[54]); - btf_16_adds_subs_avx2(&x[56], &x[57]); - btf_16_adds_subs_avx2(&x[59], &x[58]); - btf_16_adds_subs_avx2(&x[60], &x[61]); - btf_16_adds_subs_avx2(&x[63], &x[62]); - - // stage 4 - btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]); - btf_16_w16_0_avx2(-cospi[36], cospi[28], x[14], x[9], x[14]); - btf_16_w16_0_avx2(cospi[44], cospi[20], x[10], x[10], x[13]); - btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]); - btf_16_adds_subs_avx2(&x[16], &x[17]); - btf_16_adds_subs_avx2(&x[19], &x[18]); - btf_16_adds_subs_avx2(&x[20], &x[21]); - btf_16_adds_subs_avx2(&x[23], &x[22]); - btf_16_adds_subs_avx2(&x[24], &x[25]); - btf_16_adds_subs_avx2(&x[27], &x[26]); - btf_16_adds_subs_avx2(&x[28], &x[29]); - btf_16_adds_subs_avx2(&x[31], &x[30]); - idct64_stage4_high32_avx2(x, cospi, _r, cos_bit); - - // stage 5 - btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]); - btf_16_w16_0_avx2(-cospi[40], cospi[24], x[6], x[5], x[6]); - btf_16_adds_subs_avx2(&x[8], &x[9]); - btf_16_adds_subs_avx2(&x[11], 
&x[10]); - btf_16_adds_subs_avx2(&x[12], &x[13]); - btf_16_adds_subs_avx2(&x[15], &x[14]); - idct64_stage5_high48_avx2(x, cospi, _r, cos_bit); - - // stage 6 - btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]); - btf_16_w16_0_avx2(cospi[48], cospi[16], x[2], x[2], x[3]); - btf_16_adds_subs_avx2(&x[4], &x[5]); - btf_16_adds_subs_avx2(&x[7], &x[6]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[9], &x[14], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[10], &x[13], _r, cos_bit); - idct64_stage6_high48_avx2(x, cospi, _r, cos_bit); - - // stage 7 - btf_16_adds_subs_avx2(&x[0], &x[3]); - btf_16_adds_subs_avx2(&x[1], &x[2]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[5], &x[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x[8], &x[11]); - btf_16_adds_subs_avx2(&x[9], &x[10]); - btf_16_adds_subs_avx2(&x[15], &x[12]); - btf_16_adds_subs_avx2(&x[14], &x[13]); - idct64_stage7_high48_avx2(x, cospi, _r, cos_bit); - - // stage 8 - btf_16_adds_subs_avx2(&x[0], &x[7]); - btf_16_adds_subs_avx2(&x[1], &x[6]); - btf_16_adds_subs_avx2(&x[2], &x[5]); - btf_16_adds_subs_avx2(&x[3], &x[4]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit); - idct64_stage8_high48_avx2(x, cospi, _r, cos_bit); - - // stage 9~11 - idct64_stage9_avx2(x, cospi, _r, cos_bit); - idct64_stage10_avx2(x, cospi, _r, cos_bit); - idct64_stage11_avx2(output, x); -} - -// 1D functions process 16 pixels at one time. 
-static const transform_1d_avx2 - lowbd_txfm_all_1d_zeros_w16_arr[TX_SIZES][ITX_TYPES_1D][4] = { - { - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - }, - { { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } }, - { - { idct16_low1_new_avx2, idct16_low8_new_avx2, idct16_new_avx2, NULL }, - { iadst16_low1_new_avx2, iadst16_low8_new_avx2, iadst16_new_avx2, - NULL }, - { NULL, NULL, NULL, NULL }, - }, - { { idct32_low1_new_avx2, idct32_low8_new_avx2, idct32_low16_new_avx2, - idct32_new_avx2 }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } }, - { { idct64_low1_new_avx2, idct64_low8_new_avx2, idct64_low16_new_avx2, - idct64_low32_new_avx2 }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } } - }; - -// only process w >= 16 h >= 16 -static INLINE void lowbd_inv_txfm2d_add_no_identity_avx2( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - __m256i buf1[64 * 16]; - int eobx, eoby; - get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_w_div16 = txfm_size_col >> 4; - const int buf_size_nonzero_w_div16 = (eobx + 16) >> 4; - const int buf_size_nonzero_h_div16 = (eoby + 16) >> 4; - const int input_stride = AOMMIN(32, txfm_size_col); - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const transform_1d_avx2 row_txfm = - lowbd_txfm_all_1d_zeros_w16_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const 
transform_1d_avx2 col_txfm = - lowbd_txfm_all_1d_zeros_w16_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - for (int i = 0; i < buf_size_nonzero_h_div16; i++) { - __m256i buf0[64]; - const int32_t *input_row = input + (i << 4) * input_stride; - for (int j = 0; j < buf_size_nonzero_w_div16; ++j) { - __m256i *buf0_cur = buf0 + j * 16; - const int32_t *input_cur = input_row + j * 16; - load_buffer_32bit_to_16bit_w16_avx2(input_cur, input_stride, buf0_cur, - 16); - transpose_16bit_16x16_avx2(buf0_cur, buf0_cur); - } - if (rect_type == 1 || rect_type == -1) { - round_shift_avx2(buf0, buf0, input_stride); // rect special code - } - row_txfm(buf0, buf0, cos_bit_row); - round_shift_16bit_w16_avx2(buf0, txfm_size_col, shift[0]); - - __m256i *buf1_cur = buf1 + (i << 4); - if (lr_flip) { - for (int j = 0; j < buf_size_w_div16; ++j) { - __m256i temp[16]; - flip_buf_avx2(buf0 + 16 * j, temp, 16); - int offset = txfm_size_row * (buf_size_w_div16 - 1 - j); - transpose_16bit_16x16_avx2(temp, buf1_cur + offset); - } - } else { - for (int j = 0; j < buf_size_w_div16; ++j) { - transpose_16bit_16x16_avx2(buf0 + 16 * j, buf1_cur + txfm_size_row * j); - } - } - } - for (int i = 0; i < buf_size_w_div16; i++) { - __m256i *buf1_cur = buf1 + i * txfm_size_row; - col_txfm(buf1_cur, buf1_cur, cos_bit_col); - round_shift_16bit_w16_avx2(buf1_cur, txfm_size_row, shift[1]); - } - for (int i = 0; i < buf_size_w_div16; i++) { - lowbd_write_buffer_16xn_avx2(buf1 + i * txfm_size_row, output + 16 * i, - stride, ud_flip, txfm_size_row); - } -} - -static INLINE void iidentity_row_16xn_avx2(__m256i *out, const int32_t *input, - int stride, int shift, int height, - int txw_idx, int rect_type) { - const int32_t *input_row = input; - const __m256i scale = _mm256_set1_epi16(NewSqrt2list[txw_idx]); - const __m256i _r = _mm256_set1_epi16((1 << (NewSqrt2Bits - 1)) + - (1 << 
(NewSqrt2Bits - shift - 1))); - const __m256i one = _mm256_set1_epi16(1); - const __m256i scale__r = _mm256_unpacklo_epi16(scale, _r); - if (rect_type != 1 && rect_type != -1) { - for (int i = 0; i < height; ++i) { - const __m256i src = load_32bit_to_16bit_w16_avx2(input_row); - input_row += stride; - __m256i lo = _mm256_unpacklo_epi16(src, one); - __m256i hi = _mm256_unpackhi_epi16(src, one); - lo = _mm256_madd_epi16(lo, scale__r); - hi = _mm256_madd_epi16(hi, scale__r); - lo = _mm256_srai_epi32(lo, NewSqrt2Bits - shift); - hi = _mm256_srai_epi32(hi, NewSqrt2Bits - shift); - out[i] = _mm256_packs_epi32(lo, hi); - } - } else { - const __m256i rect_scale = - _mm256_set1_epi16(NewInvSqrt2 << (15 - NewSqrt2Bits)); - for (int i = 0; i < height; ++i) { - __m256i src = load_32bit_to_16bit_w16_avx2(input_row); - src = _mm256_mulhrs_epi16(src, rect_scale); - input_row += stride; - __m256i lo = _mm256_unpacklo_epi16(src, one); - __m256i hi = _mm256_unpackhi_epi16(src, one); - lo = _mm256_madd_epi16(lo, scale__r); - hi = _mm256_madd_epi16(hi, scale__r); - lo = _mm256_srai_epi32(lo, NewSqrt2Bits - shift); - hi = _mm256_srai_epi32(hi, NewSqrt2Bits - shift); - out[i] = _mm256_packs_epi32(lo, hi); - } - } -} - -static INLINE void iidentity_col_16xn_avx2(uint8_t *output, int stride, - __m256i *buf, int shift, int height, - int txh_idx) { - const __m256i scale = _mm256_set1_epi16(NewSqrt2list[txh_idx]); - const __m256i scale__r = _mm256_set1_epi16(1 << (NewSqrt2Bits - 1)); - const __m256i shift__r = _mm256_set1_epi32(1 << (-shift - 1)); - const __m256i one = _mm256_set1_epi16(1); - const __m256i scale_coeff = _mm256_unpacklo_epi16(scale, scale__r); - for (int h = 0; h < height; ++h) { - __m256i lo = _mm256_unpacklo_epi16(buf[h], one); - __m256i hi = _mm256_unpackhi_epi16(buf[h], one); - lo = _mm256_madd_epi16(lo, scale_coeff); - hi = _mm256_madd_epi16(hi, scale_coeff); - lo = _mm256_srai_epi32(lo, NewSqrt2Bits); - hi = _mm256_srai_epi32(hi, NewSqrt2Bits); - lo = 
_mm256_add_epi32(lo, shift__r); - hi = _mm256_add_epi32(hi, shift__r); - lo = _mm256_srai_epi32(lo, -shift); - hi = _mm256_srai_epi32(hi, -shift); - const __m256i x = _mm256_packs_epi32(lo, hi); - write_recon_w16_avx2(x, output); - output += stride; - } -} - -static INLINE void lowbd_inv_txfm2d_add_idtx_avx2(const int32_t *input, - uint8_t *output, int stride, - TX_SIZE tx_size, - int32_t eob) { - (void)eob; - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int input_stride = AOMMIN(32, txfm_size_col); - const int row_max = AOMMIN(32, txfm_size_row); - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - __m256i buf[32]; - for (int i = 0; i < input_stride; i += 16) { - iidentity_row_16xn_avx2(buf, input + i, input_stride, shift[0], row_max, - txw_idx, rect_type); - iidentity_col_16xn_avx2(output + i, stride, buf, shift[1], row_max, - txh_idx); - } -} - -static INLINE void lowbd_inv_txfm2d_add_h_identity_avx2( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - int eobx, eoby; - get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int txfm_size_col_notzero = AOMMIN(32, txfm_size_col); - const int input_stride = txfm_size_col_notzero; - const int buf_size_w_div16 = (eobx + 16) >> 4; - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const transform_1d_avx2 col_txfm = - 
lowbd_txfm_all_1d_zeros_w16_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - for (int i = 0; i < buf_size_w_div16; i++) { - __m256i buf0[64]; - iidentity_row_16xn_avx2(buf0, input + (i << 4), input_stride, shift[0], - eoby + 1, txw_idx, rect_type); - col_txfm(buf0, buf0, cos_bit_col); - __m256i mshift = _mm256_set1_epi16(1 << (15 + shift[1])); - int k = ud_flip ? (txfm_size_row - 1) : 0; - const int step = ud_flip ? -1 : 1; - for (int j = 0; j < txfm_size_row; ++j, k += step) { - __m256i res = _mm256_mulhrs_epi16(buf0[k], mshift); - write_recon_w16_avx2(res, output + (i << 4) + j * stride); - } - } -} - -static INLINE void lowbd_inv_txfm2d_add_v_identity_avx2( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - __m256i buf1[64]; - int eobx, eoby; - get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_w_div16 = txfm_size_col >> 4; - const int buf_size_h_div16 = (eoby + 16) >> 4; - const int input_stride = AOMMIN(32, txfm_size_col); - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const transform_1d_avx2 row_txfm = - lowbd_txfm_all_1d_zeros_w16_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - - assert(row_txfm != NULL); - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - for (int i = 0; i < buf_size_h_div16; i++) { - __m256i buf0[64]; - const int32_t *input_row = input + i * input_stride * 16; - for (int j = 0; j < AOMMIN(4, buf_size_w_div16); ++j) { - __m256i *buf0_cur = buf0 + j * 
16; - load_buffer_32bit_to_16bit_w16_avx2(input_row + j * 16, input_stride, - buf0_cur, 16); - transpose_16bit_16x16_avx2(buf0_cur, buf0_cur); - } - if (rect_type == 1 || rect_type == -1) { - round_shift_avx2(buf0, buf0, input_stride); // rect special code - } - row_txfm(buf0, buf0, cos_bit_row); - round_shift_16bit_w16_avx2(buf0, txfm_size_col, shift[0]); - __m256i *_buf1 = buf1; - if (lr_flip) { - for (int j = 0; j < buf_size_w_div16; ++j) { - __m256i temp[16]; - flip_buf_avx2(buf0 + 16 * j, temp, 16); - transpose_16bit_16x16_avx2(temp, - _buf1 + 16 * (buf_size_w_div16 - 1 - j)); - } - } else { - for (int j = 0; j < buf_size_w_div16; ++j) { - transpose_16bit_16x16_avx2(buf0 + 16 * j, _buf1 + 16 * j); - } - } - for (int j = 0; j < buf_size_w_div16; ++j) { - iidentity_col_16xn_avx2(output + i * 16 * stride + j * 16, stride, - buf1 + j * 16, shift[1], 16, txh_idx); - } - } -} - -// for 32x32,32x64,64x32,64x64,16x32,32x16,64x16,16x64 -static INLINE void lowbd_inv_txfm2d_add_universe_avx2( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - (void)eob; - switch (tx_type) { - case DCT_DCT: - case ADST_DCT: // ADST in vertical, DCT in horizontal - case DCT_ADST: // DCT in vertical, ADST in horizontal - case ADST_ADST: // ADST in both directions - case FLIPADST_DCT: - case DCT_FLIPADST: - case FLIPADST_FLIPADST: - case ADST_FLIPADST: - case FLIPADST_ADST: - lowbd_inv_txfm2d_add_no_identity_avx2(input, output, stride, tx_type, - tx_size, eob); - break; - case IDTX: - lowbd_inv_txfm2d_add_idtx_avx2(input, output, stride, tx_size, eob); - break; - case V_DCT: - case V_ADST: - case V_FLIPADST: - lowbd_inv_txfm2d_add_h_identity_avx2(input, output, stride, tx_type, - tx_size, eob); - break; - case H_DCT: - case H_ADST: - case H_FLIPADST: - lowbd_inv_txfm2d_add_v_identity_avx2(input, output, stride, tx_type, - tx_size, eob); - break; - default: - av1_lowbd_inv_txfm2d_add_ssse3(input, output, stride, tx_type, tx_size, - eob); - 
break; - } -} - -void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, TX_SIZE tx_size, - int eob) { - switch (tx_size) { - case TX_4X4: - case TX_8X8: - case TX_4X8: - case TX_8X4: - case TX_8X16: - case TX_16X8: - case TX_4X16: - case TX_16X4: - case TX_8X32: - case TX_32X8: - av1_lowbd_inv_txfm2d_add_ssse3(input, output, stride, tx_type, tx_size, - eob); - break; - case TX_16X16: - case TX_32X32: - case TX_64X64: - case TX_16X32: - case TX_32X16: - case TX_32X64: - case TX_64X32: - case TX_16X64: - case TX_64X16: - default: - lowbd_inv_txfm2d_add_universe_avx2(input, output, stride, tx_type, - tx_size, eob); - break; - } -} - -void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, - const TxfmParam *txfm_param) { - const TX_TYPE tx_type = txfm_param->tx_type; - if (!txfm_param->lossless) { - av1_lowbd_inv_txfm2d_add_avx2(dqcoeff, dst, stride, tx_type, - txfm_param->tx_size, txfm_param->eob); - } else { - av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param); - } -} diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h b/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h deleted file mode 100644 index f74cbaeaa..000000000 --- a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ -#ifndef AOM_AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_ -#define AOM_AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_ - -#include <immintrin.h> - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/x86/transpose_sse2.h" -#include "aom_dsp/x86/txfm_common_sse2.h" -#include "aom_dsp/x86/txfm_common_avx2.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// half input is zero -#define btf_16_w16_0_avx2(w0, w1, in, out0, out1) \ - { \ - const __m256i _w0 = _mm256_set1_epi16(w0 * 8); \ - const __m256i _w1 = _mm256_set1_epi16(w1 * 8); \ - const __m256i _in = in; \ - out0 = _mm256_mulhrs_epi16(_in, _w0); \ - out1 = _mm256_mulhrs_epi16(_in, _w1); \ - } - -static INLINE void round_shift_avx2(const __m256i *input, __m256i *output, - int size) { - const __m256i scale = _mm256_set1_epi16(NewInvSqrt2 * 8); - for (int i = 0; i < size; ++i) { - output[i] = _mm256_mulhrs_epi16(input[i], scale); - } -} - -static INLINE void write_recon_w16_avx2(__m256i res, uint8_t *output) { - __m128i pred = _mm_loadu_si128((__m128i const *)(output)); - __m256i u = _mm256_adds_epi16(_mm256_cvtepu8_epi16(pred), res); - __m128i y = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(u, u), 168)); - _mm_storeu_si128((__m128i *)(output), y); -} - -static INLINE void lowbd_write_buffer_16xn_avx2(__m256i *in, uint8_t *output, - int stride, int flipud, - int height) { - int j = flipud ? (height - 1) : 0; - const int step = flipud ? 
-1 : 1; - for (int i = 0; i < height; ++i, j += step) { - write_recon_w16_avx2(in[j], output + i * stride); - } -} - -void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, TX_SIZE tx_size, - int eob); -#ifdef __cplusplus -} -#endif - -#endif // AOM_AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_ diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c b/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c deleted file mode 100644 index 995bc3da4..000000000 --- a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c +++ /dev/null @@ -1,2923 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "av1/common/av1_inv_txfm1d_cfg.h" -#include "av1/common/x86/av1_inv_txfm_ssse3.h" -#include "av1/common/x86/av1_txfm_sse2.h" - -// TODO(venkatsanampudi@ittiam.com): move this to header file - -// Sqrt2, Sqrt2^2, Sqrt2^3, Sqrt2^4, Sqrt2^5 -static int32_t NewSqrt2list[TX_SIZES] = { 5793, 2 * 4096, 2 * 5793, 4 * 4096, - 4 * 5793 }; - -// TODO(binpengsmail@gmail.com): replace some for loop with do {} while - -static void idct4_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - - // stage 1 - __m128i x[4]; - x[0] = input[0]; - x[1] = input[2]; - x[2] = input[1]; - x[3] = input[3]; - - // stage 2 - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]); - btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]); - - // stage 3 - btf_16_adds_subs_out_sse2(output[0], output[3], x[0], x[3]); - btf_16_adds_subs_out_sse2(output[1], output[2], x[1], x[2]); -} - -void idct4_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - - // stage 1 - __m128i x[4]; - x[0] = input[0]; - x[1] = input[2]; - x[2] = input[1]; - x[3] = 
input[3]; - - // stage 2 - btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]); - btf_16_4p_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]); - - // stage 3 - btf_16_adds_subs_out_sse2(output[0], output[3], x[0], x[3]); - btf_16_adds_subs_out_sse2(output[1], output[2], x[1], x[2]); -} - -void idct8_low1_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - - // stage 1 - __m128i x[2]; - x[0] = input[0]; - - // stage 2 - // stage 3 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - - // stage 4 - // stage 5 - output[0] = x[0]; - output[7] = x[0]; - output[1] = x[1]; - output[6] = x[1]; - output[2] = x[1]; - output[5] = x[1]; - output[3] = x[0]; - output[4] = x[0]; -} - -void idct8_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]); - const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]); - const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]); - const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - - // stage 1 - __m128i x[8]; - x[0] = input[0]; - x[1] = input[4]; - x[2] = input[2]; - x[3] = input[6]; - x[4] = input[1]; - x[5] = input[5]; - x[6] = input[3]; - x[7] = input[7]; - - // stage 2 - btf_16_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]); - btf_16_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]); - - // stage 3 - 
btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]); - btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]); - btf_16_adds_subs_sse2(x[4], x[5]); - btf_16_subs_adds_sse2(x[7], x[6]); - - // stage 4 - btf_16_adds_subs_sse2(x[0], x[3]); - btf_16_adds_subs_sse2(x[1], x[2]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]); - - // stage 5 - btf_16_adds_subs_out_sse2(output[0], output[7], x[0], x[7]); - btf_16_adds_subs_out_sse2(output[1], output[6], x[1], x[6]); - btf_16_adds_subs_out_sse2(output[2], output[5], x[2], x[5]); - btf_16_adds_subs_out_sse2(output[3], output[4], x[3], x[4]); -} - -void idct8_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]); - const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]); - const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]); - const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - - // stage 1 - __m128i x[8]; - x[0] = input[0]; - x[1] = input[4]; - x[2] = input[2]; - x[3] = input[6]; - x[4] = input[1]; - x[5] = input[5]; - x[6] = input[3]; - x[7] = input[7]; - - // stage 2 - btf_16_4p_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]); - btf_16_4p_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]); - - // stage 3 - btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]); - btf_16_4p_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]); - 
btf_16_adds_subs_sse2(x[4], x[5]); - btf_16_subs_adds_sse2(x[7], x[6]); - - // stage 4 - btf_16_adds_subs_sse2(x[0], x[3]); - btf_16_adds_subs_sse2(x[1], x[2]); - btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]); - - // stage 5 - btf_16_adds_subs_out_sse2(output[0], output[7], x[0], x[7]); - btf_16_adds_subs_out_sse2(output[1], output[6], x[1], x[6]); - btf_16_adds_subs_out_sse2(output[2], output[5], x[2], x[5]); - btf_16_adds_subs_out_sse2(output[3], output[4], x[3], x[4]); -} - -static INLINE void idct16_stage5_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_sse2(x[0], x[3]); - btf_16_adds_subs_sse2(x[1], x[2]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[11]); - btf_16_adds_subs_sse2(x[9], x[10]); - btf_16_subs_adds_sse2(x[15], x[12]); - btf_16_subs_adds_sse2(x[14], x[13]); -} - -static INLINE void idct16_stage6_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_sse2(x[0], x[7]); - btf_16_adds_subs_sse2(x[1], x[6]); - btf_16_adds_subs_sse2(x[2], x[5]); - btf_16_adds_subs_sse2(x[3], x[4]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]); -} - -static INLINE void idct16_stage7_sse2(__m128i *output, __m128i *x) { - btf_16_adds_subs_out_sse2(output[0], output[15], x[0], x[15]); - btf_16_adds_subs_out_sse2(output[1], output[14], x[1], x[14]); - btf_16_adds_subs_out_sse2(output[2], output[13], x[2], x[13]); - btf_16_adds_subs_out_sse2(output[3], output[12], x[3], x[12]); - btf_16_adds_subs_out_sse2(output[4], 
output[11], x[4], x[11]); - btf_16_adds_subs_out_sse2(output[5], output[10], x[5], x[10]); - btf_16_adds_subs_out_sse2(output[6], output[9], x[6], x[9]); - btf_16_adds_subs_out_sse2(output[7], output[8], x[7], x[8]); -} - -static void idct16_low1_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - - // stage 1 - __m128i x[2]; - x[0] = input[0]; - - // stage 2 - // stage 3 - // stage 4 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - - // stage 5 - // stage 6 - // stage 7 - output[0] = x[0]; - output[15] = x[0]; - output[1] = x[1]; - output[14] = x[1]; - output[2] = x[1]; - output[13] = x[1]; - output[3] = x[0]; - output[12] = x[0]; - output[4] = x[0]; - output[11] = x[0]; - output[5] = x[1]; - output[10] = x[1]; - output[6] = x[1]; - output[9] = x[1]; - output[7] = x[0]; - output[8] = x[0]; -} - -static void idct16_low8_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - - // stage 1 - __m128i x[16]; - x[0] = input[0]; - x[2] = input[4]; - x[4] = input[2]; - x[6] = input[6]; - x[8] = input[1]; - x[10] = input[5]; - x[12] = input[3]; - x[14] = input[7]; - - // stage 2 - btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]); - btf_16_ssse3(-cospi[36], cospi[28], x[14], x[9], x[14]); - btf_16_ssse3(cospi[44], cospi[20], x[10], x[10], x[13]); - btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]); - - // stage 3 - btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]); - btf_16_ssse3(-cospi[40], cospi[24], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[9]); - btf_16_subs_adds_sse2(x[11], x[10]); - 
btf_16_adds_subs_sse2(x[12], x[13]); - btf_16_subs_adds_sse2(x[15], x[14]); - - // stage 4 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - btf_16_ssse3(cospi[48], cospi[16], x[2], x[2], x[3]); - btf_16_adds_subs_sse2(x[4], x[5]); - btf_16_subs_adds_sse2(x[7], x[6]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]); - - idct16_stage5_sse2(x, cospi, __rounding, cos_bit); - idct16_stage6_sse2(x, cospi, __rounding, cos_bit); - idct16_stage7_sse2(output, x); -} - -void idct16_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]); - const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]); - const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]); - const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]); - const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]); - const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]); - const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]); - const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]); - const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]); - const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]); - const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]); - const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i 
cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - - // stage 1 - __m128i x[16]; - x[0] = input[0]; - x[1] = input[8]; - x[2] = input[4]; - x[3] = input[12]; - x[4] = input[2]; - x[5] = input[10]; - x[6] = input[6]; - x[7] = input[14]; - x[8] = input[1]; - x[9] = input[9]; - x[10] = input[5]; - x[11] = input[13]; - x[12] = input[3]; - x[13] = input[11]; - x[14] = input[7]; - x[15] = input[15]; - - // stage 2 - btf_16_sse2(cospi_p60_m04, cospi_p04_p60, x[8], x[15], x[8], x[15]); - btf_16_sse2(cospi_p28_m36, cospi_p36_p28, x[9], x[14], x[9], x[14]); - btf_16_sse2(cospi_p44_m20, cospi_p20_p44, x[10], x[13], x[10], x[13]); - btf_16_sse2(cospi_p12_m52, cospi_p52_p12, x[11], x[12], x[11], x[12]); - - // stage 3 - btf_16_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]); - btf_16_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[9]); - btf_16_subs_adds_sse2(x[11], x[10]); - btf_16_adds_subs_sse2(x[12], x[13]); - btf_16_subs_adds_sse2(x[15], x[14]); - - // stage 4 - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]); - btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]); - btf_16_adds_subs_sse2(x[4], x[5]); - btf_16_subs_adds_sse2(x[7], x[6]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]); - - // stage 5~7 - idct16_stage5_sse2(x, cospi, __rounding, cos_bit); - idct16_stage6_sse2(x, cospi, __rounding, cos_bit); - idct16_stage7_sse2(output, x); -} - -void idct16_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]); - const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]); - const __m128i 
cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]); - const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]); - const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]); - const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]); - const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]); - const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]); - const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]); - const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]); - const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]); - const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - - // stage 1 - __m128i x[16]; - x[0] = input[0]; - x[1] = input[8]; - x[2] = input[4]; - x[3] = input[12]; - x[4] = input[2]; - x[5] = input[10]; - x[6] = input[6]; - x[7] = input[14]; - x[8] = input[1]; - x[9] = input[9]; - x[10] = input[5]; - x[11] = input[13]; - x[12] = input[3]; - x[13] = input[11]; - x[14] = input[7]; - x[15] = input[15]; - - // stage 2 - btf_16_4p_sse2(cospi_p60_m04, cospi_p04_p60, x[8], x[15], x[8], x[15]); - btf_16_4p_sse2(cospi_p28_m36, cospi_p36_p28, x[9], x[14], x[9], x[14]); - btf_16_4p_sse2(cospi_p44_m20, cospi_p20_p44, x[10], x[13], x[10], x[13]); - btf_16_4p_sse2(cospi_p12_m52, cospi_p52_p12, x[11], x[12], x[11], x[12]); - - // stage 3 - btf_16_4p_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], 
x[7]); - btf_16_4p_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[9]); - btf_16_subs_adds_sse2(x[11], x[10]); - btf_16_adds_subs_sse2(x[12], x[13]); - btf_16_subs_adds_sse2(x[15], x[14]); - - // stage 4 - btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]); - btf_16_4p_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]); - btf_16_adds_subs_sse2(x[4], x[5]); - btf_16_subs_adds_sse2(x[7], x[6]); - btf_16_4p_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]); - btf_16_4p_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]); - - // stage 5 - btf_16_adds_subs_sse2(x[0], x[3]); - btf_16_adds_subs_sse2(x[1], x[2]); - btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[11]); - btf_16_adds_subs_sse2(x[9], x[10]); - btf_16_subs_adds_sse2(x[15], x[12]); - btf_16_subs_adds_sse2(x[14], x[13]); - - // stage 6 - btf_16_adds_subs_sse2(x[0], x[7]); - btf_16_adds_subs_sse2(x[1], x[6]); - btf_16_adds_subs_sse2(x[2], x[5]); - btf_16_adds_subs_sse2(x[3], x[4]); - btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]); - btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]); - - // stage 7 - idct16_stage7_sse2(output, x); -} - -static INLINE void idct32_high16_stage3_sse2(__m128i *x) { - btf_16_adds_subs_sse2(x[16], x[17]); - btf_16_subs_adds_sse2(x[19], x[18]); - btf_16_adds_subs_sse2(x[20], x[21]); - btf_16_subs_adds_sse2(x[23], x[22]); - btf_16_adds_subs_sse2(x[24], x[25]); - btf_16_subs_adds_sse2(x[27], x[26]); - btf_16_adds_subs_sse2(x[28], x[29]); - btf_16_subs_adds_sse2(x[31], x[30]); -} - -static INLINE void idct32_high16_stage4_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]); - const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]); - const __m128i cospi_m56_m08 = 
pair_set_epi16(-cospi[56], -cospi[8]); - const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]); - const __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]); - const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]); - btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]); - btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[18], x[29], x[18], x[29]); - btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[21], x[26], x[21], x[26]); - btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]); -} - -static INLINE void idct32_high24_stage5_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]); - btf_16_adds_subs_sse2(x[16], x[19]); - btf_16_adds_subs_sse2(x[17], x[18]); - btf_16_subs_adds_sse2(x[23], x[20]); - btf_16_subs_adds_sse2(x[22], x[21]); - btf_16_adds_subs_sse2(x[24], x[27]); - btf_16_adds_subs_sse2(x[25], x[26]); - btf_16_subs_adds_sse2(x[31], x[28]); - btf_16_subs_adds_sse2(x[30], x[29]); -} - -static INLINE void idct32_high28_stage6_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[11]); - btf_16_adds_subs_sse2(x[9], x[10]); - 
btf_16_subs_adds_sse2(x[15], x[12]); - btf_16_subs_adds_sse2(x[14], x[13]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[18], x[29], x[18], x[29]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[19], x[28], x[19], x[28]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[20], x[27], x[20], x[27]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[21], x[26], x[21], x[26]); -} - -static INLINE void idct32_stage7_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_sse2(x[0], x[7]); - btf_16_adds_subs_sse2(x[1], x[6]); - btf_16_adds_subs_sse2(x[2], x[5]); - btf_16_adds_subs_sse2(x[3], x[4]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]); - btf_16_adds_subs_sse2(x[16], x[23]); - btf_16_adds_subs_sse2(x[17], x[22]); - btf_16_adds_subs_sse2(x[18], x[21]); - btf_16_adds_subs_sse2(x[19], x[20]); - btf_16_subs_adds_sse2(x[31], x[24]); - btf_16_subs_adds_sse2(x[30], x[25]); - btf_16_subs_adds_sse2(x[29], x[26]); - btf_16_subs_adds_sse2(x[28], x[27]); -} - -static INLINE void idct32_stage8_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_sse2(x[0], x[15]); - btf_16_adds_subs_sse2(x[1], x[14]); - btf_16_adds_subs_sse2(x[2], x[13]); - btf_16_adds_subs_sse2(x[3], x[12]); - btf_16_adds_subs_sse2(x[4], x[11]); - btf_16_adds_subs_sse2(x[5], x[10]); - btf_16_adds_subs_sse2(x[6], x[9]); - btf_16_adds_subs_sse2(x[7], x[8]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[20], x[27], x[20], x[27]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[21], x[26], x[21], x[26]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, 
x[22], x[25], x[22], x[25]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[23], x[24], x[23], x[24]); -} - -static INLINE void idct32_stage9_sse2(__m128i *output, __m128i *x) { - btf_16_adds_subs_out_sse2(output[0], output[31], x[0], x[31]); - btf_16_adds_subs_out_sse2(output[1], output[30], x[1], x[30]); - btf_16_adds_subs_out_sse2(output[2], output[29], x[2], x[29]); - btf_16_adds_subs_out_sse2(output[3], output[28], x[3], x[28]); - btf_16_adds_subs_out_sse2(output[4], output[27], x[4], x[27]); - btf_16_adds_subs_out_sse2(output[5], output[26], x[5], x[26]); - btf_16_adds_subs_out_sse2(output[6], output[25], x[6], x[25]); - btf_16_adds_subs_out_sse2(output[7], output[24], x[7], x[24]); - btf_16_adds_subs_out_sse2(output[8], output[23], x[8], x[23]); - btf_16_adds_subs_out_sse2(output[9], output[22], x[9], x[22]); - btf_16_adds_subs_out_sse2(output[10], output[21], x[10], x[21]); - btf_16_adds_subs_out_sse2(output[11], output[20], x[11], x[20]); - btf_16_adds_subs_out_sse2(output[12], output[19], x[12], x[19]); - btf_16_adds_subs_out_sse2(output[13], output[18], x[13], x[18]); - btf_16_adds_subs_out_sse2(output[14], output[17], x[14], x[17]); - btf_16_adds_subs_out_sse2(output[15], output[16], x[15], x[16]); -} - -static void idct32_low1_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - - // stage 1 - __m128i x[2]; - x[0] = input[0]; - - // stage 2 - // stage 3 - // stage 4 - // stage 5 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - - // stage 6 - // stage 7 - // stage 8 - // stage 9 - output[0] = x[0]; - output[31] = x[0]; - output[1] = x[1]; - output[30] = x[1]; - output[2] = x[1]; - output[29] = x[1]; - output[3] = x[0]; - output[28] = x[0]; - output[4] = x[0]; - output[27] = x[0]; - output[5] = x[1]; - output[26] = x[1]; - output[6] = x[1]; - output[25] = x[1]; - output[7] = x[0]; - output[24] = x[0]; - output[8] = x[0]; - output[23] = x[0]; - output[9] = x[1]; - 
output[22] = x[1]; - output[10] = x[1]; - output[21] = x[1]; - output[11] = x[0]; - output[20] = x[0]; - output[12] = x[0]; - output[19] = x[0]; - output[13] = x[1]; - output[18] = x[1]; - output[14] = x[1]; - output[17] = x[1]; - output[15] = x[0]; - output[16] = x[0]; -} - -static void idct32_low8_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - // stage 1 - __m128i x[32]; - x[0] = input[0]; - x[4] = input[4]; - x[8] = input[2]; - x[12] = input[6]; - x[16] = input[1]; - x[20] = input[5]; - x[24] = input[3]; - x[28] = input[7]; - - // stage 2 - btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]); - btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]); - btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]); - btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]); - - // stage 3 - btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]); - btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]); - x[17] = x[16]; - x[18] = x[19]; - x[21] = x[20]; - x[22] = x[23]; - x[25] = x[24]; - x[26] = x[27]; - x[29] = x[28]; - x[30] = x[31]; - - // stage 4 - btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]); - x[9] = x[8]; - x[10] = x[11]; - x[13] = x[12]; - x[14] = x[15]; - idct32_high16_stage4_sse2(x, cospi, __rounding, cos_bit); - - // stage 5 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - x[5] = x[4]; - x[6] = x[7]; - idct32_high24_stage5_sse2(x, cospi, __rounding, cos_bit); - // stage 6 - x[3] = x[0]; - x[2] = x[1]; - idct32_high28_stage6_sse2(x, cospi, __rounding, cos_bit); - - idct32_stage7_sse2(x, cospi, __rounding, cos_bit); - idct32_stage8_sse2(x, cospi, __rounding, cos_bit); - idct32_stage9_sse2(output, x); -} - -static void idct32_low16_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i 
__rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - // stage 1 - __m128i x[32]; - x[0] = input[0]; - x[2] = input[8]; - x[4] = input[4]; - x[6] = input[12]; - x[8] = input[2]; - x[10] = input[10]; - x[12] = input[6]; - x[14] = input[14]; - x[16] = input[1]; - x[18] = input[9]; - x[20] = input[5]; - x[22] = input[13]; - x[24] = input[3]; - x[26] = input[11]; - x[28] = input[7]; - x[30] = input[15]; - - // stage 2 - btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]); - btf_16_ssse3(-cospi[34], cospi[30], x[30], x[17], x[30]); - btf_16_ssse3(cospi[46], cospi[18], x[18], x[18], x[29]); - btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]); - btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]); - btf_16_ssse3(-cospi[42], cospi[22], x[26], x[21], x[26]); - btf_16_ssse3(cospi[38], cospi[26], x[22], x[22], x[25]); - btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]); - - // stage 3 - btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]); - btf_16_ssse3(-cospi[36], cospi[28], x[14], x[9], x[14]); - btf_16_ssse3(cospi[44], cospi[20], x[10], x[10], x[13]); - btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]); - idct32_high16_stage3_sse2(x); - - // stage 4 - btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]); - btf_16_ssse3(-cospi[40], cospi[24], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[9]); - btf_16_subs_adds_sse2(x[11], x[10]); - btf_16_adds_subs_sse2(x[12], x[13]); - btf_16_subs_adds_sse2(x[15], x[14]); - idct32_high16_stage4_sse2(x, cospi, __rounding, cos_bit); - - // stage 5 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - btf_16_ssse3(cospi[48], cospi[16], x[2], x[2], x[3]); - btf_16_adds_subs_sse2(x[4], x[5]); - btf_16_subs_adds_sse2(x[7], x[6]); - idct32_high24_stage5_sse2(x, cospi, __rounding, cos_bit); - - btf_16_adds_subs_sse2(x[0], x[3]); - btf_16_adds_subs_sse2(x[1], x[2]); - idct32_high28_stage6_sse2(x, cospi, __rounding, cos_bit); - - idct32_stage7_sse2(x, cospi, __rounding, cos_bit); - idct32_stage8_sse2(x, cospi, 
__rounding, cos_bit); - idct32_stage9_sse2(output, x); -} - -static void idct32_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]); - const __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]); - const __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]); - const __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]); - const __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]); - const __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]); - const __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]); - const __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]); - const __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]); - const __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]); - const __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]); - const __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]); - const __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]); - const __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]); - const __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]); - const __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]); - const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]); - const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]); - const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]); - const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]); - const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]); - const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]); - const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]); - const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]); - const __m128i 
cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]); - const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]); - const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]); - const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - - // stage 1 - __m128i x[32]; - x[0] = input[0]; - x[1] = input[16]; - x[2] = input[8]; - x[3] = input[24]; - x[4] = input[4]; - x[5] = input[20]; - x[6] = input[12]; - x[7] = input[28]; - x[8] = input[2]; - x[9] = input[18]; - x[10] = input[10]; - x[11] = input[26]; - x[12] = input[6]; - x[13] = input[22]; - x[14] = input[14]; - x[15] = input[30]; - x[16] = input[1]; - x[17] = input[17]; - x[18] = input[9]; - x[19] = input[25]; - x[20] = input[5]; - x[21] = input[21]; - x[22] = input[13]; - x[23] = input[29]; - x[24] = input[3]; - x[25] = input[19]; - x[26] = input[11]; - x[27] = input[27]; - x[28] = input[7]; - x[29] = input[23]; - x[30] = input[15]; - x[31] = input[31]; - - // stage 2 - btf_16_sse2(cospi_p62_m02, cospi_p02_p62, x[16], x[31], x[16], x[31]); - btf_16_sse2(cospi_p30_m34, cospi_p34_p30, x[17], x[30], x[17], x[30]); - btf_16_sse2(cospi_p46_m18, cospi_p18_p46, x[18], x[29], x[18], x[29]); - btf_16_sse2(cospi_p14_m50, cospi_p50_p14, x[19], x[28], x[19], x[28]); - btf_16_sse2(cospi_p54_m10, cospi_p10_p54, x[20], x[27], x[20], x[27]); - btf_16_sse2(cospi_p22_m42, cospi_p42_p22, x[21], x[26], x[21], x[26]); - btf_16_sse2(cospi_p38_m26, cospi_p26_p38, x[22], x[25], x[22], x[25]); - btf_16_sse2(cospi_p06_m58, cospi_p58_p06, x[23], x[24], x[23], x[24]); - - // stage 3 - btf_16_sse2(cospi_p60_m04, cospi_p04_p60, x[8], x[15], x[8], x[15]); - btf_16_sse2(cospi_p28_m36, cospi_p36_p28, x[9], x[14], x[9], x[14]); - 
btf_16_sse2(cospi_p44_m20, cospi_p20_p44, x[10], x[13], x[10], x[13]); - btf_16_sse2(cospi_p12_m52, cospi_p52_p12, x[11], x[12], x[11], x[12]); - idct32_high16_stage3_sse2(x); - - // stage 4 - btf_16_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]); - btf_16_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[9]); - btf_16_subs_adds_sse2(x[11], x[10]); - btf_16_adds_subs_sse2(x[12], x[13]); - btf_16_subs_adds_sse2(x[15], x[14]); - idct32_high16_stage4_sse2(x, cospi, __rounding, cos_bit); - - // stage 5 - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]); - btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]); - btf_16_adds_subs_sse2(x[4], x[5]); - btf_16_adds_subs_sse2(x[7], x[6]); - idct32_high24_stage5_sse2(x, cospi, __rounding, cos_bit); - - // stage 6 - btf_16_adds_subs_sse2(x[0], x[3]); - btf_16_adds_subs_sse2(x[1], x[2]); - idct32_high28_stage6_sse2(x, cospi, __rounding, cos_bit); - - // stage 7~8 - idct32_stage7_sse2(x, cospi, __rounding, cos_bit); - idct32_stage8_sse2(x, cospi, __rounding, cos_bit); - idct32_stage9_sse2(output, x); -} - -static INLINE void idct64_stage4_high32_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]); - const __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]); - const __m128i cospi_m60_m04 = pair_set_epi16(-cospi[60], -cospi[4]); - const __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]); - const __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]); - const __m128i cospi_m28_m36 = pair_set_epi16(-cospi[28], -cospi[36]); - const __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]); - const __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]); - const __m128i cospi_m44_m20 = pair_set_epi16(-cospi[44], -cospi[20]); - const __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]); - const __m128i 
cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]); - const __m128i cospi_m12_m52 = pair_set_epi16(-cospi[12], -cospi[52]); - btf_16_sse2(cospi_m04_p60, cospi_p60_p04, x[33], x[62], x[33], x[62]); - btf_16_sse2(cospi_m60_m04, cospi_m04_p60, x[34], x[61], x[34], x[61]); - btf_16_sse2(cospi_m36_p28, cospi_p28_p36, x[37], x[58], x[37], x[58]); - btf_16_sse2(cospi_m28_m36, cospi_m36_p28, x[38], x[57], x[38], x[57]); - btf_16_sse2(cospi_m20_p44, cospi_p44_p20, x[41], x[54], x[41], x[54]); - btf_16_sse2(cospi_m44_m20, cospi_m20_p44, x[42], x[53], x[42], x[53]); - btf_16_sse2(cospi_m52_p12, cospi_p12_p52, x[45], x[50], x[45], x[50]); - btf_16_sse2(cospi_m12_m52, cospi_m52_p12, x[46], x[49], x[46], x[49]); -} - -static INLINE void idct64_stage5_high48_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]); - const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]); - const __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]); - const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]); - const __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]); - const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]); - btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]); - btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[18], x[29], x[18], x[29]); - btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[21], x[26], x[21], x[26]); - btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]); - btf_16_adds_subs_sse2(x[32], x[35]); - btf_16_adds_subs_sse2(x[33], x[34]); - btf_16_subs_adds_sse2(x[39], x[36]); - btf_16_subs_adds_sse2(x[38], x[37]); - btf_16_adds_subs_sse2(x[40], x[43]); - btf_16_adds_subs_sse2(x[41], x[42]); - btf_16_subs_adds_sse2(x[47], x[44]); - btf_16_subs_adds_sse2(x[46], x[45]); - btf_16_adds_subs_sse2(x[48], x[51]); - btf_16_adds_subs_sse2(x[49], x[50]); - btf_16_subs_adds_sse2(x[55], x[52]); - 
btf_16_subs_adds_sse2(x[54], x[53]); - btf_16_adds_subs_sse2(x[56], x[59]); - btf_16_adds_subs_sse2(x[57], x[58]); - btf_16_subs_adds_sse2(x[63], x[60]); - btf_16_subs_adds_sse2(x[62], x[61]); -} - -static INLINE void idct64_stage6_high32_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]); - const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]); - const __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]); - const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]); - const __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]); - const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]); - btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[34], x[61], x[34], x[61]); - btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[35], x[60], x[35], x[60]); - btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[36], x[59], x[36], x[59]); - btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[37], x[58], x[37], x[58]); - btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[42], x[53], x[42], x[53]); - btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[43], x[52], x[43], x[52]); - btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[44], x[51], x[44], x[51]); - btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[45], x[50], x[45], x[50]); -} - -static INLINE void idct64_stage6_high48_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - btf_16_adds_subs_sse2(x[16], x[19]); - btf_16_adds_subs_sse2(x[17], x[18]); - btf_16_subs_adds_sse2(x[23], x[20]); - btf_16_subs_adds_sse2(x[22], x[21]); - btf_16_adds_subs_sse2(x[24], x[27]); - btf_16_adds_subs_sse2(x[25], x[26]); - btf_16_subs_adds_sse2(x[31], x[28]); - btf_16_subs_adds_sse2(x[30], x[29]); - idct64_stage6_high32_sse2(x, cospi, __rounding, cos_bit); -} - -static INLINE void idct64_stage7_high48_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i 
cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[18], x[29], x[18], x[29]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[19], x[28], x[19], x[28]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[20], x[27], x[20], x[27]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[21], x[26], x[21], x[26]); - btf_16_adds_subs_sse2(x[32], x[39]); - btf_16_adds_subs_sse2(x[33], x[38]); - btf_16_adds_subs_sse2(x[34], x[37]); - btf_16_adds_subs_sse2(x[35], x[36]); - btf_16_subs_adds_sse2(x[47], x[40]); - btf_16_subs_adds_sse2(x[46], x[41]); - btf_16_subs_adds_sse2(x[45], x[42]); - btf_16_subs_adds_sse2(x[44], x[43]); - btf_16_adds_subs_sse2(x[48], x[55]); - btf_16_adds_subs_sse2(x[49], x[54]); - btf_16_adds_subs_sse2(x[50], x[53]); - btf_16_adds_subs_sse2(x[51], x[52]); - btf_16_subs_adds_sse2(x[63], x[56]); - btf_16_subs_adds_sse2(x[62], x[57]); - btf_16_subs_adds_sse2(x[61], x[58]); - btf_16_subs_adds_sse2(x[60], x[59]); -} - -static INLINE void idct64_stage8_high48_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - btf_16_adds_subs_sse2(x[16], x[23]); - btf_16_adds_subs_sse2(x[17], x[22]); - btf_16_adds_subs_sse2(x[18], x[21]); - btf_16_adds_subs_sse2(x[19], x[20]); - btf_16_subs_adds_sse2(x[31], x[24]); - btf_16_subs_adds_sse2(x[30], x[25]); - btf_16_subs_adds_sse2(x[29], x[26]); - btf_16_subs_adds_sse2(x[28], x[27]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[36], x[59], x[36], x[59]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[37], x[58], x[37], x[58]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[38], x[57], x[38], x[57]); - 
btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[39], x[56], x[39], x[56]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[40], x[55], x[40], x[55]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[41], x[54], x[41], x[54]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[42], x[53], x[42], x[53]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[43], x[52], x[43], x[52]); -} - -static INLINE void idct64_stage9_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_sse2(x[0], x[15]); - btf_16_adds_subs_sse2(x[1], x[14]); - btf_16_adds_subs_sse2(x[2], x[13]); - btf_16_adds_subs_sse2(x[3], x[12]); - btf_16_adds_subs_sse2(x[4], x[11]); - btf_16_adds_subs_sse2(x[5], x[10]); - btf_16_adds_subs_sse2(x[6], x[9]); - btf_16_adds_subs_sse2(x[7], x[8]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[20], x[27], x[20], x[27]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[21], x[26], x[21], x[26]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[22], x[25], x[22], x[25]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[23], x[24], x[23], x[24]); - btf_16_adds_subs_sse2(x[32], x[47]); - btf_16_adds_subs_sse2(x[33], x[46]); - btf_16_adds_subs_sse2(x[34], x[45]); - btf_16_adds_subs_sse2(x[35], x[44]); - btf_16_adds_subs_sse2(x[36], x[43]); - btf_16_adds_subs_sse2(x[37], x[42]); - btf_16_adds_subs_sse2(x[38], x[41]); - btf_16_adds_subs_sse2(x[39], x[40]); - btf_16_subs_adds_sse2(x[63], x[48]); - btf_16_subs_adds_sse2(x[62], x[49]); - btf_16_subs_adds_sse2(x[61], x[50]); - btf_16_subs_adds_sse2(x[60], x[51]); - btf_16_subs_adds_sse2(x[59], x[52]); - btf_16_subs_adds_sse2(x[58], x[53]); - btf_16_subs_adds_sse2(x[57], x[54]); - btf_16_subs_adds_sse2(x[56], x[55]); -} - -static INLINE void idct64_stage10_sse2(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i 
cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - btf_16_adds_subs_sse2(x[0], x[31]); - btf_16_adds_subs_sse2(x[1], x[30]); - btf_16_adds_subs_sse2(x[2], x[29]); - btf_16_adds_subs_sse2(x[3], x[28]); - btf_16_adds_subs_sse2(x[4], x[27]); - btf_16_adds_subs_sse2(x[5], x[26]); - btf_16_adds_subs_sse2(x[6], x[25]); - btf_16_adds_subs_sse2(x[7], x[24]); - btf_16_adds_subs_sse2(x[8], x[23]); - btf_16_adds_subs_sse2(x[9], x[22]); - btf_16_adds_subs_sse2(x[10], x[21]); - btf_16_adds_subs_sse2(x[11], x[20]); - btf_16_adds_subs_sse2(x[12], x[19]); - btf_16_adds_subs_sse2(x[13], x[18]); - btf_16_adds_subs_sse2(x[14], x[17]); - btf_16_adds_subs_sse2(x[15], x[16]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[40], x[55], x[40], x[55]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[41], x[54], x[41], x[54]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[42], x[53], x[42], x[53]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[43], x[52], x[43], x[52]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[44], x[51], x[44], x[51]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[45], x[50], x[45], x[50]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[46], x[49], x[46], x[49]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[47], x[48], x[47], x[48]); -} - -static INLINE void idct64_stage11_sse2(__m128i *output, __m128i *x) { - btf_16_adds_subs_out_sse2(output[0], output[63], x[0], x[63]); - btf_16_adds_subs_out_sse2(output[1], output[62], x[1], x[62]); - btf_16_adds_subs_out_sse2(output[2], output[61], x[2], x[61]); - btf_16_adds_subs_out_sse2(output[3], output[60], x[3], x[60]); - btf_16_adds_subs_out_sse2(output[4], output[59], x[4], x[59]); - btf_16_adds_subs_out_sse2(output[5], output[58], x[5], x[58]); - btf_16_adds_subs_out_sse2(output[6], output[57], x[6], x[57]); - btf_16_adds_subs_out_sse2(output[7], output[56], x[7], x[56]); - btf_16_adds_subs_out_sse2(output[8], output[55], x[8], x[55]); - 
btf_16_adds_subs_out_sse2(output[9], output[54], x[9], x[54]); - btf_16_adds_subs_out_sse2(output[10], output[53], x[10], x[53]); - btf_16_adds_subs_out_sse2(output[11], output[52], x[11], x[52]); - btf_16_adds_subs_out_sse2(output[12], output[51], x[12], x[51]); - btf_16_adds_subs_out_sse2(output[13], output[50], x[13], x[50]); - btf_16_adds_subs_out_sse2(output[14], output[49], x[14], x[49]); - btf_16_adds_subs_out_sse2(output[15], output[48], x[15], x[48]); - btf_16_adds_subs_out_sse2(output[16], output[47], x[16], x[47]); - btf_16_adds_subs_out_sse2(output[17], output[46], x[17], x[46]); - btf_16_adds_subs_out_sse2(output[18], output[45], x[18], x[45]); - btf_16_adds_subs_out_sse2(output[19], output[44], x[19], x[44]); - btf_16_adds_subs_out_sse2(output[20], output[43], x[20], x[43]); - btf_16_adds_subs_out_sse2(output[21], output[42], x[21], x[42]); - btf_16_adds_subs_out_sse2(output[22], output[41], x[22], x[41]); - btf_16_adds_subs_out_sse2(output[23], output[40], x[23], x[40]); - btf_16_adds_subs_out_sse2(output[24], output[39], x[24], x[39]); - btf_16_adds_subs_out_sse2(output[25], output[38], x[25], x[38]); - btf_16_adds_subs_out_sse2(output[26], output[37], x[26], x[37]); - btf_16_adds_subs_out_sse2(output[27], output[36], x[27], x[36]); - btf_16_adds_subs_out_sse2(output[28], output[35], x[28], x[35]); - btf_16_adds_subs_out_sse2(output[29], output[34], x[29], x[34]); - btf_16_adds_subs_out_sse2(output[30], output[33], x[30], x[33]); - btf_16_adds_subs_out_sse2(output[31], output[32], x[31], x[32]); -} - -static void idct64_low1_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - - // stage 1 - __m128i x[32]; - x[0] = input[0]; - - // stage 2 - // stage 3 - // stage 4 - // stage 5 - // stage 6 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - - // stage 7 - // stage 8 - // stage 9 - // stage 10 - // stage 11 - output[0] = x[0]; - output[63] = x[0]; - output[1] = 
x[1]; - output[62] = x[1]; - output[2] = x[1]; - output[61] = x[1]; - output[3] = x[0]; - output[60] = x[0]; - output[4] = x[0]; - output[59] = x[0]; - output[5] = x[1]; - output[58] = x[1]; - output[6] = x[1]; - output[57] = x[1]; - output[7] = x[0]; - output[56] = x[0]; - output[8] = x[0]; - output[55] = x[0]; - output[9] = x[1]; - output[54] = x[1]; - output[10] = x[1]; - output[53] = x[1]; - output[11] = x[0]; - output[52] = x[0]; - output[12] = x[0]; - output[51] = x[0]; - output[13] = x[1]; - output[50] = x[1]; - output[14] = x[1]; - output[49] = x[1]; - output[15] = x[0]; - output[48] = x[0]; - output[16] = x[0]; - output[47] = x[0]; - output[17] = x[1]; - output[46] = x[1]; - output[18] = x[1]; - output[45] = x[1]; - output[19] = x[0]; - output[44] = x[0]; - output[20] = x[0]; - output[43] = x[0]; - output[21] = x[1]; - output[42] = x[1]; - output[22] = x[1]; - output[41] = x[1]; - output[23] = x[0]; - output[40] = x[0]; - output[24] = x[0]; - output[39] = x[0]; - output[25] = x[1]; - output[38] = x[1]; - output[26] = x[1]; - output[37] = x[1]; - output[27] = x[0]; - output[36] = x[0]; - output[28] = x[0]; - output[35] = x[0]; - output[29] = x[1]; - output[34] = x[1]; - output[30] = x[1]; - output[33] = x[1]; - output[31] = x[0]; - output[32] = x[0]; -} - -static void idct64_low8_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - const __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]); - const __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]); - const __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]); - const __m128i cospi_m28_m36 = pair_set_epi16(-cospi[28], -cospi[36]); - const __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]); - const __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]); - const __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], 
cospi[12]); - const __m128i cospi_m12_m52 = pair_set_epi16(-cospi[12], -cospi[52]); - const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]); - const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]); - const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]); - const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - - // stage 1 - __m128i x[64]; - x[0] = input[0]; - x[8] = input[4]; - x[16] = input[2]; - x[24] = input[6]; - x[32] = input[1]; - x[40] = input[5]; - x[48] = input[3]; - x[56] = input[7]; - - // stage 2 - btf_16_ssse3(cospi[63], cospi[1], x[32], x[32], x[63]); - btf_16_ssse3(-cospi[57], cospi[7], x[56], x[39], x[56]); - btf_16_ssse3(cospi[59], cospi[5], x[40], x[40], x[55]); - btf_16_ssse3(-cospi[61], cospi[3], x[48], x[47], x[48]); - - // stage 3 - btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]); - btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]); - x[33] = x[32]; - x[38] = x[39]; - x[41] = x[40]; - x[46] = x[47]; - x[49] = x[48]; - x[54] = x[55]; - x[57] = x[56]; - x[62] = x[63]; - - // stage 4 - btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]); - x[17] = x[16]; - x[22] = x[23]; - x[25] = x[24]; - x[30] = x[31]; - btf_16_sse2(cospi_m04_p60, cospi_p60_p04, x[33], x[62], x[33], x[62]); - btf_16_sse2(cospi_m28_m36, cospi_m36_p28, x[38], x[57], x[38], x[57]); - btf_16_sse2(cospi_m20_p44, cospi_p44_p20, x[41], x[54], x[41], x[54]); - btf_16_sse2(cospi_m12_m52, cospi_m52_p12, x[46], x[49], x[46], x[49]); - - // stage 5 - x[9] = x[8]; - x[14] = x[15]; - btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]); - btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]); - x[35] = x[32]; - 
x[34] = x[33]; - x[36] = x[39]; - x[37] = x[38]; - x[43] = x[40]; - x[42] = x[41]; - x[44] = x[47]; - x[45] = x[46]; - x[51] = x[48]; - x[50] = x[49]; - x[52] = x[55]; - x[53] = x[54]; - x[59] = x[56]; - x[58] = x[57]; - x[60] = x[63]; - x[61] = x[62]; - - // stage 6 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]); - x[19] = x[16]; - x[18] = x[17]; - x[20] = x[23]; - x[21] = x[22]; - x[27] = x[24]; - x[26] = x[25]; - x[28] = x[31]; - x[29] = x[30]; - idct64_stage6_high32_sse2(x, cospi, __rounding, cos_bit); - - // stage 7 - x[3] = x[0]; - x[2] = x[1]; - x[11] = x[8]; - x[10] = x[9]; - x[12] = x[15]; - x[13] = x[14]; - idct64_stage7_high48_sse2(x, cospi, __rounding, cos_bit); - - // stage 8 - x[7] = x[0]; - x[6] = x[1]; - x[5] = x[2]; - x[4] = x[3]; - x[9] = x[9]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]); - idct64_stage8_high48_sse2(x, cospi, __rounding, cos_bit); - - idct64_stage9_sse2(x, cospi, __rounding, cos_bit); - idct64_stage10_sse2(x, cospi, __rounding, cos_bit); - idct64_stage11_sse2(output, x); -} - -static void idct64_low16_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - - // stage 1 - __m128i x[64]; - x[0] = input[0]; - x[4] = input[8]; - x[8] = input[4]; - x[12] = input[12]; - x[16] = input[2]; - x[20] = input[10]; - x[24] = input[6]; - x[28] = input[14]; - x[32] = 
input[1]; - x[36] = input[9]; - x[40] = input[5]; - x[44] = input[13]; - x[48] = input[3]; - x[52] = input[11]; - x[56] = input[7]; - x[60] = input[15]; - - // stage 2 - btf_16_ssse3(cospi[63], cospi[1], x[32], x[32], x[63]); - btf_16_ssse3(-cospi[49], cospi[15], x[60], x[35], x[60]); - btf_16_ssse3(cospi[55], cospi[9], x[36], x[36], x[59]); - btf_16_ssse3(-cospi[57], cospi[7], x[56], x[39], x[56]); - btf_16_ssse3(cospi[59], cospi[5], x[40], x[40], x[55]); - btf_16_ssse3(-cospi[53], cospi[11], x[52], x[43], x[52]); - btf_16_ssse3(cospi[51], cospi[13], x[44], x[44], x[51]); - btf_16_ssse3(-cospi[61], cospi[3], x[48], x[47], x[48]); - - // stage 3 - btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]); - btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]); - btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]); - btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]); - x[33] = x[32]; - x[34] = x[35]; - x[37] = x[36]; - x[38] = x[39]; - x[41] = x[40]; - x[42] = x[43]; - x[45] = x[44]; - x[46] = x[47]; - x[49] = x[48]; - x[50] = x[51]; - x[53] = x[52]; - x[54] = x[55]; - x[57] = x[56]; - x[58] = x[59]; - x[61] = x[60]; - x[62] = x[63]; - - // stage 4 - btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]); - btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]); - x[17] = x[16]; - x[18] = x[19]; - x[21] = x[20]; - x[22] = x[23]; - x[25] = x[24]; - x[26] = x[27]; - x[29] = x[28]; - x[30] = x[31]; - idct64_stage4_high32_sse2(x, cospi, __rounding, cos_bit); - - // stage 5 - btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]); - x[9] = x[8]; - x[10] = x[11]; - x[13] = x[12]; - x[14] = x[15]; - idct64_stage5_high48_sse2(x, cospi, __rounding, cos_bit); - - // stage 6 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - x[5] = x[4]; - x[6] = x[7]; - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]); - idct64_stage6_high48_sse2(x, cospi, __rounding, cos_bit); - - // 
stage 7 - x[3] = x[0]; - x[2] = x[1]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[11]); - btf_16_adds_subs_sse2(x[9], x[10]); - btf_16_subs_adds_sse2(x[15], x[12]); - btf_16_subs_adds_sse2(x[14], x[13]); - idct64_stage7_high48_sse2(x, cospi, __rounding, cos_bit); - - // stage 8 - btf_16_adds_subs_sse2(x[0], x[7]); - btf_16_adds_subs_sse2(x[1], x[6]); - btf_16_adds_subs_sse2(x[2], x[5]); - btf_16_adds_subs_sse2(x[3], x[4]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]); - idct64_stage8_high48_sse2(x, cospi, __rounding, cos_bit); - - idct64_stage9_sse2(x, cospi, __rounding, cos_bit); - idct64_stage10_sse2(x, cospi, __rounding, cos_bit); - idct64_stage11_sse2(output, x); -} - -static void idct64_low32_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - - // stage 1 - __m128i x[64]; - x[0] = input[0]; - x[2] = input[16]; - x[4] = input[8]; - x[6] = input[24]; - x[8] = input[4]; - x[10] = input[20]; - x[12] = input[12]; - x[14] = input[28]; - x[16] = input[2]; - x[18] = input[18]; - x[20] = input[10]; - x[22] = input[26]; - x[24] = input[6]; - x[26] = input[22]; - x[28] = input[14]; - x[30] = input[30]; - x[32] = input[1]; - x[34] = input[17]; - x[36] = input[9]; - x[38] = input[25]; - x[40] = input[5]; - x[42] = input[21]; - x[44] = input[13]; - x[46] = input[29]; - x[48] = input[3]; - x[50] = input[19]; - x[52] = 
input[11]; - x[54] = input[27]; - x[56] = input[7]; - x[58] = input[23]; - x[60] = input[15]; - x[62] = input[31]; - - // stage 2 - btf_16_ssse3(cospi[63], cospi[1], x[32], x[32], x[63]); - btf_16_ssse3(-cospi[33], cospi[31], x[62], x[33], x[62]); - btf_16_ssse3(cospi[47], cospi[17], x[34], x[34], x[61]); - btf_16_ssse3(-cospi[49], cospi[15], x[60], x[35], x[60]); - btf_16_ssse3(cospi[55], cospi[9], x[36], x[36], x[59]); - btf_16_ssse3(-cospi[41], cospi[23], x[58], x[37], x[58]); - btf_16_ssse3(cospi[39], cospi[25], x[38], x[38], x[57]); - btf_16_ssse3(-cospi[57], cospi[7], x[56], x[39], x[56]); - btf_16_ssse3(cospi[59], cospi[5], x[40], x[40], x[55]); - btf_16_ssse3(-cospi[37], cospi[27], x[54], x[41], x[54]); - btf_16_ssse3(cospi[43], cospi[21], x[42], x[42], x[53]); - btf_16_ssse3(-cospi[53], cospi[11], x[52], x[43], x[52]); - btf_16_ssse3(cospi[51], cospi[13], x[44], x[44], x[51]); - btf_16_ssse3(-cospi[45], cospi[19], x[50], x[45], x[50]); - btf_16_ssse3(cospi[35], cospi[29], x[46], x[46], x[49]); - btf_16_ssse3(-cospi[61], cospi[3], x[48], x[47], x[48]); - - // stage 3 - btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]); - btf_16_ssse3(-cospi[34], cospi[30], x[30], x[17], x[30]); - btf_16_ssse3(cospi[46], cospi[18], x[18], x[18], x[29]); - btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]); - btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]); - btf_16_ssse3(-cospi[42], cospi[22], x[26], x[21], x[26]); - btf_16_ssse3(cospi[38], cospi[26], x[22], x[22], x[25]); - btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]); - btf_16_adds_subs_sse2(x[32], x[33]); - btf_16_subs_adds_sse2(x[35], x[34]); - btf_16_adds_subs_sse2(x[36], x[37]); - btf_16_subs_adds_sse2(x[39], x[38]); - btf_16_adds_subs_sse2(x[40], x[41]); - btf_16_subs_adds_sse2(x[43], x[42]); - btf_16_adds_subs_sse2(x[44], x[45]); - btf_16_subs_adds_sse2(x[47], x[46]); - btf_16_adds_subs_sse2(x[48], x[49]); - btf_16_subs_adds_sse2(x[51], x[50]); - btf_16_adds_subs_sse2(x[52], x[53]); - 
btf_16_subs_adds_sse2(x[55], x[54]); - btf_16_adds_subs_sse2(x[56], x[57]); - btf_16_subs_adds_sse2(x[59], x[58]); - btf_16_adds_subs_sse2(x[60], x[61]); - btf_16_subs_adds_sse2(x[63], x[62]); - - // stage 4 - btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]); - btf_16_ssse3(-cospi[36], cospi[28], x[14], x[9], x[14]); - btf_16_ssse3(cospi[44], cospi[20], x[10], x[10], x[13]); - btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]); - btf_16_adds_subs_sse2(x[16], x[17]); - btf_16_subs_adds_sse2(x[19], x[18]); - btf_16_adds_subs_sse2(x[20], x[21]); - btf_16_subs_adds_sse2(x[23], x[22]); - btf_16_adds_subs_sse2(x[24], x[25]); - btf_16_subs_adds_sse2(x[27], x[26]); - btf_16_adds_subs_sse2(x[28], x[29]); - btf_16_subs_adds_sse2(x[31], x[30]); - idct64_stage4_high32_sse2(x, cospi, __rounding, cos_bit); - - // stage 5 - btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]); - btf_16_ssse3(-cospi[40], cospi[24], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[9]); - btf_16_subs_adds_sse2(x[11], x[10]); - btf_16_adds_subs_sse2(x[12], x[13]); - btf_16_subs_adds_sse2(x[15], x[14]); - idct64_stage5_high48_sse2(x, cospi, __rounding, cos_bit); - - // stage 6 - btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]); - btf_16_ssse3(cospi[48], cospi[16], x[2], x[2], x[3]); - btf_16_adds_subs_sse2(x[4], x[5]); - btf_16_subs_adds_sse2(x[7], x[6]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]); - idct64_stage6_high48_sse2(x, cospi, __rounding, cos_bit); - - // stage 7 - btf_16_adds_subs_sse2(x[0], x[3]); - btf_16_adds_subs_sse2(x[1], x[2]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]); - btf_16_adds_subs_sse2(x[8], x[11]); - btf_16_adds_subs_sse2(x[9], x[10]); - btf_16_subs_adds_sse2(x[15], x[12]); - btf_16_subs_adds_sse2(x[14], x[13]); - idct64_stage7_high48_sse2(x, cospi, __rounding, cos_bit); - - // stage 8 - btf_16_adds_subs_sse2(x[0], x[7]); - 
btf_16_adds_subs_sse2(x[1], x[6]); - btf_16_adds_subs_sse2(x[2], x[5]); - btf_16_adds_subs_sse2(x[3], x[4]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]); - idct64_stage8_high48_sse2(x, cospi, __rounding, cos_bit); - - // stage 9~11 - idct64_stage9_sse2(x, cospi, __rounding, cos_bit); - idct64_stage10_sse2(x, cospi, __rounding, cos_bit); - idct64_stage11_sse2(output, x); -} - -void iadst4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const int32_t *sinpi = sinpi_arr(INV_COS_BIT); - const __m128i sinpi_p01_p04 = pair_set_epi16(sinpi[1], sinpi[4]); - const __m128i sinpi_p02_m01 = pair_set_epi16(sinpi[2], -sinpi[1]); - const __m128i sinpi_p03_p02 = pair_set_epi16(sinpi[3], sinpi[2]); - const __m128i sinpi_p03_m04 = pair_set_epi16(sinpi[3], -sinpi[4]); - const __m128i sinpi_p03_m03 = pair_set_epi16(sinpi[3], -sinpi[3]); - const __m128i sinpi_0_p03 = pair_set_epi16(0, sinpi[3]); - const __m128i sinpi_p04_p02 = pair_set_epi16(sinpi[4], sinpi[2]); - const __m128i sinpi_m03_m01 = pair_set_epi16(-sinpi[3], -sinpi[1]); - __m128i x0[4]; - x0[0] = input[0]; - x0[1] = input[1]; - x0[2] = input[2]; - x0[3] = input[3]; - - __m128i u[4]; - u[0] = _mm_unpacklo_epi16(x0[0], x0[2]); - u[1] = _mm_unpackhi_epi16(x0[0], x0[2]); - u[2] = _mm_unpacklo_epi16(x0[1], x0[3]); - u[3] = _mm_unpackhi_epi16(x0[1], x0[3]); - - __m128i x1[16]; - x1[0] = _mm_madd_epi16(u[0], sinpi_p01_p04); // x0*sin1 + x2*sin4 - x1[1] = _mm_madd_epi16(u[1], sinpi_p01_p04); - x1[2] = _mm_madd_epi16(u[0], sinpi_p02_m01); // x0*sin2 - x2*sin1 - x1[3] = _mm_madd_epi16(u[1], sinpi_p02_m01); - x1[4] = _mm_madd_epi16(u[2], sinpi_p03_p02); // x1*sin3 + x3*sin2 - x1[5] = _mm_madd_epi16(u[3], sinpi_p03_p02); - x1[6] = _mm_madd_epi16(u[2], sinpi_p03_m04); // x1*sin3 - x3*sin4 - x1[7] = _mm_madd_epi16(u[3], sinpi_p03_m04); - x1[8] = _mm_madd_epi16(u[0], sinpi_p03_m03); // x0*sin3 - x2*sin3 
- x1[9] = _mm_madd_epi16(u[1], sinpi_p03_m03); - x1[10] = _mm_madd_epi16(u[2], sinpi_0_p03); // x2*sin3 - x1[11] = _mm_madd_epi16(u[3], sinpi_0_p03); - x1[12] = _mm_madd_epi16(u[0], sinpi_p04_p02); // x0*sin4 + x2*sin2 - x1[13] = _mm_madd_epi16(u[1], sinpi_p04_p02); - x1[14] = _mm_madd_epi16(u[2], sinpi_m03_m01); // -x1*sin3 - x3*sin1 - x1[15] = _mm_madd_epi16(u[3], sinpi_m03_m01); - - __m128i x2[8]; - x2[0] = _mm_add_epi32(x1[0], x1[4]); // x0*sin1 +x2*sin4 +x1*sin3 +x3*sin2 - x2[1] = _mm_add_epi32(x1[1], x1[5]); - x2[2] = _mm_add_epi32(x1[2], x1[6]); // x0*sin2 -x2*sin1 +x1*sin3 -x3*sin4 - x2[3] = _mm_add_epi32(x1[3], x1[7]); - x2[4] = _mm_add_epi32(x1[8], x1[10]); // x0*sin3 -x2*sin3 +x3*sin3 - x2[5] = _mm_add_epi32(x1[9], x1[11]); - x2[6] = _mm_add_epi32(x1[12], x1[14]); // x0*sin1 +x2*sin4 +x0*sin2 -x2*sin1 - x2[7] = _mm_add_epi32(x1[13], x1[15]); - - const __m128i rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - for (int i = 0; i < 4; ++i) { - __m128i out0 = _mm_add_epi32(x2[2 * i], rounding); - __m128i out1 = _mm_add_epi32(x2[2 * i + 1], rounding); - out0 = _mm_srai_epi32(out0, INV_COS_BIT); - out1 = _mm_srai_epi32(out1, INV_COS_BIT); - output[i] = _mm_packs_epi32(out0, out1); - } -} - -// TODO(binpengsmail@gmail.com): -// To explore the reuse of VP9 versions of corresponding SSE2 functions and -// evaluate whether there is a possibility for further speedup. 
-void iadst4_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const int32_t *sinpi = sinpi_arr(INV_COS_BIT); - const __m128i sinpi_p01_p04 = pair_set_epi16(sinpi[1], sinpi[4]); - const __m128i sinpi_p02_m01 = pair_set_epi16(sinpi[2], -sinpi[1]); - const __m128i sinpi_p03_p02 = pair_set_epi16(sinpi[3], sinpi[2]); - const __m128i sinpi_p03_m04 = pair_set_epi16(sinpi[3], -sinpi[4]); - const __m128i sinpi_p03_m03 = pair_set_epi16(sinpi[3], -sinpi[3]); - const __m128i sinpi_0_p03 = pair_set_epi16(0, sinpi[3]); - const __m128i sinpi_p04_p02 = pair_set_epi16(sinpi[4], sinpi[2]); - const __m128i sinpi_m03_m01 = pair_set_epi16(-sinpi[3], -sinpi[1]); - __m128i x0[4]; - x0[0] = input[0]; - x0[1] = input[1]; - x0[2] = input[2]; - x0[3] = input[3]; - - __m128i u[2]; - u[0] = _mm_unpacklo_epi16(x0[0], x0[2]); - u[1] = _mm_unpacklo_epi16(x0[1], x0[3]); - - __m128i x1[8]; - x1[0] = _mm_madd_epi16(u[0], sinpi_p01_p04); // x0*sin1 + x2*sin4 - x1[1] = _mm_madd_epi16(u[0], sinpi_p02_m01); // x0*sin2 - x2*sin1 - x1[2] = _mm_madd_epi16(u[1], sinpi_p03_p02); // x1*sin3 + x3*sin2 - x1[3] = _mm_madd_epi16(u[1], sinpi_p03_m04); // x1*sin3 - x3*sin4 - x1[4] = _mm_madd_epi16(u[0], sinpi_p03_m03); // x0*sin3 - x2*sin3 - x1[5] = _mm_madd_epi16(u[1], sinpi_0_p03); // x2*sin3 - x1[6] = _mm_madd_epi16(u[0], sinpi_p04_p02); // x0*sin4 + x2*sin2 - x1[7] = _mm_madd_epi16(u[1], sinpi_m03_m01); // -x1*sin3 - x3*sin1 - - __m128i x2[4]; - x2[0] = _mm_add_epi32(x1[0], x1[2]); // x0*sin1 + x2*sin4 + x1*sin3 + x3*sin2 - x2[1] = _mm_add_epi32(x1[1], x1[3]); // x0*sin2 - x2*sin1 + x1*sin3 - x3*sin4 - x2[2] = _mm_add_epi32(x1[4], x1[5]); // x0*sin3 - x2*sin3 + x3*sin3 - x2[3] = _mm_add_epi32(x1[6], x1[7]); // x0*sin4 + x2*sin2 - x1*sin3 - x3*sin1 - - const __m128i rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - for (int i = 0; i < 4; ++i) { - __m128i out0 = _mm_add_epi32(x2[i], rounding); - out0 = _mm_srai_epi32(out0, INV_COS_BIT); - output[i] = _mm_packs_epi32(out0, 
out0); - } -} - -static void iadst8_low1_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __zero = _mm_setzero_si128(); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - - // stage 1 - __m128i x[8]; - x[1] = input[0]; - - // stage 2 - btf_16_ssse3(cospi[60], -cospi[4], x[1], x[0], x[1]); - - // stage 3 - x[4] = x[0]; - x[5] = x[1]; - - // stage 4 - btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]); - - // stage 5 - x[2] = x[0]; - x[3] = x[1]; - x[6] = x[4]; - x[7] = x[5]; - - // stage 6 - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]); - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]); - - // stage 7 - output[0] = x[0]; - output[1] = _mm_subs_epi16(__zero, x[4]); - output[2] = x[6]; - output[3] = _mm_subs_epi16(__zero, x[2]); - output[4] = x[3]; - output[5] = _mm_subs_epi16(__zero, x[7]); - output[6] = x[5]; - output[7] = _mm_subs_epi16(__zero, x[1]); -} - -void iadst8_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __zero = _mm_setzero_si128(); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]); - const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]); - const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]); - const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]); - const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]); - const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], 
-cospi[36]); - const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]); - const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - - // stage 1 - __m128i x[8]; - x[0] = input[7]; - x[1] = input[0]; - x[2] = input[5]; - x[3] = input[2]; - x[4] = input[3]; - x[5] = input[4]; - x[6] = input[1]; - x[7] = input[6]; - - // stage 2 - btf_16_sse2(cospi_p04_p60, cospi_p60_m04, x[0], x[1], x[0], x[1]); - btf_16_sse2(cospi_p20_p44, cospi_p44_m20, x[2], x[3], x[2], x[3]); - btf_16_sse2(cospi_p36_p28, cospi_p28_m36, x[4], x[5], x[4], x[5]); - btf_16_sse2(cospi_p52_p12, cospi_p12_m52, x[6], x[7], x[6], x[7]); - - // stage 3 - btf_16_adds_subs_sse2(x[0], x[4]); - btf_16_adds_subs_sse2(x[1], x[5]); - btf_16_adds_subs_sse2(x[2], x[6]); - btf_16_adds_subs_sse2(x[3], x[7]); - - // stage 4 - btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]); - btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]); - - // stage 5 - btf_16_adds_subs_sse2(x[0], x[2]); - btf_16_adds_subs_sse2(x[1], x[3]); - btf_16_adds_subs_sse2(x[4], x[6]); - btf_16_adds_subs_sse2(x[5], x[7]); - - // stage 6 - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]); - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]); - - // stage 7 - output[0] = x[0]; - output[1] = _mm_subs_epi16(__zero, x[4]); - output[2] = x[6]; - output[3] = _mm_subs_epi16(__zero, x[2]); - output[4] = x[3]; - output[5] = _mm_subs_epi16(__zero, x[7]); - output[6] = x[5]; - output[7] = _mm_subs_epi16(__zero, x[1]); -} - -void iadst8_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const 
int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __zero = _mm_setzero_si128(); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]); - const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]); - const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]); - const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]); - const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]); - const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]); - const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]); - const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - - // stage 1 - __m128i x[8]; - x[0] = input[7]; - x[1] = input[0]; - x[2] = input[5]; - x[3] = input[2]; - x[4] = input[3]; - x[5] = input[4]; - x[6] = input[1]; - x[7] = input[6]; - - // stage 2 - btf_16_4p_sse2(cospi_p04_p60, cospi_p60_m04, x[0], x[1], x[0], x[1]); - btf_16_4p_sse2(cospi_p20_p44, cospi_p44_m20, x[2], x[3], x[2], x[3]); - btf_16_4p_sse2(cospi_p36_p28, cospi_p28_m36, x[4], x[5], x[4], x[5]); - btf_16_4p_sse2(cospi_p52_p12, cospi_p12_m52, x[6], x[7], x[6], x[7]); - - // stage 3 - btf_16_adds_subs_sse2(x[0], x[4]); - btf_16_adds_subs_sse2(x[1], x[5]); - btf_16_adds_subs_sse2(x[2], x[6]); - btf_16_adds_subs_sse2(x[3], x[7]); - - // stage 4 - btf_16_4p_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]); - btf_16_4p_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]); - - // stage 5 - btf_16_adds_subs_sse2(x[0], x[2]); - btf_16_adds_subs_sse2(x[1], x[3]); - btf_16_adds_subs_sse2(x[4], 
x[6]); - btf_16_adds_subs_sse2(x[5], x[7]); - - // stage 6 - btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]); - btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]); - - // stage 7 - output[0] = x[0]; - output[1] = _mm_subs_epi16(__zero, x[4]); - output[2] = x[6]; - output[3] = _mm_subs_epi16(__zero, x[2]); - output[4] = x[3]; - output[5] = _mm_subs_epi16(__zero, x[7]); - output[6] = x[5]; - output[7] = _mm_subs_epi16(__zero, x[1]); -} - -static INLINE void iadst16_stage3_ssse3(__m128i *x) { - btf_16_adds_subs_sse2(x[0], x[8]); - btf_16_adds_subs_sse2(x[1], x[9]); - btf_16_adds_subs_sse2(x[2], x[10]); - btf_16_adds_subs_sse2(x[3], x[11]); - btf_16_adds_subs_sse2(x[4], x[12]); - btf_16_adds_subs_sse2(x[5], x[13]); - btf_16_adds_subs_sse2(x[6], x[14]); - btf_16_adds_subs_sse2(x[7], x[15]); -} - -static INLINE void iadst16_stage4_ssse3(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]); - const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]); - const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]); - const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]); - const __m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]); - const __m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]); - btf_16_sse2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]); - btf_16_sse2(cospi_p40_p24, cospi_p24_m40, x[10], x[11], x[10], x[11]); - btf_16_sse2(cospi_m56_p08, cospi_p08_p56, x[12], x[13], x[12], x[13]); - btf_16_sse2(cospi_m24_p40, cospi_p40_p24, x[14], x[15], x[14], x[15]); -} - -static INLINE void iadst16_stage5_ssse3(__m128i *x) { - btf_16_adds_subs_sse2(x[0], x[4]); - btf_16_adds_subs_sse2(x[1], x[5]); - btf_16_adds_subs_sse2(x[2], x[6]); - btf_16_adds_subs_sse2(x[3], x[7]); - btf_16_adds_subs_sse2(x[8], x[12]); - btf_16_adds_subs_sse2(x[9], x[13]); - btf_16_adds_subs_sse2(x[10], x[14]); - 
btf_16_adds_subs_sse2(x[11], x[15]); -} - -static INLINE void iadst16_stage6_ssse3(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]); - btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]); - btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]); - btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]); - btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x[14], x[15], x[14], x[15]); -} - -static INLINE void iadst16_stage7_ssse3(__m128i *x) { - btf_16_adds_subs_sse2(x[0], x[2]); - btf_16_adds_subs_sse2(x[1], x[3]); - btf_16_adds_subs_sse2(x[4], x[6]); - btf_16_adds_subs_sse2(x[5], x[7]); - btf_16_adds_subs_sse2(x[8], x[10]); - btf_16_adds_subs_sse2(x[9], x[11]); - btf_16_adds_subs_sse2(x[12], x[14]); - btf_16_adds_subs_sse2(x[13], x[15]); -} - -static INLINE void iadst16_stage8_ssse3(__m128i *x, const int32_t *cospi, - const __m128i __rounding, - int8_t cos_bit) { - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]); - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]); - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[10], x[11], x[10], x[11]); - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[14], x[15], x[14], x[15]); -} - -static INLINE void iadst16_stage9_ssse3(__m128i *output, __m128i *x) { - const __m128i __zero = _mm_setzero_si128(); - output[0] = x[0]; - output[1] = _mm_subs_epi16(__zero, x[8]); - output[2] = x[12]; - output[3] = _mm_subs_epi16(__zero, x[4]); - output[4] = x[6]; - output[5] = _mm_subs_epi16(__zero, x[14]); - output[6] = x[10]; - output[7] = _mm_subs_epi16(__zero, x[2]); - output[8] = x[3]; - output[9] = 
_mm_subs_epi16(__zero, x[11]); - output[10] = x[15]; - output[11] = _mm_subs_epi16(__zero, x[7]); - output[12] = x[5]; - output[13] = _mm_subs_epi16(__zero, x[13]); - output[14] = x[9]; - output[15] = _mm_subs_epi16(__zero, x[1]); -} - -static void iadst16_low1_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]); - const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - - // stage 1 - __m128i x[16]; - x[1] = input[0]; - - // stage 2 - btf_16_ssse3(cospi[62], -cospi[2], x[1], x[0], x[1]); - - // stage 3 - x[8] = x[0]; - x[9] = x[1]; - - // stage 4 - btf_16_sse2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]); - - // stage 5 - x[4] = x[0]; - x[5] = x[1]; - x[12] = x[8]; - x[13] = x[9]; - - // stage 6 - btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]); - btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]); - - // stage 7 - x[2] = x[0]; - x[3] = x[1]; - x[6] = x[4]; - x[7] = x[5]; - x[10] = x[8]; - x[11] = x[9]; - x[14] = x[12]; - x[15] = x[13]; - - iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit); - iadst16_stage9_ssse3(output, x); -} - -static void iadst16_low8_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - // stage 1 - __m128i x[16]; - x[1] = input[0]; - x[3] = input[2]; - x[5] = input[4]; - x[7] = input[6]; - x[8] = input[7]; - x[10] = input[5]; - x[12] = input[3]; - x[14] = input[1]; - - // stage 2 - btf_16_ssse3(cospi[62], -cospi[2], x[1], x[0], x[1]); - btf_16_ssse3(cospi[54], -cospi[10], 
x[3], x[2], x[3]); - btf_16_ssse3(cospi[46], -cospi[18], x[5], x[4], x[5]); - btf_16_ssse3(cospi[38], -cospi[26], x[7], x[6], x[7]); - btf_16_ssse3(cospi[34], cospi[30], x[8], x[8], x[9]); - btf_16_ssse3(cospi[42], cospi[22], x[10], x[10], x[11]); - btf_16_ssse3(cospi[50], cospi[14], x[12], x[12], x[13]); - btf_16_ssse3(cospi[58], cospi[6], x[14], x[14], x[15]); - - // stage 3 - iadst16_stage3_ssse3(x); - iadst16_stage4_ssse3(x, cospi, __rounding, cos_bit); - iadst16_stage5_ssse3(x); - iadst16_stage6_ssse3(x, cospi, __rounding, cos_bit); - iadst16_stage7_ssse3(x); - iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit); - iadst16_stage9_ssse3(output, x); -} -void iadst16_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - const __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]); - const __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]); - const __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]); - const __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]); - const __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]); - const __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]); - const __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]); - const __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]); - const __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]); - const __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]); - const __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]); - const __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]); - const __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]); - const __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]); - const __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]); - const __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], 
-cospi[58]); - - // stage 1 - __m128i x[16]; - x[0] = input[15]; - x[1] = input[0]; - x[2] = input[13]; - x[3] = input[2]; - x[4] = input[11]; - x[5] = input[4]; - x[6] = input[9]; - x[7] = input[6]; - x[8] = input[7]; - x[9] = input[8]; - x[10] = input[5]; - x[11] = input[10]; - x[12] = input[3]; - x[13] = input[12]; - x[14] = input[1]; - x[15] = input[14]; - - // stage 2 - btf_16_sse2(cospi_p02_p62, cospi_p62_m02, x[0], x[1], x[0], x[1]); - btf_16_sse2(cospi_p10_p54, cospi_p54_m10, x[2], x[3], x[2], x[3]); - btf_16_sse2(cospi_p18_p46, cospi_p46_m18, x[4], x[5], x[4], x[5]); - btf_16_sse2(cospi_p26_p38, cospi_p38_m26, x[6], x[7], x[6], x[7]); - btf_16_sse2(cospi_p34_p30, cospi_p30_m34, x[8], x[9], x[8], x[9]); - btf_16_sse2(cospi_p42_p22, cospi_p22_m42, x[10], x[11], x[10], x[11]); - btf_16_sse2(cospi_p50_p14, cospi_p14_m50, x[12], x[13], x[12], x[13]); - btf_16_sse2(cospi_p58_p06, cospi_p06_m58, x[14], x[15], x[14], x[15]); - - // stage 3~9 - iadst16_stage3_ssse3(x); - iadst16_stage4_ssse3(x, cospi, __rounding, cos_bit); - iadst16_stage5_ssse3(x); - iadst16_stage6_ssse3(x, cospi, __rounding, cos_bit); - iadst16_stage7_ssse3(x); - iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit); - iadst16_stage9_ssse3(output, x); -} - -void iadst16_w4_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int32_t *cospi = cospi_arr(INV_COS_BIT); - const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1)); - - const __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]); - const __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]); - const __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]); - const __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]); - const __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]); - const __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]); - const __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]); - const __m128i cospi_p38_m26 = 
pair_set_epi16(cospi[38], -cospi[26]); - const __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]); - const __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]); - const __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]); - const __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]); - const __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]); - const __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]); - const __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]); - const __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]); - const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]); - const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]); - const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]); - const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]); - const __m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]); - const __m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - - // stage 1 - __m128i x[16]; - x[0] = input[15]; - x[1] = input[0]; - x[2] = input[13]; - x[3] = input[2]; - x[4] = input[11]; - x[5] = input[4]; - x[6] = input[9]; - x[7] = input[6]; - x[8] = input[7]; - x[9] = input[8]; - x[10] = input[5]; - x[11] = input[10]; - x[12] = input[3]; - x[13] = input[12]; - x[14] = input[1]; - x[15] = input[14]; - - // stage 2 - btf_16_4p_sse2(cospi_p02_p62, cospi_p62_m02, x[0], x[1], x[0], x[1]); - btf_16_4p_sse2(cospi_p10_p54, cospi_p54_m10, x[2], x[3], x[2], x[3]); - btf_16_4p_sse2(cospi_p18_p46, cospi_p46_m18, x[4], x[5], x[4], x[5]); - btf_16_4p_sse2(cospi_p26_p38, 
cospi_p38_m26, x[6], x[7], x[6], x[7]); - btf_16_4p_sse2(cospi_p34_p30, cospi_p30_m34, x[8], x[9], x[8], x[9]); - btf_16_4p_sse2(cospi_p42_p22, cospi_p22_m42, x[10], x[11], x[10], x[11]); - btf_16_4p_sse2(cospi_p50_p14, cospi_p14_m50, x[12], x[13], x[12], x[13]); - btf_16_4p_sse2(cospi_p58_p06, cospi_p06_m58, x[14], x[15], x[14], x[15]); - - // stage 3 - iadst16_stage3_ssse3(x); - - // stage 4 - btf_16_4p_sse2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]); - btf_16_4p_sse2(cospi_p40_p24, cospi_p24_m40, x[10], x[11], x[10], x[11]); - btf_16_4p_sse2(cospi_m56_p08, cospi_p08_p56, x[12], x[13], x[12], x[13]); - btf_16_4p_sse2(cospi_m24_p40, cospi_p40_p24, x[14], x[15], x[14], x[15]); - - // stage 5 - iadst16_stage5_ssse3(x); - - // stage 6 - btf_16_4p_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]); - btf_16_4p_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]); - btf_16_4p_sse2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]); - btf_16_4p_sse2(cospi_m48_p16, cospi_p16_p48, x[14], x[15], x[14], x[15]); - - // stage 7 - iadst16_stage7_ssse3(x); - - // stage 8 - btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]); - btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]); - btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[10], x[11], x[10], x[11]); - btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[14], x[15], x[14], x[15]); - - // stage 9 - iadst16_stage9_ssse3(output, x); -} - -static void iidentity4_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int16_t scale_fractional = (NewSqrt2 - (1 << NewSqrt2Bits)); - const __m128i scale = _mm_set1_epi16(scale_fractional << (15 - NewSqrt2Bits)); - for (int i = 0; i < 4; ++i) { - __m128i x = _mm_mulhrs_epi16(input[i], scale); - output[i] = _mm_adds_epi16(x, input[i]); - } -} - -static void iidentity8_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - for (int i = 0; i < 8; ++i) { - 
output[i] = _mm_adds_epi16(input[i], input[i]); - } -} - -static void iidentity16_new_ssse3(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const int16_t scale_fractional = 2 * (NewSqrt2 - (1 << NewSqrt2Bits)); - const __m128i scale = _mm_set1_epi16(scale_fractional << (15 - NewSqrt2Bits)); - for (int i = 0; i < 16; ++i) { - __m128i x = _mm_mulhrs_epi16(input[i], scale); - __m128i srcx2 = _mm_adds_epi16(input[i], input[i]); - output[i] = _mm_adds_epi16(x, srcx2); - } -} - -static INLINE __m128i lowbd_get_recon_8x8_sse2(const __m128i pred, - __m128i res) { - const __m128i zero = _mm_setzero_si128(); - __m128i x0 = _mm_adds_epi16(res, _mm_unpacklo_epi8(pred, zero)); - return _mm_packus_epi16(x0, x0); -} - -static INLINE void lowbd_write_buffer_4xn_sse2(__m128i *in, uint8_t *output, - int stride, int flipud, - const int height) { - int j = flipud ? (height - 1) : 0; - const int step = flipud ? -1 : 1; - const __m128i zero = _mm_setzero_si128(); - for (int i = 0; i < height; ++i, j += step) { - const __m128i v = _mm_cvtsi32_si128(*((uint32_t *)(output + i * stride))); - __m128i u = _mm_adds_epi16(in[j], _mm_unpacklo_epi8(v, zero)); - u = _mm_packus_epi16(u, zero); - *((uint32_t *)(output + i * stride)) = _mm_cvtsi128_si32(u); - } -} - -static INLINE void lowbd_write_buffer_8xn_sse2(__m128i *in, uint8_t *output, - int stride, int flipud, - const int height) { - int j = flipud ? (height - 1) : 0; - const int step = flipud ? -1 : 1; - for (int i = 0; i < height; ++i, j += step) { - const __m128i v = _mm_loadl_epi64((__m128i const *)(output + i * stride)); - const __m128i u = lowbd_get_recon_8x8_sse2(v, in[j]); - _mm_storel_epi64((__m128i *)(output + i * stride), u); - } -} - -// 1D functions process process 8 pixels at one time. 
-static const transform_1d_ssse3 - lowbd_txfm_all_1d_w8_arr[TX_SIZES][ITX_TYPES_1D] = { - { idct4_new_sse2, iadst4_new_sse2, iidentity4_new_ssse3 }, - { idct8_new_sse2, iadst8_new_sse2, iidentity8_new_sse2 }, - { idct16_new_sse2, iadst16_new_sse2, iidentity16_new_ssse3 }, - { idct32_new_sse2, NULL, NULL }, - { idct64_low32_new_ssse3, NULL, NULL }, - }; - -// functions for blocks with eob at DC and within -// topleft 8x8, 16x16, 32x32 corner -static const transform_1d_ssse3 - lowbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = { - { - { idct4_new_sse2, idct4_new_sse2, NULL, NULL }, - { iadst4_new_sse2, iadst4_new_sse2, NULL, NULL }, - { iidentity4_new_ssse3, iidentity4_new_ssse3, NULL, NULL }, - }, - { { idct8_low1_new_ssse3, idct8_new_sse2, NULL, NULL }, - { iadst8_low1_new_ssse3, iadst8_new_sse2, NULL, NULL }, - { iidentity8_new_sse2, iidentity8_new_sse2, NULL, NULL } }, - { - { idct16_low1_new_ssse3, idct16_low8_new_ssse3, idct16_new_sse2, - NULL }, - { iadst16_low1_new_ssse3, iadst16_low8_new_ssse3, iadst16_new_sse2, - NULL }, - { NULL, NULL, NULL, NULL }, - }, - { { idct32_low1_new_ssse3, idct32_low8_new_ssse3, idct32_low16_new_ssse3, - idct32_new_sse2 }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } }, - { { idct64_low1_new_ssse3, idct64_low8_new_ssse3, idct64_low16_new_ssse3, - idct64_low32_new_ssse3 }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } } - }; - -// 1D functions process process 4 pixels at one time. 
-// used in 4x4, 4x8, 4x16, 8x4, 16x4 -static const transform_1d_ssse3 - lowbd_txfm_all_1d_w4_arr[TX_SIZES][ITX_TYPES_1D] = { - { idct4_w4_new_sse2, iadst4_w4_new_sse2, iidentity4_new_ssse3 }, - { idct8_w4_new_sse2, iadst8_w4_new_sse2, iidentity8_new_sse2 }, - { idct16_w4_new_sse2, iadst16_w4_new_sse2, iidentity16_new_ssse3 }, - { NULL, NULL, NULL }, - { NULL, NULL, NULL }, - }; - -static INLINE void iidentity_row_8xn_ssse3(__m128i *out, const int32_t *input, - int stride, int shift, int height, - int txw_idx, int rect_type) { - const int32_t *input_row = input; - const __m128i scale = _mm_set1_epi16(NewSqrt2list[txw_idx]); - const __m128i rounding = _mm_set1_epi16((1 << (NewSqrt2Bits - 1)) + - (1 << (NewSqrt2Bits - shift - 1))); - const __m128i one = _mm_set1_epi16(1); - const __m128i scale_rounding = _mm_unpacklo_epi16(scale, rounding); - if (rect_type != 1 && rect_type != -1) { - for (int i = 0; i < height; ++i) { - const __m128i src = load_32bit_to_16bit(input_row); - input_row += stride; - __m128i lo = _mm_unpacklo_epi16(src, one); - __m128i hi = _mm_unpackhi_epi16(src, one); - lo = _mm_madd_epi16(lo, scale_rounding); - hi = _mm_madd_epi16(hi, scale_rounding); - lo = _mm_srai_epi32(lo, NewSqrt2Bits - shift); - hi = _mm_srai_epi32(hi, NewSqrt2Bits - shift); - out[i] = _mm_packs_epi32(lo, hi); - } - } else { - const __m128i rect_scale = - _mm_set1_epi16(NewInvSqrt2 << (15 - NewSqrt2Bits)); - for (int i = 0; i < height; ++i) { - __m128i src = load_32bit_to_16bit(input_row); - src = _mm_mulhrs_epi16(src, rect_scale); - input_row += stride; - __m128i lo = _mm_unpacklo_epi16(src, one); - __m128i hi = _mm_unpackhi_epi16(src, one); - lo = _mm_madd_epi16(lo, scale_rounding); - hi = _mm_madd_epi16(hi, scale_rounding); - lo = _mm_srai_epi32(lo, NewSqrt2Bits - shift); - hi = _mm_srai_epi32(hi, NewSqrt2Bits - shift); - out[i] = _mm_packs_epi32(lo, hi); - } - } -} - -static INLINE void iidentity_col_8xn_ssse3(uint8_t *output, int stride, - __m128i *buf, int shift, int 
height, - int txh_idx) { - const __m128i scale = _mm_set1_epi16(NewSqrt2list[txh_idx]); - const __m128i scale_rounding = _mm_set1_epi16(1 << (NewSqrt2Bits - 1)); - const __m128i shift_rounding = _mm_set1_epi32(1 << (-shift - 1)); - const __m128i one = _mm_set1_epi16(1); - const __m128i scale_coeff = _mm_unpacklo_epi16(scale, scale_rounding); - const __m128i zero = _mm_setzero_si128(); - for (int h = 0; h < height; ++h) { - __m128i lo = _mm_unpacklo_epi16(buf[h], one); - __m128i hi = _mm_unpackhi_epi16(buf[h], one); - lo = _mm_madd_epi16(lo, scale_coeff); - hi = _mm_madd_epi16(hi, scale_coeff); - lo = _mm_srai_epi32(lo, NewSqrt2Bits); - hi = _mm_srai_epi32(hi, NewSqrt2Bits); - lo = _mm_add_epi32(lo, shift_rounding); - hi = _mm_add_epi32(hi, shift_rounding); - lo = _mm_srai_epi32(lo, -shift); - hi = _mm_srai_epi32(hi, -shift); - __m128i x = _mm_packs_epi32(lo, hi); - - const __m128i pred = _mm_loadl_epi64((__m128i const *)(output)); - x = _mm_adds_epi16(x, _mm_unpacklo_epi8(pred, zero)); - const __m128i u = _mm_packus_epi16(x, x); - _mm_storel_epi64((__m128i *)(output), u); - output += stride; - } -} - -static INLINE void lowbd_inv_txfm2d_add_idtx_ssse3(const int32_t *input, - uint8_t *output, int stride, - TX_SIZE tx_size) { - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int input_stride = AOMMIN(32, txfm_size_col); - const int row_max = AOMMIN(32, txfm_size_row); - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - __m128i buf[32]; - - for (int i = 0; i < (input_stride >> 3); ++i) { - iidentity_row_8xn_ssse3(buf, input + 8 * i, input_stride, shift[0], row_max, - txw_idx, rect_type); - iidentity_col_8xn_ssse3(output + 8 * i, stride, buf, shift[1], row_max, - txh_idx); - } -} - -void lowbd_inv_txfm2d_add_4x4_ssse3(const int32_t *input, 
uint8_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size_, int eob) { - (void)tx_size_; - (void)eob; - __m128i buf[4]; - const TX_SIZE tx_size = TX_4X4; - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - - const transform_1d_ssse3 row_txfm = - lowbd_txfm_all_1d_w4_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_ssse3 col_txfm = - lowbd_txfm_all_1d_w4_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - load_buffer_32bit_to_16bit_w4(input, txfm_size_col, buf, txfm_size_row); - transpose_16bit_4x4(buf, buf); - row_txfm(buf, buf, cos_bit_row); - if (lr_flip) { - __m128i temp[4]; - flip_buf_sse2(buf, temp, txfm_size_col); - transpose_16bit_4x4(temp, buf); - } else { - transpose_16bit_4x4(buf, buf); - } - col_txfm(buf, buf, cos_bit_col); - round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]); - lowbd_write_buffer_4xn_sse2(buf, output, stride, ud_flip, txfm_size_row); -} - -static INLINE __m128i lowbd_get_recon_16x16_sse2(const __m128i pred, - __m128i res0, __m128i res1) { - const __m128i zero = _mm_setzero_si128(); - __m128i x0 = _mm_unpacklo_epi8(pred, zero); - __m128i x1 = _mm_unpackhi_epi8(pred, zero); - x0 = _mm_adds_epi16(res0, x0); - x1 = _mm_adds_epi16(res1, x1); - return _mm_packus_epi16(x0, x1); -} - -static INLINE void lowbd_write_buffer_16xn_sse2(__m128i *in, uint8_t *output, - int stride, int flipud, - int height) { - int j = flipud ? (height - 1) : 0; - const int step = flipud ? 
-1 : 1; - for (int i = 0; i < height; ++i, j += step) { - __m128i v = _mm_loadu_si128((__m128i const *)(output + i * stride)); - __m128i u = lowbd_get_recon_16x16_sse2(v, in[j], in[j + height]); - _mm_storeu_si128((__m128i *)(output + i * stride), u); - } -} - -static INLINE void round_shift_ssse3(const __m128i *input, __m128i *output, - int size) { - const __m128i scale = _mm_set1_epi16(NewInvSqrt2 * 8); - for (int i = 0; i < size; ++i) { - output[i] = _mm_mulhrs_epi16(input[i], scale); - } -} - -static INLINE void lowbd_inv_txfm2d_add_no_identity_ssse3( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - __m128i buf1[64 * 8]; - int eobx, eoby; - get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_w_div8 = txfm_size_col >> 3; - const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3; - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - const int input_stride = AOMMIN(32, txfm_size_col); - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const transform_1d_ssse3 row_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_1d_ssse3 col_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - for (int i = 0; i < buf_size_nonzero_h_div8; i++) { - __m128i buf0[64]; - const int32_t *input_row = 
input + i * input_stride * 8; - for (int j = 0; j < buf_size_nonzero_w_div8; ++j) { - __m128i *buf0_cur = buf0 + j * 8; - load_buffer_32bit_to_16bit(input_row + j * 8, input_stride, buf0_cur, 8); - transpose_16bit_8x8(buf0_cur, buf0_cur); - } - if (rect_type == 1 || rect_type == -1) { - round_shift_ssse3(buf0, buf0, input_stride); // rect special code - } - row_txfm(buf0, buf0, cos_bit_row); - round_shift_16bit_ssse3(buf0, txfm_size_col, shift[0]); - __m128i *_buf1 = buf1 + i * 8; - if (lr_flip) { - for (int j = 0; j < buf_size_w_div8; ++j) { - __m128i temp[8]; - flip_buf_sse2(buf0 + 8 * j, temp, 8); - transpose_16bit_8x8(temp, - _buf1 + txfm_size_row * (buf_size_w_div8 - 1 - j)); - } - } else { - for (int j = 0; j < buf_size_w_div8; ++j) { - transpose_16bit_8x8(buf0 + 8 * j, _buf1 + txfm_size_row * j); - } - } - } - for (int i = 0; i < buf_size_w_div8; i++) { - col_txfm(buf1 + i * txfm_size_row, buf1 + i * txfm_size_row, cos_bit_col); - round_shift_16bit_ssse3(buf1 + i * txfm_size_row, txfm_size_row, shift[1]); - } - - if (txfm_size_col >= 16) { - for (int i = 0; i < (txfm_size_col >> 4); i++) { - lowbd_write_buffer_16xn_sse2(buf1 + i * txfm_size_row * 2, - output + 16 * i, stride, ud_flip, - txfm_size_row); - } - } else if (txfm_size_col == 8) { - lowbd_write_buffer_8xn_sse2(buf1, output, stride, ud_flip, txfm_size_row); - } -} - -static INLINE void lowbd_inv_txfm2d_add_h_identity_ssse3( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - int eobx, eoby; - get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob); - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_w_div8 = (eobx + 8) >> 3; - const int input_stride = AOMMIN(32, txfm_size_col); - 
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - - const int fun_idx = lowbd_txfm_all_1d_zeros_idx[eoby]; - assert(fun_idx < 5); - const transform_1d_ssse3 col_txfm = - lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx]; - - assert(col_txfm != NULL); - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - for (int i = 0; i < buf_size_w_div8; i++) { - __m128i buf0[64]; - iidentity_row_8xn_ssse3(buf0, input + 8 * i, input_stride, shift[0], - eoby + 1, txw_idx, rect_type); - col_txfm(buf0, buf0, cos_bit_col); - __m128i mshift = _mm_set1_epi16(1 << (15 + shift[1])); - int k = ud_flip ? (txfm_size_row - 1) : 0; - const int step = ud_flip ? -1 : 1; - uint8_t *out = output + 8 * i; - for (int j = 0; j < txfm_size_row; ++j, k += step) { - const __m128i v = _mm_loadl_epi64((__m128i const *)(out)); - __m128i res = _mm_mulhrs_epi16(buf0[k], mshift); - const __m128i u = lowbd_get_recon_8x8_sse2(v, res); - _mm_storel_epi64((__m128i *)(out), u); - out += stride; - } - } -} - -static INLINE void lowbd_inv_txfm2d_add_v_identity_ssse3( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - __m128i buf1[64]; - int eobx, eoby; - get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_w_div8 = txfm_size_col >> 3; - const int buf_size_h_div8 = (eoby + 8) >> 3; - const int input_stride = AOMMIN(32, txfm_size_col); - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - - const int fun_idx = lowbd_txfm_all_1d_zeros_idx[eobx]; - const transform_1d_ssse3 row_txfm = - 
lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx]; - - assert(row_txfm != NULL); - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - for (int i = 0; i < buf_size_h_div8; i++) { - __m128i buf0[64]; - const int32_t *input_row = input + i * input_stride * 8; - for (int j = 0; j < AOMMIN(4, buf_size_w_div8); ++j) { - __m128i *buf0_cur = buf0 + j * 8; - load_buffer_32bit_to_16bit(input_row + j * 8, input_stride, buf0_cur, 8); - transpose_16bit_8x8(buf0_cur, buf0_cur); - } - if (rect_type == 1 || rect_type == -1) { - round_shift_ssse3(buf0, buf0, input_stride); // rect special code - } - row_txfm(buf0, buf0, cos_bit_row); - round_shift_16bit_ssse3(buf0, txfm_size_col, shift[0]); - __m128i *_buf1 = buf1; - if (lr_flip) { - for (int j = 0; j < buf_size_w_div8; ++j) { - __m128i temp[8]; - flip_buf_sse2(buf0 + 8 * j, temp, 8); - transpose_16bit_8x8(temp, _buf1 + 8 * (buf_size_w_div8 - 1 - j)); - } - } else { - for (int j = 0; j < buf_size_w_div8; ++j) { - transpose_16bit_8x8(buf0 + 8 * j, _buf1 + 8 * j); - } - } - - for (int j = 0; j < buf_size_w_div8; ++j) { - iidentity_col_8xn_ssse3(output + i * 8 * stride + j * 8, stride, - buf1 + j * 8, shift[1], 8, txh_idx); - } - } -} - -// for 32x32,32x64,64x32,64x64,32x8,8x32,16x32,32x16,64x16,16x64 -static INLINE void lowbd_inv_txfm2d_add_universe_ssse3( - const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - switch (tx_type) { - case DCT_DCT: - lowbd_inv_txfm2d_add_no_identity_ssse3(input, output, stride, tx_type, - tx_size, eob); - break; - case IDTX: - lowbd_inv_txfm2d_add_idtx_ssse3(input, output, stride, tx_size); - break; - case V_DCT: - case V_ADST: - case V_FLIPADST: - lowbd_inv_txfm2d_add_h_identity_ssse3(input, output, stride, tx_type, - tx_size, eob); - break; - case H_DCT: - case H_ADST: - case H_FLIPADST: - lowbd_inv_txfm2d_add_v_identity_ssse3(input, output, stride, tx_type, - tx_size, eob); - break; - default: - 
lowbd_inv_txfm2d_add_no_identity_ssse3(input, output, stride, tx_type, - tx_size, eob); - break; - } -} - -void lowbd_inv_txfm2d_add_4x8_ssse3(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size_, int eob) { - (void)tx_size_; - (void)eob; - __m128i buf[8]; - const TX_SIZE tx_size = TX_4X8; - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - - const transform_1d_ssse3 row_txfm = - lowbd_txfm_all_1d_w8_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_ssse3 col_txfm = - lowbd_txfm_all_1d_w4_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - load_buffer_32bit_to_16bit_w4(input, txfm_size_col, buf, txfm_size_row); - transpose_16bit_4x8(buf, buf); - round_shift_ssse3(buf, buf, txfm_size_col); // rect special code - row_txfm(buf, buf, cos_bit_row); - // round_shift_16bit_ssse3(buf, txfm_size_col, shift[0]);// shift[0] is 0 - if (lr_flip) { - __m128i temp[4]; - flip_buf_sse2(buf, temp, txfm_size_col); - transpose_16bit_8x4(temp, buf); - } else { - transpose_16bit_8x4(buf, buf); - } - col_txfm(buf, buf, cos_bit_col); - round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]); - lowbd_write_buffer_4xn_sse2(buf, output, stride, ud_flip, txfm_size_row); -} - -void lowbd_inv_txfm2d_add_8x4_ssse3(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size_, int eob) { - (void)tx_size_; - (void)eob; - __m128i buf[8]; - const TX_SIZE tx_size = TX_8X4; - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; 
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - - const transform_1d_ssse3 row_txfm = - lowbd_txfm_all_1d_w4_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_ssse3 col_txfm = - lowbd_txfm_all_1d_w8_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - load_buffer_32bit_to_16bit(input, txfm_size_col, buf, txfm_size_row); - transpose_16bit_8x4(buf, buf); - round_shift_ssse3(buf, buf, txfm_size_col); // rect special code - row_txfm(buf, buf, cos_bit_row); - // round_shift_16bit_ssse3(buf, txfm_size_col, shift[0]); // shift[0] is 0 - if (lr_flip) { - __m128i temp[8]; - flip_buf_sse2(buf, temp, txfm_size_col); - transpose_16bit_4x8(temp, buf); - } else { - transpose_16bit_4x8(buf, buf); - } - col_txfm(buf, buf, cos_bit_col); - round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]); - lowbd_write_buffer_8xn_sse2(buf, output, stride, ud_flip, txfm_size_row); -} - -void lowbd_inv_txfm2d_add_4x16_ssse3(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size_, int eob) { - (void)tx_size_; - (void)eob; - __m128i buf[16]; - const TX_SIZE tx_size = TX_4X16; - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - - const transform_1d_ssse3 row_txfm = - lowbd_txfm_all_1d_w8_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_ssse3 col_txfm = - lowbd_txfm_all_1d_w4_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - const int row_one_loop = 8; - for (int i = 0; i < 2; ++i) { - const int32_t *input_cur = 
input + i * txfm_size_col * row_one_loop; - __m128i *buf_cur = buf + i * row_one_loop; - load_buffer_32bit_to_16bit_w4(input_cur, txfm_size_col, buf_cur, - row_one_loop); - transpose_16bit_4x8(buf_cur, buf_cur); - row_txfm(buf_cur, buf_cur, cos_bit_row); - round_shift_16bit_ssse3(buf_cur, row_one_loop, shift[0]); - if (lr_flip) { - __m128i temp[8]; - flip_buf_sse2(buf_cur, temp, txfm_size_col); - transpose_16bit_8x4(temp, buf_cur); - } else { - transpose_16bit_8x4(buf_cur, buf_cur); - } - } - col_txfm(buf, buf, cos_bit_col); - round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]); - lowbd_write_buffer_4xn_sse2(buf, output, stride, ud_flip, txfm_size_row); -} - -void lowbd_inv_txfm2d_add_16x4_ssse3(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size_, int eob) { - (void)tx_size_; - (void)eob; - __m128i buf[16]; - const TX_SIZE tx_size = TX_16X4; - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; - const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_w_div8 = txfm_size_col >> 3; - - const transform_1d_ssse3 row_txfm = - lowbd_txfm_all_1d_w4_arr[txw_idx][hitx_1d_tab[tx_type]]; - const transform_1d_ssse3 col_txfm = - lowbd_txfm_all_1d_w8_arr[txh_idx][vitx_1d_tab[tx_type]]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - const int row_one_loop = 8; - for (int i = 0; i < buf_size_w_div8; ++i) { - const int32_t *input_cur = input + i * row_one_loop; - __m128i *buf_cur = buf + i * row_one_loop; - load_buffer_32bit_to_16bit(input_cur, txfm_size_col, buf_cur, - txfm_size_row); - transpose_16bit_8x4(buf_cur, buf_cur); - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit_ssse3(buf, txfm_size_col, shift[0]); - if (lr_flip) { - __m128i 
temp[16]; - flip_buf_sse2(buf, temp, 16); - transpose_16bit_4x8(temp, buf); - transpose_16bit_4x8(temp + 8, buf + 8); - } else { - transpose_16bit_4x8(buf, buf); - transpose_16bit_4x8(buf + row_one_loop, buf + row_one_loop); - } - for (int i = 0; i < buf_size_w_div8; i++) { - col_txfm(buf + i * row_one_loop, buf + i * row_one_loop, cos_bit_col); - round_shift_16bit_ssse3(buf + i * row_one_loop, txfm_size_row, shift[1]); - } - lowbd_write_buffer_8xn_sse2(buf, output, stride, ud_flip, 4); - lowbd_write_buffer_8xn_sse2(buf + 8, output + 8, stride, ud_flip, 4); -} - -void av1_lowbd_inv_txfm2d_add_ssse3(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob) { - switch (tx_size) { - case TX_4X4: - lowbd_inv_txfm2d_add_4x4_ssse3(input, output, stride, tx_type, tx_size, - eob); - break; - case TX_4X8: - lowbd_inv_txfm2d_add_4x8_ssse3(input, output, stride, tx_type, tx_size, - eob); - break; - case TX_8X4: - lowbd_inv_txfm2d_add_8x4_ssse3(input, output, stride, tx_type, tx_size, - eob); - break; - case TX_4X16: - lowbd_inv_txfm2d_add_4x16_ssse3(input, output, stride, tx_type, tx_size, - eob); - break; - case TX_16X4: - lowbd_inv_txfm2d_add_16x4_ssse3(input, output, stride, tx_type, tx_size, - eob); - break; - default: - lowbd_inv_txfm2d_add_universe_ssse3(input, output, stride, tx_type, - tx_size, eob); - break; - } -} -void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride, - const TxfmParam *txfm_param) { - const TX_TYPE tx_type = txfm_param->tx_type; - if (!txfm_param->lossless) { - av1_lowbd_inv_txfm2d_add_ssse3(dqcoeff, dst, stride, tx_type, - txfm_param->tx_size, txfm_param->eob); - } else { - av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param); - } -} diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h b/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h deleted file mode 100644 index 66bd339d1..000000000 --- a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h +++ /dev/null @@ -1,232 
+0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#ifndef AOM_AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_ -#define AOM_AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_ - -#include <emmintrin.h> // SSE2 -#include <tmmintrin.h> // SSSE3 - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/x86/transpose_sse2.h" -#include "aom_dsp/x86/txfm_common_sse2.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define btf_16_ssse3(w0, w1, in, out0, out1) \ - do { \ - const __m128i _w0 = _mm_set1_epi16(w0 * 8); \ - const __m128i _w1 = _mm_set1_epi16(w1 * 8); \ - const __m128i _in = in; \ - out0 = _mm_mulhrs_epi16(_in, _w0); \ - out1 = _mm_mulhrs_epi16(_in, _w1); \ - } while (0) - -#define btf_16_adds_subs_sse2(in0, in1) \ - do { \ - const __m128i _in0 = in0; \ - const __m128i _in1 = in1; \ - in0 = _mm_adds_epi16(_in0, _in1); \ - in1 = _mm_subs_epi16(_in0, _in1); \ - } while (0) - -#define btf_16_subs_adds_sse2(in0, in1) \ - do { \ - const __m128i _in0 = in0; \ - const __m128i _in1 = in1; \ - in1 = _mm_subs_epi16(_in0, _in1); \ - in0 = _mm_adds_epi16(_in0, _in1); \ - } while (0) - -#define btf_16_adds_subs_out_sse2(out0, out1, in0, in1) \ - do { \ - const __m128i _in0 = in0; \ - const __m128i _in1 = in1; \ - out0 = _mm_adds_epi16(_in0, _in1); \ - out1 = _mm_subs_epi16(_in0, _in1); \ - } while (0) - -static INLINE void round_shift_16bit_ssse3(__m128i *in, int size, int bit) { - if (bit < 0) { - const __m128i scale = _mm_set1_epi16(1 << (15 + bit)); - 
for (int i = 0; i < size; ++i) { - in[i] = _mm_mulhrs_epi16(in[i], scale); - } - } else if (bit > 0) { - for (int i = 0; i < size; ++i) { - in[i] = _mm_slli_epi16(in[i], bit); - } - } -} - -// 1D itx types -typedef enum ATTRIBUTE_PACKED { - IDCT_1D, - IADST_1D, - IFLIPADST_1D = IADST_1D, - IIDENTITY_1D, - ITX_TYPES_1D, -} ITX_TYPE_1D; - -static const ITX_TYPE_1D vitx_1d_tab[TX_TYPES] = { - IDCT_1D, IADST_1D, IDCT_1D, IADST_1D, - IFLIPADST_1D, IDCT_1D, IFLIPADST_1D, IADST_1D, - IFLIPADST_1D, IIDENTITY_1D, IDCT_1D, IIDENTITY_1D, - IADST_1D, IIDENTITY_1D, IFLIPADST_1D, IIDENTITY_1D, -}; - -static const ITX_TYPE_1D hitx_1d_tab[TX_TYPES] = { - IDCT_1D, IDCT_1D, IADST_1D, IADST_1D, - IDCT_1D, IFLIPADST_1D, IFLIPADST_1D, IFLIPADST_1D, - IADST_1D, IIDENTITY_1D, IIDENTITY_1D, IDCT_1D, - IIDENTITY_1D, IADST_1D, IIDENTITY_1D, IFLIPADST_1D, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = { - 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, -}; - -DECLARE_ALIGNED(16, static const int16_t, - av1_eob_to_eobxy_16x16_default[16]) = { - 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, - 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, -}; - -DECLARE_ALIGNED(16, static const int16_t, - av1_eob_to_eobxy_32x32_default[32]) = { - 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, - 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, - 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, - 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = { - 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, - 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = { - 0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, -}; - -DECLARE_ALIGNED(16, static const int16_t, - 
av1_eob_to_eobxy_16x32_default[32]) = { - 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, - 0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, - 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, - 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, -}; - -DECLARE_ALIGNED(16, static const int16_t, - av1_eob_to_eobxy_32x16_default[16]) = { - 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, - 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = { - 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, - 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07, - 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, - 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, -}; - -DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = { - 0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f, -}; - -DECLARE_ALIGNED(16, static const int16_t *, - av1_eob_to_eobxy_default[TX_SIZES_ALL]) = { - NULL, - av1_eob_to_eobxy_8x8_default, - av1_eob_to_eobxy_16x16_default, - av1_eob_to_eobxy_32x32_default, - av1_eob_to_eobxy_32x32_default, - NULL, - NULL, - av1_eob_to_eobxy_8x16_default, - av1_eob_to_eobxy_16x8_default, - av1_eob_to_eobxy_16x32_default, - av1_eob_to_eobxy_32x16_default, - av1_eob_to_eobxy_32x32_default, - av1_eob_to_eobxy_32x32_default, - NULL, - NULL, - av1_eob_to_eobxy_8x32_default, - av1_eob_to_eobxy_32x8_default, - av1_eob_to_eobxy_16x32_default, - av1_eob_to_eobxy_32x16_default, -}; - -static const int lowbd_txfm_all_1d_zeros_idx[32] = { - 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -// Transform block width in log2 for eob (size of 64 map to 32) -static const int tx_size_wide_log2_eob[TX_SIZES_ALL] = { - 2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5, -}; - 
-static INLINE void get_eobx_eoby_scan_default(int *eobx, int *eoby, - TX_SIZE tx_size, int eob) { - if (eob == 1) { - *eobx = 0; - *eoby = 0; - return; - } - - const int tx_w_log2 = tx_size_wide_log2_eob[tx_size]; - const int eob_row = (eob - 1) >> tx_w_log2; - const int eobxy = av1_eob_to_eobxy_default[tx_size][eob_row]; - *eobx = eobxy & 0xFF; - *eoby = eobxy >> 8; -} - -static int eob_fill[32] = { - 0, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15, - 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, -}; - -static INLINE void get_eobx_eoby_scan_h_identity(int *eobx, int *eoby, - TX_SIZE tx_size, int eob) { - eob -= 1; - const int txfm_size_col = tx_size_wide[tx_size]; - const int eobx_max = AOMMIN(32, txfm_size_col) - 1; - *eobx = (eob >= eobx_max) ? eobx_max : eob_fill[eob]; - const int temp_eoby = eob / (eobx_max + 1); - assert(temp_eoby < 32); - *eoby = eob_fill[temp_eoby]; -} - -static INLINE void get_eobx_eoby_scan_v_identity(int *eobx, int *eoby, - TX_SIZE tx_size, int eob) { - eob -= 1; - const int txfm_size_row = tx_size_high[tx_size]; - const int eoby_max = AOMMIN(32, txfm_size_row) - 1; - *eobx = eob / (eoby_max + 1); - *eoby = (eob >= eoby_max) ? eoby_max : eob_fill[eob]; -} - -typedef void (*transform_1d_ssse3)(const __m128i *input, __m128i *output, - int8_t cos_bit); - -void av1_lowbd_inv_txfm2d_add_ssse3(const int32_t *input, uint8_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_ diff --git a/third_party/aom/av1/common/x86/av1_txfm_sse2.h b/third_party/aom/av1/common/x86/av1_txfm_sse2.h deleted file mode 100644 index 77aeb6eb1..000000000 --- a/third_party/aom/av1/common/x86/av1_txfm_sse2.h +++ /dev/null @@ -1,317 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_ -#define AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_ - -#include <emmintrin.h> // SSE2 - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/x86/transpose_sse2.h" -#include "aom_dsp/x86/txfm_common_sse2.h" -#include "av1/common/av1_txfm.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE void btf_16_w4_sse2( - const __m128i *const w0, const __m128i *const w1, const __m128i __rounding, - const int8_t cos_bit, const __m128i *const in0, const __m128i *const in1, - __m128i *const out0, __m128i *const out1) { - const __m128i t0 = _mm_unpacklo_epi16(*in0, *in1); - const __m128i u0 = _mm_madd_epi16(t0, *w0); - const __m128i v0 = _mm_madd_epi16(t0, *w1); - const __m128i a0 = _mm_add_epi32(u0, __rounding); - const __m128i b0 = _mm_add_epi32(v0, __rounding); - const __m128i c0 = _mm_srai_epi32(a0, cos_bit); - const __m128i d0 = _mm_srai_epi32(b0, cos_bit); - - *out0 = _mm_packs_epi32(c0, c0); - *out1 = _mm_packs_epi32(d0, c0); -} - -#define btf_16_4p_sse2(w0, w1, in0, in1, out0, out1) \ - { \ - __m128i t0 = _mm_unpacklo_epi16(in0, in1); \ - __m128i u0 = _mm_madd_epi16(t0, w0); \ - __m128i v0 = _mm_madd_epi16(t0, w1); \ - \ - __m128i a0 = _mm_add_epi32(u0, __rounding); \ - __m128i b0 = _mm_add_epi32(v0, __rounding); \ - \ - __m128i c0 = _mm_srai_epi32(a0, cos_bit); \ - __m128i d0 = _mm_srai_epi32(b0, cos_bit); \ - \ - out0 = _mm_packs_epi32(c0, c0); \ - out1 = _mm_packs_epi32(d0, d0); \ - } 
- -#define btf_16_sse2(w0, w1, in0, in1, out0, out1) \ - { \ - __m128i t0 = _mm_unpacklo_epi16(in0, in1); \ - __m128i t1 = _mm_unpackhi_epi16(in0, in1); \ - __m128i u0 = _mm_madd_epi16(t0, w0); \ - __m128i u1 = _mm_madd_epi16(t1, w0); \ - __m128i v0 = _mm_madd_epi16(t0, w1); \ - __m128i v1 = _mm_madd_epi16(t1, w1); \ - \ - __m128i a0 = _mm_add_epi32(u0, __rounding); \ - __m128i a1 = _mm_add_epi32(u1, __rounding); \ - __m128i b0 = _mm_add_epi32(v0, __rounding); \ - __m128i b1 = _mm_add_epi32(v1, __rounding); \ - \ - __m128i c0 = _mm_srai_epi32(a0, cos_bit); \ - __m128i c1 = _mm_srai_epi32(a1, cos_bit); \ - __m128i d0 = _mm_srai_epi32(b0, cos_bit); \ - __m128i d1 = _mm_srai_epi32(b1, cos_bit); \ - \ - out0 = _mm_packs_epi32(c0, c1); \ - out1 = _mm_packs_epi32(d0, d1); \ - } - -static INLINE __m128i load_16bit_to_16bit(const int16_t *a) { - return _mm_load_si128((const __m128i *)a); -} - -static INLINE __m128i load_32bit_to_16bit(const int32_t *a) { - const __m128i a_low = _mm_load_si128((const __m128i *)a); - return _mm_packs_epi32(a_low, *(const __m128i *)(a + 4)); -} - -static INLINE __m128i load_32bit_to_16bit_w4(const int32_t *a) { - const __m128i a_low = _mm_load_si128((const __m128i *)a); - return _mm_packs_epi32(a_low, a_low); -} - -// Store 4 16 bit values. Sign extend the values. -static INLINE void store_16bit_to_32bit_w4(const __m128i a, int32_t *const b) { - const __m128i a_lo = _mm_unpacklo_epi16(a, a); - const __m128i a_1 = _mm_srai_epi32(a_lo, 16); - _mm_store_si128((__m128i *)b, a_1); -} - -// Store 8 16 bit values. Sign extend the values. 
-static INLINE void store_16bit_to_32bit(__m128i a, int32_t *b) { - const __m128i a_lo = _mm_unpacklo_epi16(a, a); - const __m128i a_hi = _mm_unpackhi_epi16(a, a); - const __m128i a_1 = _mm_srai_epi32(a_lo, 16); - const __m128i a_2 = _mm_srai_epi32(a_hi, 16); - _mm_store_si128((__m128i *)b, a_1); - _mm_store_si128((__m128i *)(b + 4), a_2); -} - -static INLINE __m128i scale_round_sse2(const __m128i a, const int scale) { - const __m128i scale_rounding = pair_set_epi16(scale, 1 << (NewSqrt2Bits - 1)); - const __m128i b = _mm_madd_epi16(a, scale_rounding); - return _mm_srai_epi32(b, NewSqrt2Bits); -} - -static INLINE void store_rect_16bit_to_32bit_w4(const __m128i a, - int32_t *const b) { - const __m128i one = _mm_set1_epi16(1); - const __m128i a_lo = _mm_unpacklo_epi16(a, one); - const __m128i b_lo = scale_round_sse2(a_lo, NewSqrt2); - _mm_store_si128((__m128i *)b, b_lo); -} - -static INLINE void store_rect_16bit_to_32bit(const __m128i a, - int32_t *const b) { - const __m128i one = _mm_set1_epi16(1); - const __m128i a_lo = _mm_unpacklo_epi16(a, one); - const __m128i a_hi = _mm_unpackhi_epi16(a, one); - const __m128i b_lo = scale_round_sse2(a_lo, NewSqrt2); - const __m128i b_hi = scale_round_sse2(a_hi, NewSqrt2); - _mm_store_si128((__m128i *)b, b_lo); - _mm_store_si128((__m128i *)(b + 4), b_hi); -} - -static INLINE void load_buffer_16bit_to_16bit_w4(const int16_t *const in, - const int stride, - __m128i *const out, - const int out_size) { - for (int i = 0; i < out_size; ++i) { - out[i] = _mm_loadl_epi64((const __m128i *)(in + i * stride)); - } -} - -static INLINE void load_buffer_16bit_to_16bit_w4_flip(const int16_t *const in, - const int stride, - __m128i *const out, - const int out_size) { - for (int i = 0; i < out_size; ++i) { - out[out_size - i - 1] = _mm_loadl_epi64((const __m128i *)(in + i * stride)); - } -} - -static INLINE void load_buffer_16bit_to_16bit(const int16_t *in, int stride, - __m128i *out, int out_size) { - for (int i = 0; i < out_size; ++i) { - 
out[i] = load_16bit_to_16bit(in + i * stride); - } -} - -static INLINE void load_buffer_16bit_to_16bit_flip(const int16_t *in, - int stride, __m128i *out, - int out_size) { - for (int i = 0; i < out_size; ++i) { - out[out_size - i - 1] = load_16bit_to_16bit(in + i * stride); - } -} - -static INLINE void load_buffer_32bit_to_16bit(const int32_t *in, int stride, - __m128i *out, int out_size) { - for (int i = 0; i < out_size; ++i) { - out[i] = load_32bit_to_16bit(in + i * stride); - } -} - -static INLINE void load_buffer_32bit_to_16bit_w4(const int32_t *in, int stride, - __m128i *out, int out_size) { - for (int i = 0; i < out_size; ++i) { - out[i] = load_32bit_to_16bit_w4(in + i * stride); - } -} - -static INLINE void load_buffer_32bit_to_16bit_flip(const int32_t *in, - int stride, __m128i *out, - int out_size) { - for (int i = 0; i < out_size; ++i) { - out[out_size - i - 1] = load_32bit_to_16bit(in + i * stride); - } -} - -static INLINE void store_buffer_16bit_to_32bit_w4(const __m128i *const in, - int32_t *const out, - const int stride, - const int out_size) { - for (int i = 0; i < out_size; ++i) { - store_16bit_to_32bit_w4(in[i], out + i * stride); - } -} - -static INLINE void store_buffer_16bit_to_32bit_w8(const __m128i *const in, - int32_t *const out, - const int stride, - const int out_size) { - for (int i = 0; i < out_size; ++i) { - store_16bit_to_32bit(in[i], out + i * stride); - } -} - -static INLINE void store_rect_buffer_16bit_to_32bit_w4(const __m128i *const in, - int32_t *const out, - const int stride, - const int out_size) { - for (int i = 0; i < out_size; ++i) { - store_rect_16bit_to_32bit_w4(in[i], out + i * stride); - } -} - -static INLINE void store_rect_buffer_16bit_to_32bit_w8(const __m128i *const in, - int32_t *const out, - const int stride, - const int out_size) { - for (int i = 0; i < out_size; ++i) { - store_rect_16bit_to_32bit(in[i], out + i * stride); - } -} - -static INLINE void store_buffer_16bit_to_16bit_8x8(const __m128i *in, - uint16_t 
*out, - const int stride) { - for (int i = 0; i < 8; ++i) { - _mm_store_si128((__m128i *)(out + i * stride), in[i]); - } -} - -static INLINE void round_shift_16bit(__m128i *in, int size, int bit) { - if (bit < 0) { - bit = -bit; - __m128i rounding = _mm_set1_epi16(1 << (bit - 1)); - for (int i = 0; i < size; ++i) { - in[i] = _mm_adds_epi16(in[i], rounding); - in[i] = _mm_srai_epi16(in[i], bit); - } - } else if (bit > 0) { - for (int i = 0; i < size; ++i) { - in[i] = _mm_slli_epi16(in[i], bit); - } - } -} - -static INLINE void flip_buf_sse2(__m128i *in, __m128i *out, int size) { - for (int i = 0; i < size; ++i) { - out[size - i - 1] = in[i]; - } -} - -void av1_lowbd_fwd_txfm2d_4x4_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_4x8_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_4x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_8x4_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_8x8_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_8x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_8x32_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_16x4_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_16x8_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_16x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_16x32_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_32x8_sse2(const 
int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_32x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_32x32_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_16x64_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -void av1_lowbd_fwd_txfm2d_64x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd); - -typedef void (*transform_1d_sse2)(const __m128i *input, __m128i *output, - int8_t cos_bit); - -typedef struct { - transform_1d_sse2 col, row; // vertical and horizontal -} transform_2d_sse2; - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_ diff --git a/third_party/aom/av1/common/x86/av1_txfm_sse4.c b/third_party/aom/av1/common/x86/av1_txfm_sse4.c deleted file mode 100644 index 90b9879cc..000000000 --- a/third_party/aom/av1/common/x86/av1_txfm_sse4.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "config/aom_dsp_rtcd.h" - -#include "av1/common/av1_txfm.h" -#include "av1/common/x86/av1_txfm_sse4.h" - -void av1_round_shift_array_sse4_1(int32_t *arr, int size, int bit) { - __m128i *const vec = (__m128i *)arr; - const int vec_size = size >> 2; - av1_round_shift_array_32_sse4_1(vec, vec, vec_size, bit); -} diff --git a/third_party/aom/av1/common/x86/av1_txfm_sse4.h b/third_party/aom/av1/common/x86/av1_txfm_sse4.h deleted file mode 100644 index 6cad821b1..000000000 --- a/third_party/aom/av1/common/x86/av1_txfm_sse4.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_ -#define AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_ - -#include <smmintrin.h> - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE __m128i av1_round_shift_32_sse4_1(__m128i vec, int bit) { - __m128i tmp, round; - round = _mm_set1_epi32(1 << (bit - 1)); - tmp = _mm_add_epi32(vec, round); - return _mm_srai_epi32(tmp, bit); -} - -static INLINE void av1_round_shift_array_32_sse4_1(__m128i *input, - __m128i *output, - const int size, - const int bit) { - if (bit > 0) { - int i; - for (i = 0; i < size; i++) { - output[i] = av1_round_shift_32_sse4_1(input[i], bit); - } - } else { - int i; - for (i = 0; i < size; i++) { - output[i] = _mm_slli_epi32(input[i], -bit); - } - } -} - -static INLINE void av1_round_shift_rect_array_32_sse4_1(__m128i *input, - __m128i *output, - const int size, - const int bit, - const int val) { - const __m128i sqrt2 = _mm_set1_epi32(val); - if (bit > 0) { - int i; - for (i = 0; i < size; i++) { - const __m128i r0 = av1_round_shift_32_sse4_1(input[i], bit); - const __m128i r1 = _mm_mullo_epi32(sqrt2, r0); - output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits); - } - } else { - int i; - for (i = 0; i < size; i++) { - const __m128i r0 = _mm_slli_epi32(input[i], -bit); - const __m128i r1 = _mm_mullo_epi32(sqrt2, r0); - output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits); - } - } -} - -#ifdef __cplusplus -} -#endif - -#endif // AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_ diff --git a/third_party/aom/av1/common/x86/cfl_avx2.c b/third_party/aom/av1/common/x86/cfl_avx2.c deleted file mode 100644 index a8bfdcce6..000000000 --- a/third_party/aom/av1/common/x86/cfl_avx2.c +++ /dev/null @@ -1,491 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#include <immintrin.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/cfl.h" - -#include "av1/common/x86/cfl_simd.h" - -#define CFL_GET_SUBSAMPLE_FUNCTION_AVX2(sub, bd) \ - CFL_SUBSAMPLE(avx2, sub, bd, 32, 32) \ - CFL_SUBSAMPLE(avx2, sub, bd, 32, 16) \ - CFL_SUBSAMPLE(avx2, sub, bd, 32, 8) \ - cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_avx2( \ - TX_SIZE tx_size) { \ - static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = { \ - subsample_##bd##_##sub##_4x4_ssse3, /* 4x4 */ \ - subsample_##bd##_##sub##_8x8_ssse3, /* 8x8 */ \ - subsample_##bd##_##sub##_16x16_ssse3, /* 16x16 */ \ - subsample_##bd##_##sub##_32x32_avx2, /* 32x32 */ \ - cfl_subsample_##bd##_null, /* 64x64 (invalid CFL size) */ \ - subsample_##bd##_##sub##_4x8_ssse3, /* 4x8 */ \ - subsample_##bd##_##sub##_8x4_ssse3, /* 8x4 */ \ - subsample_##bd##_##sub##_8x16_ssse3, /* 8x16 */ \ - subsample_##bd##_##sub##_16x8_ssse3, /* 16x8 */ \ - subsample_##bd##_##sub##_16x32_ssse3, /* 16x32 */ \ - subsample_##bd##_##sub##_32x16_avx2, /* 32x16 */ \ - cfl_subsample_##bd##_null, /* 32x64 (invalid CFL size) */ \ - cfl_subsample_##bd##_null, /* 64x32 (invalid CFL size) */ \ - subsample_##bd##_##sub##_4x16_ssse3, /* 4x16 */ \ - subsample_##bd##_##sub##_16x4_ssse3, /* 16x4 */ \ - subsample_##bd##_##sub##_8x32_ssse3, /* 8x32 */ \ - subsample_##bd##_##sub##_32x8_avx2, /* 32x8 */ \ - cfl_subsample_##bd##_null, /* 16x64 (invalid CFL size) */ \ - cfl_subsample_##bd##_null, /* 64x16 (invalid CFL size) */ \ - }; \ - return subfn_##sub[tx_size]; \ - } - -/** - * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2. 
Resulting in a more - * precise version of a box filter 4:2:0 pixel subsampling in Q3. - * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. - * - * Note: For 4:2:0 luma subsampling, the width will never be greater than 16. - */ -static void cfl_luma_subsampling_420_lbd_avx2(const uint8_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - (void)width; // Forever 32 - const __m256i twos = _mm256_set1_epi8(2); // Thirty two twos - const int luma_stride = input_stride << 1; - __m256i *row = (__m256i *)pred_buf_q3; - const __m256i *row_end = row + (height >> 1) * CFL_BUF_LINE_I256; - do { - __m256i top = _mm256_loadu_si256((__m256i *)input); - __m256i bot = _mm256_loadu_si256((__m256i *)(input + input_stride)); - - __m256i top_16x16 = _mm256_maddubs_epi16(top, twos); - __m256i bot_16x16 = _mm256_maddubs_epi16(bot, twos); - __m256i sum_16x16 = _mm256_add_epi16(top_16x16, bot_16x16); - - _mm256_storeu_si256(row, sum_16x16); - - input += luma_stride; - } while ((row += CFL_BUF_LINE_I256) < row_end); -} - -CFL_GET_SUBSAMPLE_FUNCTION_AVX2(420, lbd) - -/** - * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4. Resulting in a more - * precise version of a box filter 4:2:2 pixel subsampling in Q3. - * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. 
- */ -static void cfl_luma_subsampling_422_lbd_avx2(const uint8_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - (void)width; // Forever 32 - const __m256i fours = _mm256_set1_epi8(4); // Thirty two fours - __m256i *row = (__m256i *)pred_buf_q3; - const __m256i *row_end = row + height * CFL_BUF_LINE_I256; - do { - __m256i top = _mm256_loadu_si256((__m256i *)input); - __m256i top_16x16 = _mm256_maddubs_epi16(top, fours); - _mm256_storeu_si256(row, top_16x16); - input += input_stride; - } while ((row += CFL_BUF_LINE_I256) < row_end); -} - -CFL_GET_SUBSAMPLE_FUNCTION_AVX2(422, lbd) - -/** - * Multiplies the pixels by 8 (scaling in Q3). The AVX2 subsampling is only - * performed on block of width 32. - * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. - */ -static void cfl_luma_subsampling_444_lbd_avx2(const uint8_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - (void)width; // Forever 32 - __m256i *row = (__m256i *)pred_buf_q3; - const __m256i *row_end = row + height * CFL_BUF_LINE_I256; - const __m256i zeros = _mm256_setzero_si256(); - do { - __m256i top = _mm256_loadu_si256((__m256i *)input); - top = _mm256_permute4x64_epi64(top, _MM_SHUFFLE(3, 1, 2, 0)); - - __m256i row_lo = _mm256_unpacklo_epi8(top, zeros); - row_lo = _mm256_slli_epi16(row_lo, 3); - __m256i row_hi = _mm256_unpackhi_epi8(top, zeros); - row_hi = _mm256_slli_epi16(row_hi, 3); - - _mm256_storeu_si256(row, row_lo); - _mm256_storeu_si256(row + 1, row_hi); - - input += input_stride; - } while ((row += CFL_BUF_LINE_I256) < row_end); -} - -CFL_GET_SUBSAMPLE_FUNCTION_AVX2(444, lbd) - -/** - * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2. Resulting in a more - * precise version of a box filter 4:2:0 pixel subsampling in Q3. 
- * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. - * - * Note: For 4:2:0 luma subsampling, the width will never be greater than 16. - */ -static void cfl_luma_subsampling_420_hbd_avx2(const uint16_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - (void)width; // Forever 32 - const int luma_stride = input_stride << 1; - __m256i *row = (__m256i *)pred_buf_q3; - const __m256i *row_end = row + (height >> 1) * CFL_BUF_LINE_I256; - do { - __m256i top = _mm256_loadu_si256((__m256i *)input); - __m256i bot = _mm256_loadu_si256((__m256i *)(input + input_stride)); - __m256i sum = _mm256_add_epi16(top, bot); - - __m256i top_1 = _mm256_loadu_si256((__m256i *)(input + 16)); - __m256i bot_1 = _mm256_loadu_si256((__m256i *)(input + 16 + input_stride)); - __m256i sum_1 = _mm256_add_epi16(top_1, bot_1); - - __m256i hsum = _mm256_hadd_epi16(sum, sum_1); - hsum = _mm256_permute4x64_epi64(hsum, _MM_SHUFFLE(3, 1, 2, 0)); - hsum = _mm256_add_epi16(hsum, hsum); - - _mm256_storeu_si256(row, hsum); - - input += luma_stride; - } while ((row += CFL_BUF_LINE_I256) < row_end); -} - -CFL_GET_SUBSAMPLE_FUNCTION_AVX2(420, hbd) - -/** - * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4. Resulting in a more - * precise version of a box filter 4:2:2 pixel subsampling in Q3. - * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. 
- * - */ -static void cfl_luma_subsampling_422_hbd_avx2(const uint16_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - (void)width; // Forever 32 - __m256i *row = (__m256i *)pred_buf_q3; - const __m256i *row_end = row + height * CFL_BUF_LINE_I256; - do { - __m256i top = _mm256_loadu_si256((__m256i *)input); - __m256i top_1 = _mm256_loadu_si256((__m256i *)(input + 16)); - __m256i hsum = _mm256_hadd_epi16(top, top_1); - hsum = _mm256_permute4x64_epi64(hsum, _MM_SHUFFLE(3, 1, 2, 0)); - hsum = _mm256_slli_epi16(hsum, 2); - - _mm256_storeu_si256(row, hsum); - - input += input_stride; - } while ((row += CFL_BUF_LINE_I256) < row_end); -} - -CFL_GET_SUBSAMPLE_FUNCTION_AVX2(422, hbd) - -static void cfl_luma_subsampling_444_hbd_avx2(const uint16_t *input, - int input_stride, - uint16_t *pred_buf_q3, int width, - int height) { - (void)width; // Forever 32 - __m256i *row = (__m256i *)pred_buf_q3; - const __m256i *row_end = row + height * CFL_BUF_LINE_I256; - do { - __m256i top = _mm256_loadu_si256((__m256i *)input); - __m256i top_1 = _mm256_loadu_si256((__m256i *)(input + 16)); - _mm256_storeu_si256(row, _mm256_slli_epi16(top, 3)); - _mm256_storeu_si256(row + 1, _mm256_slli_epi16(top_1, 3)); - input += input_stride; - } while ((row += CFL_BUF_LINE_I256) < row_end); -} - -CFL_GET_SUBSAMPLE_FUNCTION_AVX2(444, hbd) - -static INLINE __m256i predict_unclipped(const __m256i *input, __m256i alpha_q12, - __m256i alpha_sign, __m256i dc_q0) { - __m256i ac_q3 = _mm256_loadu_si256(input); - __m256i ac_sign = _mm256_sign_epi16(alpha_sign, ac_q3); - __m256i scaled_luma_q0 = - _mm256_mulhrs_epi16(_mm256_abs_epi16(ac_q3), alpha_q12); - scaled_luma_q0 = _mm256_sign_epi16(scaled_luma_q0, ac_sign); - return _mm256_add_epi16(scaled_luma_q0, dc_q0); -} - -static INLINE void cfl_predict_lbd_avx2(const int16_t *pred_buf_q3, - uint8_t *dst, int dst_stride, - int alpha_q3, int width, int height) { - (void)width; - const __m256i alpha_sign = 
_mm256_set1_epi16(alpha_q3); - const __m256i alpha_q12 = _mm256_slli_epi16(_mm256_abs_epi16(alpha_sign), 9); - const __m256i dc_q0 = _mm256_set1_epi16(*dst); - __m256i *row = (__m256i *)pred_buf_q3; - const __m256i *row_end = row + height * CFL_BUF_LINE_I256; - - do { - __m256i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0); - __m256i next = predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0); - res = _mm256_packus_epi16(res, next); - res = _mm256_permute4x64_epi64(res, _MM_SHUFFLE(3, 1, 2, 0)); - _mm256_storeu_si256((__m256i *)dst, res); - dst += dst_stride; - } while ((row += CFL_BUF_LINE_I256) < row_end); -} - -CFL_PREDICT_X(avx2, 32, 8, lbd); -CFL_PREDICT_X(avx2, 32, 16, lbd); -CFL_PREDICT_X(avx2, 32, 32, lbd); - -cfl_predict_lbd_fn get_predict_lbd_fn_avx2(TX_SIZE tx_size) { - static const cfl_predict_lbd_fn pred[TX_SIZES_ALL] = { - predict_lbd_4x4_ssse3, /* 4x4 */ - predict_lbd_8x8_ssse3, /* 8x8 */ - predict_lbd_16x16_ssse3, /* 16x16 */ - predict_lbd_32x32_avx2, /* 32x32 */ - cfl_predict_lbd_null, /* 64x64 (invalid CFL size) */ - predict_lbd_4x8_ssse3, /* 4x8 */ - predict_lbd_8x4_ssse3, /* 8x4 */ - predict_lbd_8x16_ssse3, /* 8x16 */ - predict_lbd_16x8_ssse3, /* 16x8 */ - predict_lbd_16x32_ssse3, /* 16x32 */ - predict_lbd_32x16_avx2, /* 32x16 */ - cfl_predict_lbd_null, /* 32x64 (invalid CFL size) */ - cfl_predict_lbd_null, /* 64x32 (invalid CFL size) */ - predict_lbd_4x16_ssse3, /* 4x16 */ - predict_lbd_16x4_ssse3, /* 16x4 */ - predict_lbd_8x32_ssse3, /* 8x32 */ - predict_lbd_32x8_avx2, /* 32x8 */ - cfl_predict_lbd_null, /* 16x64 (invalid CFL size) */ - cfl_predict_lbd_null, /* 64x16 (invalid CFL size) */ - }; - // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to index the - // function pointer array out of bounds. 
- return pred[tx_size % TX_SIZES_ALL]; -} - -static __m256i highbd_max_epi16(int bd) { - const __m256i neg_one = _mm256_set1_epi16(-1); - // (1 << bd) - 1 => -(-1 << bd) -1 => -1 - (-1 << bd) => -1 ^ (-1 << bd) - return _mm256_xor_si256(_mm256_slli_epi16(neg_one, bd), neg_one); -} - -static __m256i highbd_clamp_epi16(__m256i u, __m256i zero, __m256i max) { - return _mm256_max_epi16(_mm256_min_epi16(u, max), zero); -} - -static INLINE void cfl_predict_hbd_avx2(const int16_t *pred_buf_q3, - uint16_t *dst, int dst_stride, - int alpha_q3, int bd, int width, - int height) { - // Use SSSE3 version for smaller widths - assert(width == 16 || width == 32); - const __m256i alpha_sign = _mm256_set1_epi16(alpha_q3); - const __m256i alpha_q12 = _mm256_slli_epi16(_mm256_abs_epi16(alpha_sign), 9); - const __m256i dc_q0 = _mm256_loadu_si256((__m256i *)dst); - const __m256i max = highbd_max_epi16(bd); - - __m256i *row = (__m256i *)pred_buf_q3; - const __m256i *row_end = row + height * CFL_BUF_LINE_I256; - do { - const __m256i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0); - _mm256_storeu_si256((__m256i *)dst, - highbd_clamp_epi16(res, _mm256_setzero_si256(), max)); - if (width == 32) { - const __m256i res_1 = - predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0); - _mm256_storeu_si256( - (__m256i *)(dst + 16), - highbd_clamp_epi16(res_1, _mm256_setzero_si256(), max)); - } - dst += dst_stride; - } while ((row += CFL_BUF_LINE_I256) < row_end); -} - -CFL_PREDICT_X(avx2, 16, 4, hbd) -CFL_PREDICT_X(avx2, 16, 8, hbd) -CFL_PREDICT_X(avx2, 16, 16, hbd) -CFL_PREDICT_X(avx2, 16, 32, hbd) -CFL_PREDICT_X(avx2, 32, 8, hbd) -CFL_PREDICT_X(avx2, 32, 16, hbd) -CFL_PREDICT_X(avx2, 32, 32, hbd) - -cfl_predict_hbd_fn get_predict_hbd_fn_avx2(TX_SIZE tx_size) { - static const cfl_predict_hbd_fn pred[TX_SIZES_ALL] = { - predict_hbd_4x4_ssse3, /* 4x4 */ - predict_hbd_8x8_ssse3, /* 8x8 */ - predict_hbd_16x16_avx2, /* 16x16 */ - predict_hbd_32x32_avx2, /* 32x32 */ - cfl_predict_hbd_null, 
/* 64x64 (invalid CFL size) */ - predict_hbd_4x8_ssse3, /* 4x8 */ - predict_hbd_8x4_ssse3, /* 8x4 */ - predict_hbd_8x16_ssse3, /* 8x16 */ - predict_hbd_16x8_avx2, /* 16x8 */ - predict_hbd_16x32_avx2, /* 16x32 */ - predict_hbd_32x16_avx2, /* 32x16 */ - cfl_predict_hbd_null, /* 32x64 (invalid CFL size) */ - cfl_predict_hbd_null, /* 64x32 (invalid CFL size) */ - predict_hbd_4x16_ssse3, /* 4x16 */ - predict_hbd_16x4_avx2, /* 16x4 */ - predict_hbd_8x32_ssse3, /* 8x32 */ - predict_hbd_32x8_avx2, /* 32x8 */ - cfl_predict_hbd_null, /* 16x64 (invalid CFL size) */ - cfl_predict_hbd_null, /* 64x16 (invalid CFL size) */ - }; - // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to index the - // function pointer array out of bounds. - return pred[tx_size % TX_SIZES_ALL]; -} - -// Returns a vector where all the (32-bits) elements are the sum of all the -// lanes in a. -static INLINE __m256i fill_sum_epi32(__m256i a) { - // Given that a == [A, B, C, D, E, F, G, H] - a = _mm256_hadd_epi32(a, a); - // Given that A' == A + B, C' == C + D, E' == E + F, G' == G + H - // a == [A', C', A', C', E', G', E', G'] - a = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0)); - // a == [A', C', E', G', A', C', E', G'] - a = _mm256_hadd_epi32(a, a); - // Given that A'' == A' + C' and E'' == E' + G' - // a == [A'', E'', A'', E'', A'', E'', A'', E''] - return _mm256_hadd_epi32(a, a); - // Given that A''' == A'' + E'' - // a == [A''', A''', A''', A''', A''', A''', A''', A'''] -} - -static INLINE __m256i _mm256_addl_epi16(__m256i a) { - return _mm256_add_epi32(_mm256_unpacklo_epi16(a, _mm256_setzero_si256()), - _mm256_unpackhi_epi16(a, _mm256_setzero_si256())); -} - -static INLINE void subtract_average_avx2(const uint16_t *src_ptr, - int16_t *dst_ptr, int width, - int height, int round_offset, - int num_pel_log2) { - // Use SSE2 version for smaller widths - assert(width == 16 || width == 32); - - const __m256i *src = (__m256i *)src_ptr; - const __m256i *const end = src + height * 
CFL_BUF_LINE_I256; - // To maximize usage of the AVX2 registers, we sum two rows per loop - // iteration - const int step = 2 * CFL_BUF_LINE_I256; - - __m256i sum = _mm256_setzero_si256(); - // For width 32, we use a second sum accumulator to reduce accumulator - // dependencies in the loop. - __m256i sum2; - if (width == 32) sum2 = _mm256_setzero_si256(); - - do { - // Add top row to the bottom row - __m256i l0 = _mm256_add_epi16(_mm256_loadu_si256(src), - _mm256_loadu_si256(src + CFL_BUF_LINE_I256)); - sum = _mm256_add_epi32(sum, _mm256_addl_epi16(l0)); - if (width == 32) { /* Don't worry, this if it gets optimized out. */ - // Add the second part of the top row to the second part of the bottom row - __m256i l1 = - _mm256_add_epi16(_mm256_loadu_si256(src + 1), - _mm256_loadu_si256(src + 1 + CFL_BUF_LINE_I256)); - sum2 = _mm256_add_epi32(sum2, _mm256_addl_epi16(l1)); - } - src += step; - } while (src < end); - // Combine both sum accumulators - if (width == 32) sum = _mm256_add_epi32(sum, sum2); - - __m256i fill = fill_sum_epi32(sum); - - __m256i avg_epi16 = _mm256_srli_epi32( - _mm256_add_epi32(fill, _mm256_set1_epi32(round_offset)), num_pel_log2); - avg_epi16 = _mm256_packs_epi32(avg_epi16, avg_epi16); - - // Store and subtract loop - src = (__m256i *)src_ptr; - __m256i *dst = (__m256i *)dst_ptr; - do { - _mm256_storeu_si256(dst, - _mm256_sub_epi16(_mm256_loadu_si256(src), avg_epi16)); - if (width == 32) { - _mm256_storeu_si256( - dst + 1, _mm256_sub_epi16(_mm256_loadu_si256(src + 1), avg_epi16)); - } - src += CFL_BUF_LINE_I256; - dst += CFL_BUF_LINE_I256; - } while (src < end); -} - -// Declare wrappers for AVX2 sizes -CFL_SUB_AVG_X(avx2, 16, 4, 32, 6) -CFL_SUB_AVG_X(avx2, 16, 8, 64, 7) -CFL_SUB_AVG_X(avx2, 16, 16, 128, 8) -CFL_SUB_AVG_X(avx2, 16, 32, 256, 9) -CFL_SUB_AVG_X(avx2, 32, 8, 128, 8) -CFL_SUB_AVG_X(avx2, 32, 16, 256, 9) -CFL_SUB_AVG_X(avx2, 32, 32, 512, 10) - -// Based on the observation that for small blocks AVX2 does not outperform -// SSE2, we 
call the SSE2 code for block widths 4 and 8. -cfl_subtract_average_fn get_subtract_average_fn_avx2(TX_SIZE tx_size) { - static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = { - subtract_average_4x4_sse2, /* 4x4 */ - subtract_average_8x8_sse2, /* 8x8 */ - subtract_average_16x16_avx2, /* 16x16 */ - subtract_average_32x32_avx2, /* 32x32 */ - cfl_subtract_average_null, /* 64x64 (invalid CFL size) */ - subtract_average_4x8_sse2, /* 4x8 */ - subtract_average_8x4_sse2, /* 8x4 */ - subtract_average_8x16_sse2, /* 8x16 */ - subtract_average_16x8_avx2, /* 16x8 */ - subtract_average_16x32_avx2, /* 16x32 */ - subtract_average_32x16_avx2, /* 32x16 */ - cfl_subtract_average_null, /* 32x64 (invalid CFL size) */ - cfl_subtract_average_null, /* 64x32 (invalid CFL size) */ - subtract_average_4x16_sse2, /* 4x16 */ - subtract_average_16x4_avx2, /* 16x4 */ - subtract_average_8x32_sse2, /* 8x32 */ - subtract_average_32x8_avx2, /* 32x8 */ - cfl_subtract_average_null, /* 16x64 (invalid CFL size) */ - cfl_subtract_average_null, /* 64x16 (invalid CFL size) */ - }; - // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to - // index the function pointer array out of bounds. - return sub_avg[tx_size % TX_SIZES_ALL]; -} diff --git a/third_party/aom/av1/common/x86/cfl_simd.h b/third_party/aom/av1/common/x86/cfl_simd.h deleted file mode 100644 index 3b342cd4e..000000000 --- a/third_party/aom/av1/common/x86/cfl_simd.h +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_COMMON_X86_CFL_SIMD_H_ -#define AOM_AV1_COMMON_X86_CFL_SIMD_H_ - -#include "av1/common/blockd.h" - -// SSSE3 version is optimal for with == 4, we reuse them in AVX2 -void subsample_lbd_420_4x4_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_420_4x8_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_420_4x16_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 8, we reuse it in AVX2 -void subsample_lbd_420_8x4_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_420_8x8_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_420_8x16_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_420_8x32_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 16, we reuse it in AVX2 -void subsample_lbd_420_16x4_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_420_16x8_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_420_16x16_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_420_16x32_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 4, we reuse them in AVX2 -void subsample_lbd_422_4x4_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_422_4x8_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_422_4x16_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 8, we reuse it in AVX2 -void subsample_lbd_422_8x4_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_422_8x8_ssse3(const uint8_t *input, int 
input_stride, - uint16_t *output_q3); -void subsample_lbd_422_8x16_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_422_8x32_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 16, we reuse it in AVX2 -void subsample_lbd_422_16x4_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_422_16x8_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_422_16x16_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_422_16x32_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 4, we reuse them in AVX2 -void subsample_lbd_444_4x4_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_444_4x8_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_444_4x16_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 8, we reuse it in AVX2 -void subsample_lbd_444_8x4_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_444_8x8_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_444_8x16_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_444_8x32_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 16, we reuse it in AVX2 -void subsample_lbd_444_16x4_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_444_16x8_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_444_16x16_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); -void subsample_lbd_444_16x32_ssse3(const uint8_t *input, int input_stride, - uint16_t *output_q3); - -void 
subsample_hbd_420_4x4_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_420_4x8_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_420_4x16_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 8, we reuse it in AVX2 -void subsample_hbd_420_8x4_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_420_8x8_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_420_8x16_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_420_8x32_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is faster for with == 16, we reuse it in AVX2 -void subsample_hbd_420_16x4_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_420_16x8_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_420_16x16_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_420_16x32_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -void subsample_hbd_422_4x4_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_422_4x8_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_422_4x16_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 8, we reuse it in AVX2 -void subsample_hbd_422_8x4_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_422_8x8_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_422_8x16_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_422_8x32_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 
version is faster for with == 16, we reuse it in AVX2 -void subsample_hbd_422_16x4_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_422_16x8_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_422_16x16_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_422_16x32_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -void subsample_hbd_444_4x4_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_444_4x8_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_444_4x16_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is optimal for with == 8, we reuse it in AVX2 -void subsample_hbd_444_8x4_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_444_8x8_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_444_8x16_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_444_8x32_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -// SSSE3 version is faster for with == 16, we reuse it in AVX2 -void subsample_hbd_444_16x4_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_444_16x8_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_444_16x16_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); -void subsample_hbd_444_16x32_ssse3(const uint16_t *input, int input_stride, - uint16_t *output_q3); - -// SSE2 version is optimal for with == 4, we reuse them in AVX2 -void subtract_average_4x4_sse2(const uint16_t *src, int16_t *dst); -void subtract_average_4x8_sse2(const uint16_t *src, int16_t *dst); -void subtract_average_4x16_sse2(const uint16_t *src, int16_t *dst); - -// SSE2 version is optimal for with 
== 8, we reuse them in AVX2 -void subtract_average_8x4_sse2(const uint16_t *src, int16_t *dst); -void subtract_average_8x8_sse2(const uint16_t *src, int16_t *dst); -void subtract_average_8x16_sse2(const uint16_t *src, int16_t *dst); -void subtract_average_8x32_sse2(const uint16_t *src, int16_t *dst); - -void predict_lbd_4x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); -void predict_lbd_4x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); -void predict_lbd_4x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); - -void predict_lbd_8x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); -void predict_lbd_8x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); -void predict_lbd_8x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); -void predict_lbd_8x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); - -void predict_lbd_16x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); -void predict_lbd_16x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); -void predict_lbd_16x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); -void predict_lbd_16x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, - int dst_stride, int alpha_q3); - -void predict_hbd_4x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); -void predict_hbd_4x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); -void predict_hbd_4x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); - -void predict_hbd_8x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); -void predict_hbd_8x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int 
alpha_q3, int bd); -void predict_hbd_8x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); -void predict_hbd_8x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); - -void predict_hbd_16x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); -void predict_hbd_16x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); -void predict_hbd_16x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); -void predict_hbd_16x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, - int dst_stride, int alpha_q3, int bd); - -#endif // AOM_AV1_COMMON_X86_CFL_SIMD_H_ diff --git a/third_party/aom/av1/common/x86/cfl_sse2.c b/third_party/aom/av1/common/x86/cfl_sse2.c deleted file mode 100644 index 4783fe098..000000000 --- a/third_party/aom/av1/common/x86/cfl_sse2.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <emmintrin.h> - -#include "av1/common/cfl.h" -#include "config/av1_rtcd.h" - -static INLINE __m128i fill_sum_epi32(__m128i l0) { - l0 = _mm_add_epi32(l0, _mm_shuffle_epi32(l0, _MM_SHUFFLE(1, 0, 3, 2))); - return _mm_add_epi32(l0, _mm_shuffle_epi32(l0, _MM_SHUFFLE(2, 3, 0, 1))); -} - -static INLINE void subtract_average_sse2(const uint16_t *src_ptr, - int16_t *dst_ptr, int width, - int height, int round_offset, - int num_pel_log2) { - const __m128i zeros = _mm_setzero_si128(); - const __m128i round_offset_epi32 = _mm_set1_epi32(round_offset); - const __m128i *src = (__m128i *)src_ptr; - const __m128i *const end = src + height * CFL_BUF_LINE_I128; - const int step = CFL_BUF_LINE_I128 * (1 + (width == 8) + 3 * (width == 4)); - - __m128i sum = zeros; - do { - __m128i l0; - if (width == 4) { - l0 = _mm_add_epi16(_mm_loadl_epi64(src), - _mm_loadl_epi64(src + CFL_BUF_LINE_I128)); - __m128i l1 = _mm_add_epi16(_mm_loadl_epi64(src + 2 * CFL_BUF_LINE_I128), - _mm_loadl_epi64(src + 3 * CFL_BUF_LINE_I128)); - sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros), - _mm_unpacklo_epi16(l1, zeros))); - } else { - if (width == 8) { - l0 = _mm_add_epi16(_mm_loadu_si128(src), - _mm_loadu_si128(src + CFL_BUF_LINE_I128)); - } else { - l0 = _mm_add_epi16(_mm_loadu_si128(src), _mm_loadu_si128(src + 1)); - } - sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros), - _mm_unpackhi_epi16(l0, zeros))); - if (width == 32) { - l0 = _mm_add_epi16(_mm_loadu_si128(src + 2), _mm_loadu_si128(src + 3)); - sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros), - _mm_unpackhi_epi16(l0, zeros))); - } - } - src += step; - } while (src < end); - - sum = fill_sum_epi32(sum); - - __m128i avg_epi16 = - _mm_srli_epi32(_mm_add_epi32(sum, round_offset_epi32), num_pel_log2); - avg_epi16 = _mm_packs_epi32(avg_epi16, avg_epi16); - - src = (__m128i *)src_ptr; - __m128i *dst = (__m128i *)dst_ptr; - do { - if (width == 4) { - _mm_storel_epi64(dst, 
_mm_sub_epi16(_mm_loadl_epi64(src), avg_epi16)); - } else { - _mm_storeu_si128(dst, _mm_sub_epi16(_mm_loadu_si128(src), avg_epi16)); - if (width > 8) { - _mm_storeu_si128(dst + 1, - _mm_sub_epi16(_mm_loadu_si128(src + 1), avg_epi16)); - if (width == 32) { - _mm_storeu_si128(dst + 2, - _mm_sub_epi16(_mm_loadu_si128(src + 2), avg_epi16)); - _mm_storeu_si128(dst + 3, - _mm_sub_epi16(_mm_loadu_si128(src + 3), avg_epi16)); - } - } - } - src += CFL_BUF_LINE_I128; - dst += CFL_BUF_LINE_I128; - } while (src < end); -} - -CFL_SUB_AVG_FN(sse2) diff --git a/third_party/aom/av1/common/x86/cfl_ssse3.c b/third_party/aom/av1/common/x86/cfl_ssse3.c deleted file mode 100644 index bbf007295..000000000 --- a/third_party/aom/av1/common/x86/cfl_ssse3.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <tmmintrin.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/cfl.h" - -#include "av1/common/x86/cfl_simd.h" - -// Load 32-bit integer from memory into the first element of dst. -static INLINE __m128i _mm_loadh_epi32(__m128i const *mem_addr) { - return _mm_cvtsi32_si128(*((int *)mem_addr)); -} - -// Store 32-bit integer from the first element of a into memory. -static INLINE void _mm_storeh_epi32(__m128i const *mem_addr, __m128i a) { - *((int *)mem_addr) = _mm_cvtsi128_si32(a); -} - -/** - * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2. Resulting in a more - * precise version of a box filter 4:2:0 pixel subsampling in Q3. 
- * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. - */ -static INLINE void cfl_luma_subsampling_420_lbd_ssse3(const uint8_t *input, - int input_stride, - uint16_t *pred_buf_q3, - int width, int height) { - const __m128i twos = _mm_set1_epi8(2); - __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3; - const __m128i *end = pred_buf_m128i + (height >> 1) * CFL_BUF_LINE_I128; - const int luma_stride = input_stride << 1; - do { - if (width == 4) { - __m128i top = _mm_loadh_epi32((__m128i *)input); - top = _mm_maddubs_epi16(top, twos); - __m128i bot = _mm_loadh_epi32((__m128i *)(input + input_stride)); - bot = _mm_maddubs_epi16(bot, twos); - const __m128i sum = _mm_add_epi16(top, bot); - _mm_storeh_epi32(pred_buf_m128i, sum); - } else if (width == 8) { - __m128i top = _mm_loadl_epi64((__m128i *)input); - top = _mm_maddubs_epi16(top, twos); - __m128i bot = _mm_loadl_epi64((__m128i *)(input + input_stride)); - bot = _mm_maddubs_epi16(bot, twos); - const __m128i sum = _mm_add_epi16(top, bot); - _mm_storel_epi64(pred_buf_m128i, sum); - } else { - __m128i top = _mm_loadu_si128((__m128i *)input); - top = _mm_maddubs_epi16(top, twos); - __m128i bot = _mm_loadu_si128((__m128i *)(input + input_stride)); - bot = _mm_maddubs_epi16(bot, twos); - const __m128i sum = _mm_add_epi16(top, bot); - _mm_storeu_si128(pred_buf_m128i, sum); - if (width == 32) { - __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1); - __m128i bot_1 = - _mm_loadu_si128(((__m128i *)(input + input_stride)) + 1); - top_1 = _mm_maddubs_epi16(top_1, twos); - bot_1 = _mm_maddubs_epi16(bot_1, twos); - __m128i sum_1 = _mm_add_epi16(top_1, bot_1); - _mm_storeu_si128(pred_buf_m128i + 1, sum_1); - } - } - input += luma_stride; - pred_buf_m128i += CFL_BUF_LINE_I128; - } while (pred_buf_m128i < end); -} - -/** - * 
Adds 2 pixels (in a 2x1 grid) and multiplies them by 4. Resulting in a more - * precise version of a box filter 4:2:2 pixel subsampling in Q3. - * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. - */ -static INLINE void cfl_luma_subsampling_422_lbd_ssse3(const uint8_t *input, - int input_stride, - uint16_t *pred_buf_q3, - int width, int height) { - const __m128i fours = _mm_set1_epi8(4); - __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3; - const __m128i *end = pred_buf_m128i + height * CFL_BUF_LINE_I128; - do { - if (width == 4) { - __m128i top = _mm_loadh_epi32((__m128i *)input); - top = _mm_maddubs_epi16(top, fours); - _mm_storeh_epi32(pred_buf_m128i, top); - } else if (width == 8) { - __m128i top = _mm_loadl_epi64((__m128i *)input); - top = _mm_maddubs_epi16(top, fours); - _mm_storel_epi64(pred_buf_m128i, top); - } else { - __m128i top = _mm_loadu_si128((__m128i *)input); - top = _mm_maddubs_epi16(top, fours); - _mm_storeu_si128(pred_buf_m128i, top); - if (width == 32) { - __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1); - top_1 = _mm_maddubs_epi16(top_1, fours); - _mm_storeu_si128(pred_buf_m128i + 1, top_1); - } - } - input += input_stride; - pred_buf_m128i += CFL_BUF_LINE_I128; - } while (pred_buf_m128i < end); -} - -/** - * Multiplies the pixels by 8 (scaling in Q3). - * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. 
- */ -static INLINE void cfl_luma_subsampling_444_lbd_ssse3(const uint8_t *input, - int input_stride, - uint16_t *pred_buf_q3, - int width, int height) { - const __m128i zeros = _mm_setzero_si128(); - const int luma_stride = input_stride; - __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3; - const __m128i *end = pred_buf_m128i + height * CFL_BUF_LINE_I128; - do { - if (width == 4) { - __m128i row = _mm_loadh_epi32((__m128i *)input); - row = _mm_unpacklo_epi8(row, zeros); - _mm_storel_epi64(pred_buf_m128i, _mm_slli_epi16(row, 3)); - } else if (width == 8) { - __m128i row = _mm_loadl_epi64((__m128i *)input); - row = _mm_unpacklo_epi8(row, zeros); - _mm_storeu_si128(pred_buf_m128i, _mm_slli_epi16(row, 3)); - } else { - __m128i row = _mm_loadu_si128((__m128i *)input); - const __m128i row_lo = _mm_unpacklo_epi8(row, zeros); - const __m128i row_hi = _mm_unpackhi_epi8(row, zeros); - _mm_storeu_si128(pred_buf_m128i, _mm_slli_epi16(row_lo, 3)); - _mm_storeu_si128(pred_buf_m128i + 1, _mm_slli_epi16(row_hi, 3)); - if (width == 32) { - __m128i row_1 = _mm_loadu_si128(((__m128i *)input) + 1); - const __m128i row_1_lo = _mm_unpacklo_epi8(row_1, zeros); - const __m128i row_1_hi = _mm_unpackhi_epi8(row_1, zeros); - _mm_storeu_si128(pred_buf_m128i + 2, _mm_slli_epi16(row_1_lo, 3)); - _mm_storeu_si128(pred_buf_m128i + 3, _mm_slli_epi16(row_1_hi, 3)); - } - } - input += luma_stride; - pred_buf_m128i += CFL_BUF_LINE_I128; - } while (pred_buf_m128i < end); -} - -/** - * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2. Resulting in a more - * precise version of a box filter 4:2:0 pixel subsampling in Q3. - * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. 
- */ -static INLINE void cfl_luma_subsampling_420_hbd_ssse3(const uint16_t *input, - int input_stride, - uint16_t *pred_buf_q3, - int width, int height) { - const uint16_t *end = pred_buf_q3 + (height >> 1) * CFL_BUF_LINE; - const int luma_stride = input_stride << 1; - do { - if (width == 4) { - const __m128i top = _mm_loadl_epi64((__m128i *)input); - const __m128i bot = _mm_loadl_epi64((__m128i *)(input + input_stride)); - __m128i sum = _mm_add_epi16(top, bot); - sum = _mm_hadd_epi16(sum, sum); - *((int *)pred_buf_q3) = _mm_cvtsi128_si32(_mm_add_epi16(sum, sum)); - } else { - const __m128i top = _mm_loadu_si128((__m128i *)input); - const __m128i bot = _mm_loadu_si128((__m128i *)(input + input_stride)); - __m128i sum = _mm_add_epi16(top, bot); - if (width == 8) { - sum = _mm_hadd_epi16(sum, sum); - _mm_storel_epi64((__m128i *)pred_buf_q3, _mm_add_epi16(sum, sum)); - } else { - const __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1); - const __m128i bot_1 = - _mm_loadu_si128(((__m128i *)(input + input_stride)) + 1); - sum = _mm_hadd_epi16(sum, _mm_add_epi16(top_1, bot_1)); - _mm_storeu_si128((__m128i *)pred_buf_q3, _mm_add_epi16(sum, sum)); - if (width == 32) { - const __m128i top_2 = _mm_loadu_si128(((__m128i *)input) + 2); - const __m128i bot_2 = - _mm_loadu_si128(((__m128i *)(input + input_stride)) + 2); - const __m128i top_3 = _mm_loadu_si128(((__m128i *)input) + 3); - const __m128i bot_3 = - _mm_loadu_si128(((__m128i *)(input + input_stride)) + 3); - const __m128i sum_2 = _mm_add_epi16(top_2, bot_2); - const __m128i sum_3 = _mm_add_epi16(top_3, bot_3); - __m128i next_sum = _mm_hadd_epi16(sum_2, sum_3); - _mm_storeu_si128(((__m128i *)pred_buf_q3) + 1, - _mm_add_epi16(next_sum, next_sum)); - } - } - } - input += luma_stride; - } while ((pred_buf_q3 += CFL_BUF_LINE) < end); -} - -/** - * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4. Resulting in a more - * precise version of a box filter 4:2:2 pixel subsampling in Q3. 
- * - * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the - * active area is specified using width and height. - * - * Note: We don't need to worry about going over the active area, as long as we - * stay inside the CfL prediction buffer. - */ -static INLINE void cfl_luma_subsampling_422_hbd_ssse3(const uint16_t *input, - int input_stride, - uint16_t *pred_buf_q3, - int width, int height) { - __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3; - const __m128i *end = pred_buf_m128i + height * CFL_BUF_LINE_I128; - do { - if (width == 4) { - const __m128i top = _mm_loadl_epi64((__m128i *)input); - const __m128i sum = _mm_slli_epi16(_mm_hadd_epi16(top, top), 2); - _mm_storeh_epi32(pred_buf_m128i, sum); - } else { - const __m128i top = _mm_loadu_si128((__m128i *)input); - if (width == 8) { - const __m128i sum = _mm_slli_epi16(_mm_hadd_epi16(top, top), 2); - _mm_storel_epi64(pred_buf_m128i, sum); - } else { - const __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1); - const __m128i sum = _mm_slli_epi16(_mm_hadd_epi16(top, top_1), 2); - _mm_storeu_si128(pred_buf_m128i, sum); - if (width == 32) { - const __m128i top_2 = _mm_loadu_si128(((__m128i *)input) + 2); - const __m128i top_3 = _mm_loadu_si128(((__m128i *)input) + 3); - const __m128i sum_1 = _mm_slli_epi16(_mm_hadd_epi16(top_2, top_3), 2); - _mm_storeu_si128(pred_buf_m128i + 1, sum_1); - } - } - } - pred_buf_m128i += CFL_BUF_LINE_I128; - input += input_stride; - } while (pred_buf_m128i < end); -} - -static INLINE void cfl_luma_subsampling_444_hbd_ssse3(const uint16_t *input, - int input_stride, - uint16_t *pred_buf_q3, - int width, int height) { - const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE; - do { - if (width == 4) { - const __m128i row = _mm_slli_epi16(_mm_loadl_epi64((__m128i *)input), 3); - _mm_storel_epi64((__m128i *)pred_buf_q3, row); - } else { - const __m128i row = _mm_slli_epi16(_mm_loadu_si128((__m128i *)input), 3); - _mm_storeu_si128((__m128i *)pred_buf_q3, row); - 
if (width >= 16) { - __m128i row_1 = _mm_loadu_si128(((__m128i *)input) + 1); - row_1 = _mm_slli_epi16(row_1, 3); - _mm_storeu_si128(((__m128i *)pred_buf_q3) + 1, row_1); - if (width == 32) { - __m128i row_2 = _mm_loadu_si128(((__m128i *)input) + 2); - row_2 = _mm_slli_epi16(row_2, 3); - _mm_storeu_si128(((__m128i *)pred_buf_q3) + 2, row_2); - __m128i row_3 = _mm_loadu_si128(((__m128i *)input) + 3); - row_3 = _mm_slli_epi16(row_3, 3); - _mm_storeu_si128(((__m128i *)pred_buf_q3) + 3, row_3); - } - } - } - input += input_stride; - pred_buf_q3 += CFL_BUF_LINE; - } while (pred_buf_q3 < end); -} - -CFL_GET_SUBSAMPLE_FUNCTION(ssse3) - -static INLINE __m128i predict_unclipped(const __m128i *input, __m128i alpha_q12, - __m128i alpha_sign, __m128i dc_q0) { - __m128i ac_q3 = _mm_loadu_si128(input); - __m128i ac_sign = _mm_sign_epi16(alpha_sign, ac_q3); - __m128i scaled_luma_q0 = _mm_mulhrs_epi16(_mm_abs_epi16(ac_q3), alpha_q12); - scaled_luma_q0 = _mm_sign_epi16(scaled_luma_q0, ac_sign); - return _mm_add_epi16(scaled_luma_q0, dc_q0); -} - -static INLINE void cfl_predict_lbd_ssse3(const int16_t *pred_buf_q3, - uint8_t *dst, int dst_stride, - int alpha_q3, int width, int height) { - const __m128i alpha_sign = _mm_set1_epi16(alpha_q3); - const __m128i alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9); - const __m128i dc_q0 = _mm_set1_epi16(*dst); - __m128i *row = (__m128i *)pred_buf_q3; - const __m128i *row_end = row + height * CFL_BUF_LINE_I128; - do { - __m128i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0); - if (width < 16) { - res = _mm_packus_epi16(res, res); - if (width == 4) - _mm_storeh_epi32((__m128i *)dst, res); - else - _mm_storel_epi64((__m128i *)dst, res); - } else { - __m128i next = predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0); - res = _mm_packus_epi16(res, next); - _mm_storeu_si128((__m128i *)dst, res); - if (width == 32) { - res = predict_unclipped(row + 2, alpha_q12, alpha_sign, dc_q0); - next = predict_unclipped(row + 3, 
alpha_q12, alpha_sign, dc_q0); - res = _mm_packus_epi16(res, next); - _mm_storeu_si128((__m128i *)(dst + 16), res); - } - } - dst += dst_stride; - } while ((row += CFL_BUF_LINE_I128) < row_end); -} - -CFL_PREDICT_FN(ssse3, lbd) - -static INLINE __m128i highbd_max_epi16(int bd) { - const __m128i neg_one = _mm_set1_epi16(-1); - // (1 << bd) - 1 => -(-1 << bd) -1 => -1 - (-1 << bd) => -1 ^ (-1 << bd) - return _mm_xor_si128(_mm_slli_epi16(neg_one, bd), neg_one); -} - -static INLINE __m128i highbd_clamp_epi16(__m128i u, __m128i zero, __m128i max) { - return _mm_max_epi16(_mm_min_epi16(u, max), zero); -} - -static INLINE void cfl_predict_hbd_ssse3(const int16_t *pred_buf_q3, - uint16_t *dst, int dst_stride, - int alpha_q3, int bd, int width, - int height) { - const __m128i alpha_sign = _mm_set1_epi16(alpha_q3); - const __m128i alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9); - const __m128i dc_q0 = _mm_set1_epi16(*dst); - const __m128i max = highbd_max_epi16(bd); - const __m128i zeros = _mm_setzero_si128(); - __m128i *row = (__m128i *)pred_buf_q3; - const __m128i *row_end = row + height * CFL_BUF_LINE_I128; - do { - __m128i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0); - res = highbd_clamp_epi16(res, zeros, max); - if (width == 4) { - _mm_storel_epi64((__m128i *)dst, res); - } else { - _mm_storeu_si128((__m128i *)dst, res); - } - if (width >= 16) { - const __m128i res_1 = - predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0); - _mm_storeu_si128(((__m128i *)dst) + 1, - highbd_clamp_epi16(res_1, zeros, max)); - } - if (width == 32) { - const __m128i res_2 = - predict_unclipped(row + 2, alpha_q12, alpha_sign, dc_q0); - _mm_storeu_si128((__m128i *)(dst + 16), - highbd_clamp_epi16(res_2, zeros, max)); - const __m128i res_3 = - predict_unclipped(row + 3, alpha_q12, alpha_sign, dc_q0); - _mm_storeu_si128((__m128i *)(dst + 24), - highbd_clamp_epi16(res_3, zeros, max)); - } - dst += dst_stride; - } while ((row += CFL_BUF_LINE_I128) < row_end); -} - 
-CFL_PREDICT_FN(ssse3, hbd) diff --git a/third_party/aom/av1/common/x86/convolve_2d_avx2.c b/third_party/aom/av1/common/x86/convolve_2d_avx2.c deleted file mode 100644 index 0acafd044..000000000 --- a/third_party/aom/av1/common/x86/convolve_2d_avx2.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <immintrin.h> - -#include "config/av1_rtcd.h" - -#include "aom_dsp/x86/convolve_avx2.h" -#include "aom_dsp/x86/convolve_common_intrin.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "aom_dsp/x86/synonyms.h" -#include "av1/common/convolve.h" - -void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int bd = 8; - - DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); - int im_h = h + filter_params_y->taps - 1; - int im_stride = 8; - int i, j; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; - - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - - __m256i filt[4], coeffs_h[4], coeffs_v[4]; - - assert(conv_params->round_0 > 0); - - 
filt[0] = _mm256_load_si256((__m256i const *)filt_global_avx2); - filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32)); - filt[2] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2)); - filt[3] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3)); - - prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_h); - prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_v); - - const __m256i round_const_h = _mm256_set1_epi16( - ((1 << (conv_params->round_0 - 1)) >> 1) + (1 << (bd + FILTER_BITS - 2))); - const __m128i round_shift_h = _mm_cvtsi32_si128(conv_params->round_0 - 1); - - const __m256i sum_round_v = _mm256_set1_epi32( - (1 << offset_bits) + ((1 << conv_params->round_1) >> 1)); - const __m128i sum_shift_v = _mm_cvtsi32_si128(conv_params->round_1); - - const __m256i round_const_v = _mm256_set1_epi32( - ((1 << bits) >> 1) - (1 << (offset_bits - conv_params->round_1)) - - ((1 << (offset_bits - conv_params->round_1)) >> 1)); - const __m128i round_shift_v = _mm_cvtsi32_si128(bits); - - for (j = 0; j < w; j += 8) { - for (i = 0; i < im_h; i += 2) { - __m256i data = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + j])); - - // Load the next line - if (i + 1 < im_h) - data = _mm256_inserti128_si256( - data, - _mm_loadu_si128( - (__m128i *)&src_ptr[(i * src_stride) + j + src_stride]), - 1); - - __m256i res = convolve_lowbd_x(data, coeffs_h, filt); - - res = - _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), round_shift_h); - - _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); - } - - /* Vertical filter */ - { - __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); - __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); - __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); - __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); - __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * 
im_stride)); - __m256i src_5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); - - __m256i s[8]; - s[0] = _mm256_unpacklo_epi16(src_0, src_1); - s[1] = _mm256_unpacklo_epi16(src_2, src_3); - s[2] = _mm256_unpacklo_epi16(src_4, src_5); - - s[4] = _mm256_unpackhi_epi16(src_0, src_1); - s[5] = _mm256_unpackhi_epi16(src_2, src_3); - s[6] = _mm256_unpackhi_epi16(src_4, src_5); - - for (i = 0; i < h; i += 2) { - const int16_t *data = &im_block[i * im_stride]; - - const __m256i s6 = - _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); - const __m256i s7 = - _mm256_loadu_si256((__m256i *)(data + 7 * im_stride)); - - s[3] = _mm256_unpacklo_epi16(s6, s7); - s[7] = _mm256_unpackhi_epi16(s6, s7); - - __m256i res_a = convolve(s, coeffs_v); - __m256i res_b = convolve(s + 4, coeffs_v); - - // Combine V round and 2F-H-V round into a single rounding - res_a = - _mm256_sra_epi32(_mm256_add_epi32(res_a, sum_round_v), sum_shift_v); - res_b = - _mm256_sra_epi32(_mm256_add_epi32(res_b, sum_round_v), sum_shift_v); - - const __m256i res_a_round = _mm256_sra_epi32( - _mm256_add_epi32(res_a, round_const_v), round_shift_v); - const __m256i res_b_round = _mm256_sra_epi32( - _mm256_add_epi32(res_b, round_const_v), round_shift_v); - - /* rounding code */ - // 16 bit conversion - const __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round); - // 8 bit conversion and saturation to uint8 - const __m256i res_8b = _mm256_packus_epi16(res_16bit, res_16bit); - - const __m128i res_0 = _mm256_castsi256_si128(res_8b); - const __m128i res_1 = _mm256_extracti128_si256(res_8b, 1); - - // Store values into the destination buffer - __m128i *const p_0 = (__m128i *)&dst[i * dst_stride + j]; - __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + j + dst_stride]; - if (w - j > 4) { - _mm_storel_epi64(p_0, res_0); - _mm_storel_epi64(p_1, res_1); - } else if (w == 4) { - xx_storel_32(p_0, res_0); - xx_storel_32(p_1, res_1); - } else { - *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0); - 
*(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1); - } - - s[0] = s[1]; - s[1] = s[2]; - s[2] = s[3]; - - s[4] = s[5]; - s[5] = s[6]; - s[6] = s[7]; - } - } - } -} - -static INLINE void copy_128(const uint8_t *src, uint8_t *dst) { - __m256i s[4]; - s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 32)); - s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 32)); - s[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 32)); - s[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 32)); - _mm256_storeu_si256((__m256i *)(dst + 0 * 32), s[0]); - _mm256_storeu_si256((__m256i *)(dst + 1 * 32), s[1]); - _mm256_storeu_si256((__m256i *)(dst + 2 * 32), s[2]); - _mm256_storeu_si256((__m256i *)(dst + 3 * 32), s[3]); -} - -void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - (void)conv_params; - - if (w >= 16) { - assert(!((intptr_t)dst % 16)); - assert(!(dst_stride % 16)); - } - - if (w == 2) { - do { - memcpy(dst, src, 2 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - memcpy(dst, src, 2 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 4) { - do { - memcpy(dst, src, 4 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - memcpy(dst, src, 4 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 8) { - do { - __m128i s[2]; - s[0] = _mm_loadl_epi64((__m128i *)src); - src += src_stride; - s[1] = _mm_loadl_epi64((__m128i *)src); - src += src_stride; - _mm_storel_epi64((__m128i *)dst, s[0]); - dst += dst_stride; - _mm_storel_epi64((__m128i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 16) { - do { - __m128i s[2]; - s[0] = 
_mm_loadu_si128((__m128i *)src); - src += src_stride; - s[1] = _mm_loadu_si128((__m128i *)src); - src += src_stride; - _mm_store_si128((__m128i *)dst, s[0]); - dst += dst_stride; - _mm_store_si128((__m128i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 32) { - do { - __m256i s[2]; - s[0] = _mm256_loadu_si256((__m256i *)src); - src += src_stride; - s[1] = _mm256_loadu_si256((__m256i *)src); - src += src_stride; - _mm256_storeu_si256((__m256i *)dst, s[0]); - dst += dst_stride; - _mm256_storeu_si256((__m256i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 64) { - do { - __m256i s[4]; - s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 32)); - s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 32)); - src += src_stride; - s[2] = _mm256_loadu_si256((__m256i *)(src + 0 * 32)); - s[3] = _mm256_loadu_si256((__m256i *)(src + 1 * 32)); - src += src_stride; - _mm256_storeu_si256((__m256i *)(dst + 0 * 32), s[0]); - _mm256_storeu_si256((__m256i *)(dst + 1 * 32), s[1]); - dst += dst_stride; - _mm256_storeu_si256((__m256i *)(dst + 0 * 32), s[2]); - _mm256_storeu_si256((__m256i *)(dst + 1 * 32), s[3]); - dst += dst_stride; - h -= 2; - } while (h); - } else { - do { - copy_128(src, dst); - src += src_stride; - dst += dst_stride; - copy_128(src, dst); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } -} diff --git a/third_party/aom/av1/common/x86/convolve_2d_sse2.c b/third_party/aom/av1/common/x86/convolve_2d_sse2.c deleted file mode 100644 index b1a62a4f6..000000000 --- a/third_party/aom/av1/common/x86/convolve_2d_sse2.c +++ /dev/null @@ -1,472 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <emmintrin.h> - -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "aom_dsp/x86/convolve_sse2.h" -#include "av1/common/convolve.h" - -void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int bd = 8; - - DECLARE_ALIGNED(16, int16_t, - im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]); - int im_h = h + filter_params_y->taps - 1; - int im_stride = MAX_SB_SIZE; - int i, j; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; - - const __m128i zero = _mm_setzero_si128(); - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - - assert(conv_params->round_0 > 0); - - /* Horizontal filter */ - { - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i 
coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i round_const = _mm_set1_epi32( - (1 << (bd + FILTER_BITS - 1)) + ((1 << conv_params->round_0) >> 1)); - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0); - - for (i = 0; i < im_h; ++i) { - for (j = 0; j < w; j += 8) { - const __m128i data = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); - - // Filter even-index pixels - const __m128i src_0 = _mm_unpacklo_epi8(data, zero); - const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01); - const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(data, 2), zero); - const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); - const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero); - const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); - const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(data, 6), zero); - const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67); - - __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4), - _mm_add_epi32(res_2, res_6)); - res_even = - _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift); - - // Filter odd-index pixels - const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero); - const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01); - const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(data, 3), zero); - const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23); - const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(data, 5), zero); - const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45); - const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(data, 7), zero); - const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67); - - __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5), - _mm_add_epi32(res_3, res_7)); - res_odd = - _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift); - - // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7 - __m128i res = _mm_packs_epi32(res_even, res_odd); - _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], 
res); - } - } - } - - /* Vertical filter */ - { - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i sum_round = - _mm_set1_epi32((1 << offset_bits) + ((1 << conv_params->round_1) >> 1)); - const __m128i sum_shift = _mm_cvtsi32_si128(conv_params->round_1); - - const __m128i round_const = _mm_set1_epi32( - ((1 << bits) >> 1) - (1 << (offset_bits - conv_params->round_1)) - - ((1 << (offset_bits - conv_params->round_1)) >> 1)); - const __m128i round_shift = _mm_cvtsi32_si128(bits); - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - // Filter even-index pixels - const int16_t *data = &im_block[i * im_stride + j]; - const __m128i src_0 = - _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), - *(__m128i *)(data + 1 * im_stride)); - const __m128i src_2 = - _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), - *(__m128i *)(data + 3 * im_stride)); - const __m128i src_4 = - _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), - *(__m128i *)(data + 5 * im_stride)); - const __m128i src_6 = - _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), - *(__m128i *)(data + 7 * im_stride)); - - const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01); - const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); - const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); - const __m128i res_6 = 
_mm_madd_epi16(src_6, coeff_67); - - const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2), - _mm_add_epi32(res_4, res_6)); - - // Filter odd-index pixels - const __m128i src_1 = - _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride), - *(__m128i *)(data + 1 * im_stride)); - const __m128i src_3 = - _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride), - *(__m128i *)(data + 3 * im_stride)); - const __m128i src_5 = - _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride), - *(__m128i *)(data + 5 * im_stride)); - const __m128i src_7 = - _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride), - *(__m128i *)(data + 7 * im_stride)); - - const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01); - const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23); - const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45); - const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67); - - const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3), - _mm_add_epi32(res_5, res_7)); - - // Rearrange pixels back into the order 0 ... 
7 - const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); - const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); - - __m128i res_lo_round = - _mm_sra_epi32(_mm_add_epi32(res_lo, sum_round), sum_shift); - __m128i res_hi_round = - _mm_sra_epi32(_mm_add_epi32(res_hi, sum_round), sum_shift); - - res_lo_round = _mm_sra_epi32(_mm_add_epi32(res_lo_round, round_const), - round_shift); - res_hi_round = _mm_sra_epi32(_mm_add_epi32(res_hi_round, round_const), - round_shift); - - const __m128i res16 = _mm_packs_epi32(res_lo_round, res_hi_round); - const __m128i res = _mm_packus_epi16(res16, res16); - - // Accumulate values into the destination buffer - __m128i *const p = (__m128i *)&dst[i * dst_stride + j]; - - if (w == 2) { - *(uint16_t *)p = _mm_cvtsi128_si32(res); - } else if (w == 4) { - *(uint32_t *)p = _mm_cvtsi128_si32(res); - } else { - _mm_storel_epi64(p, res); - } - } - } - } -} - -static INLINE void copy_128(const uint8_t *src, uint8_t *dst) { - __m128i s[8]; - s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16)); - s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16)); - s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 16)); - s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 16)); - s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 16)); - s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 16)); - s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 16)); - s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 16)); - _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]); - _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]); - _mm_store_si128((__m128i *)(dst + 2 * 16), s[2]); - _mm_store_si128((__m128i *)(dst + 3 * 16), s[3]); - _mm_store_si128((__m128i *)(dst + 4 * 16), s[4]); - _mm_store_si128((__m128i *)(dst + 5 * 16), s[5]); - _mm_store_si128((__m128i *)(dst + 6 * 16), s[6]); - _mm_store_si128((__m128i *)(dst + 7 * 16), s[7]); -} - -void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - 
const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - (void)conv_params; - - if (w >= 16) { - assert(!((intptr_t)dst % 16)); - assert(!(dst_stride % 16)); - } - - if (w == 2) { - do { - memcpy(dst, src, 2 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - memcpy(dst, src, 2 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 4) { - do { - memcpy(dst, src, 4 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - memcpy(dst, src, 4 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 8) { - do { - __m128i s[2]; - s[0] = _mm_loadl_epi64((__m128i *)src); - src += src_stride; - s[1] = _mm_loadl_epi64((__m128i *)src); - src += src_stride; - _mm_storel_epi64((__m128i *)dst, s[0]); - dst += dst_stride; - _mm_storel_epi64((__m128i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 16) { - do { - __m128i s[2]; - s[0] = _mm_loadu_si128((__m128i *)src); - src += src_stride; - s[1] = _mm_loadu_si128((__m128i *)src); - src += src_stride; - _mm_store_si128((__m128i *)dst, s[0]); - dst += dst_stride; - _mm_store_si128((__m128i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 32) { - do { - __m128i s[4]; - s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16)); - s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16)); - src += src_stride; - s[2] = _mm_loadu_si128((__m128i *)(src + 0 * 16)); - s[3] = _mm_loadu_si128((__m128i *)(src + 1 * 16)); - src += src_stride; - _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]); - _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]); - dst += dst_stride; - _mm_store_si128((__m128i *)(dst + 0 * 16), s[2]); - _mm_store_si128((__m128i *)(dst + 1 * 16), s[3]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 64) { - do { 
- __m128i s[8]; - s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16)); - s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16)); - s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 16)); - s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 16)); - src += src_stride; - s[4] = _mm_loadu_si128((__m128i *)(src + 0 * 16)); - s[5] = _mm_loadu_si128((__m128i *)(src + 1 * 16)); - s[6] = _mm_loadu_si128((__m128i *)(src + 2 * 16)); - s[7] = _mm_loadu_si128((__m128i *)(src + 3 * 16)); - src += src_stride; - _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]); - _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]); - _mm_store_si128((__m128i *)(dst + 2 * 16), s[2]); - _mm_store_si128((__m128i *)(dst + 3 * 16), s[3]); - dst += dst_stride; - _mm_store_si128((__m128i *)(dst + 0 * 16), s[4]); - _mm_store_si128((__m128i *)(dst + 1 * 16), s[5]); - _mm_store_si128((__m128i *)(dst + 2 * 16), s[6]); - _mm_store_si128((__m128i *)(dst + 3 * 16), s[7]); - dst += dst_stride; - h -= 2; - } while (h); - } else { - do { - copy_128(src, dst); - src += src_stride; - dst += dst_stride; - copy_128(src, dst); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } -} - -void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, - uint8_t *dst0, int dst_stride0, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int bd = 8; - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - - const int bits = - FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const __m128i zero = _mm_setzero_si128(); - const __m128i left_shift = _mm_cvtsi32_si128(bits); - int i, j; - - const int w0 = conv_params->fwd_offset; - const int w1 
= conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi16(w0); - const __m128i wt1 = _mm_set1_epi16(w1); - const __m128i wt = _mm_unpacklo_epi16(wt0, wt1); - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m128i offset_const = _mm_set1_epi16(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1); - - assert((w % 4) == 0); - - if (!(w % 16)) { - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 16) { - const __m128i d8 = _mm_loadu_si128((__m128i *)&src[j]); - - const __m128i d16_lo = _mm_unpacklo_epi8(d8, zero); - const __m128i d16_hi = _mm_unpackhi_epi8(d8, zero); - - const __m128i res_lo = _mm_sll_epi16(d16_lo, left_shift); - const __m128i res_unsigned_lo = _mm_add_epi16(res_lo, offset_const); - - const __m128i res_hi = _mm_sll_epi16(d16_hi, left_shift); - const __m128i res_unsigned_hi = _mm_add_epi16(res_hi, offset_const); - - if (do_average) { - const __m128i data_ref_0_lo = _mm_loadu_si128((__m128i *)(&dst[j])); - const __m128i data_ref_0_hi = - _mm_loadu_si128((__m128i *)(&dst[j + 8])); - - const __m128i comp_avg_res_lo = - comp_avg(&data_ref_0_lo, &res_unsigned_lo, &wt, use_jnt_comp_avg); - - const __m128i round_result_lo = convolve_rounding( - &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift); - - const __m128i comp_avg_res_hi = - comp_avg(&data_ref_0_hi, &res_unsigned_hi, &wt, use_jnt_comp_avg); - - const __m128i round_result_hi = convolve_rounding( - &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_8 = - _mm_packus_epi16(round_result_lo, round_result_hi); - - _mm_store_si128((__m128i *)(&dst0[j]), res_8); - } else { - _mm_store_si128((__m128i *)(&dst[j]), res_unsigned_lo); - _mm_store_si128((__m128i *)(&dst[j + 8]), res_unsigned_hi); - } - } - src += 
src_stride; - dst += dst_stride; - dst0 += dst_stride0; - } - } else { - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - const __m128i d8 = _mm_loadl_epi64((__m128i *)&src[j]); - const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero); - - const __m128i res = _mm_sll_epi16(d16_0, left_shift); - const __m128i res_unsigned = _mm_add_epi16(res, offset_const); - - if (do_average) { - const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)(&dst[j])); - - const __m128i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m128i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_8 = _mm_packus_epi16(round_result, round_result); - - if (w > 4) - _mm_storel_epi64((__m128i *)(&dst0[j]), res_8); - else - *(uint32_t *)(&dst0[j]) = _mm_cvtsi128_si32(res_8); - } else { - _mm_store_si128((__m128i *)(&dst[j]), res_unsigned); - } - } - src += src_stride; - dst += dst_stride; - dst0 += dst_stride0; - } - } -} diff --git a/third_party/aom/av1/common/x86/convolve_avx2.c b/third_party/aom/av1/common/x86/convolve_avx2.c deleted file mode 100644 index 0e91ea947..000000000 --- a/third_party/aom/av1/common/x86/convolve_avx2.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <immintrin.h> - -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/x86/convolve_avx2.h" -#include "aom_dsp/x86/synonyms.h" - -void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - int i, j; - const int fo_vert = filter_params_y->taps / 2 - 1; - const uint8_t *const src_ptr = src - fo_vert * src_stride; - - // right shift is F-1 because we are already dividing - // filter co-efficients by 2 - const int right_shift_bits = (FILTER_BITS - 1); - const __m128i right_shift = _mm_cvtsi32_si128(right_shift_bits); - const __m256i right_shift_const = - _mm256_set1_epi16((1 << right_shift_bits) >> 1); - __m256i coeffs[4], s[8]; - - assert(conv_params->round_0 <= FILTER_BITS); - assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) || - ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS))); - - prepare_coeffs_lowbd(filter_params_y, subpel_y_q4, coeffs); - - (void)filter_params_x; - (void)subpel_x_q4; - (void)conv_params; - - for (j = 0; j < w; j += 16) { - const uint8_t *data = &src_ptr[j]; - __m256i src6; - - // Load lines a and b. 
Line a to lower 128, line b to upper 128 - const __m256i src_01a = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 0 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 1 * src_stride))), - 0x20); - - const __m256i src_12a = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 1 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 2 * src_stride))), - 0x20); - - const __m256i src_23a = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 2 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 3 * src_stride))), - 0x20); - - const __m256i src_34a = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 3 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 4 * src_stride))), - 0x20); - - const __m256i src_45a = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 4 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 5 * src_stride))), - 0x20); - - src6 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 6 * src_stride))); - const __m256i src_56a = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 5 * src_stride))), - src6, 0x20); - - s[0] = _mm256_unpacklo_epi8(src_01a, src_12a); - s[1] = _mm256_unpacklo_epi8(src_23a, src_34a); - s[2] = _mm256_unpacklo_epi8(src_45a, src_56a); - - s[4] = _mm256_unpackhi_epi8(src_01a, src_12a); - s[5] = _mm256_unpackhi_epi8(src_23a, src_34a); - s[6] = _mm256_unpackhi_epi8(src_45a, src_56a); - - for (i = 0; i < h; i += 2) { - data = &src_ptr[i * src_stride + j]; - const __m256i src_67a = _mm256_permute2x128_si256( - src6, - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 7 * src_stride))), - 0x20); - - src6 = _mm256_castsi128_si256( - 
_mm_loadu_si128((__m128i *)(data + 8 * src_stride))); - const __m256i src_78a = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 7 * src_stride))), - src6, 0x20); - - s[3] = _mm256_unpacklo_epi8(src_67a, src_78a); - s[7] = _mm256_unpackhi_epi8(src_67a, src_78a); - - const __m256i res_lo = convolve_lowbd(s, coeffs); - - /* rounding code */ - // shift by F - 1 - const __m256i res_16b_lo = _mm256_sra_epi16( - _mm256_add_epi16(res_lo, right_shift_const), right_shift); - // 8 bit conversion and saturation to uint8 - __m256i res_8b_lo = _mm256_packus_epi16(res_16b_lo, res_16b_lo); - - if (w - j > 8) { - const __m256i res_hi = convolve_lowbd(s + 4, coeffs); - - /* rounding code */ - // shift by F - 1 - const __m256i res_16b_hi = _mm256_sra_epi16( - _mm256_add_epi16(res_hi, right_shift_const), right_shift); - // 8 bit conversion and saturation to uint8 - __m256i res_8b_hi = _mm256_packus_epi16(res_16b_hi, res_16b_hi); - - __m256i res_a = _mm256_unpacklo_epi64(res_8b_lo, res_8b_hi); - - const __m128i res_0 = _mm256_castsi256_si128(res_a); - const __m128i res_1 = _mm256_extracti128_si256(res_a, 1); - - _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res_0); - _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride], - res_1); - } else { - const __m128i res_0 = _mm256_castsi256_si128(res_8b_lo); - const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1); - if (w - j > 4) { - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_0); - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride], - res_1); - } else if (w - j > 2) { - xx_storel_32(&dst[i * dst_stride + j], res_0); - xx_storel_32(&dst[i * dst_stride + j + dst_stride], res_1); - } else { - __m128i *const p_0 = (__m128i *)&dst[i * dst_stride + j]; - __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + j + dst_stride]; - *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0); - *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1); - } - } - - s[0] = s[1]; - s[1] = 
s[2]; - s[2] = s[3]; - - s[4] = s[5]; - s[5] = s[6]; - s[6] = s[7]; - } - } -} - -void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - int i, j; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint8_t *const src_ptr = src - fo_horiz; - const int bits = FILTER_BITS - conv_params->round_0; - - __m256i filt[4], coeffs[4]; - - filt[0] = _mm256_load_si256((__m256i const *)filt_global_avx2); - filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32)); - filt[2] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2)); - filt[3] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3)); - - prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs); - - const __m256i round_0_const = - _mm256_set1_epi16((1 << (conv_params->round_0 - 1)) >> 1); - const __m128i round_0_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1); - const __m256i round_const = _mm256_set1_epi16((1 << bits) >> 1); - const __m128i round_shift = _mm_cvtsi32_si128(bits); - - (void)filter_params_y; - (void)subpel_y_q4; - - assert(bits >= 0); - assert((FILTER_BITS - conv_params->round_1) >= 0 || - ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS)); - assert(conv_params->round_0 > 0); - - if (w <= 8) { - for (i = 0; i < h; i += 2) { - const __m256i data = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(&src_ptr[i * src_stride]))), - _mm256_castsi128_si256(_mm_loadu_si128( - (__m128i *)(&src_ptr[i * src_stride + src_stride]))), - 0x20); - - __m256i res_16b = convolve_lowbd_x(data, coeffs, filt); - - res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_0_const), - round_0_shift); - - res_16b = - _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_const), round_shift); - - /* rounding code 
*/ - // 8 bit conversion and saturation to uint8 - __m256i res_8b = _mm256_packus_epi16(res_16b, res_16b); - - const __m128i res_0 = _mm256_castsi256_si128(res_8b); - const __m128i res_1 = _mm256_extracti128_si256(res_8b, 1); - if (w > 4) { - _mm_storel_epi64((__m128i *)&dst[i * dst_stride], res_0); - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + dst_stride], res_1); - } else if (w > 2) { - xx_storel_32(&dst[i * dst_stride], res_0); - xx_storel_32(&dst[i * dst_stride + dst_stride], res_1); - } else { - __m128i *const p_0 = (__m128i *)&dst[i * dst_stride]; - __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + dst_stride]; - *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0); - *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1); - } - } - } else { - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 16) { - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15 16 17 18 - // 19 20 21 22 23 - const __m256i data = _mm256_inserti128_si256( - _mm256_loadu_si256((__m256i *)&src_ptr[(i * src_stride) + j]), - _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + (j + 8)]), - 1); - - __m256i res_16b = convolve_lowbd_x(data, coeffs, filt); - - res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_0_const), - round_0_shift); - - res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_const), - round_shift); - - /* rounding code */ - // 8 bit conversion and saturation to uint8 - __m256i res_8b = _mm256_packus_epi16(res_16b, res_16b); - - // Store values into the destination buffer - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - res_8b = _mm256_permute4x64_epi64(res_8b, 216); - __m128i res = _mm256_castsi256_si128(res_8b); - _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res); - } - } - } -} diff --git a/third_party/aom/av1/common/x86/convolve_sse2.c b/third_party/aom/av1/common/x86/convolve_sse2.c deleted file mode 100644 index 5016642de..000000000 --- a/third_party/aom/av1/common/x86/convolve_sse2.c +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Copyright (c) 2017, 
Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <emmintrin.h> - -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "aom_dsp/x86/convolve_common_intrin.h" -#include "av1/common/convolve.h" - -static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params, - const int subpel_q4, - __m128i *const coeffs /* [4] */) { - const int16_t *const y_filter = av1_get_interp_filter_subpel_kernel( - filter_params, subpel_q4 & SUBPEL_MASK); - const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter); - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y); - - coeffs[0] = _mm_unpacklo_epi64(tmp_0, tmp_0); // coeffs 0 1 0 1 0 1 0 1 - coeffs[1] = _mm_unpackhi_epi64(tmp_0, tmp_0); // coeffs 2 3 2 3 2 3 2 3 - coeffs[2] = _mm_unpacklo_epi64(tmp_1, tmp_1); // coeffs 4 5 4 5 4 5 4 5 - coeffs[3] = _mm_unpackhi_epi64(tmp_1, tmp_1); // coeffs 6 7 6 7 6 7 6 7 -} - -static INLINE __m128i convolve(const __m128i *const s, - const __m128i *const coeffs) { - const __m128i d0 = _mm_madd_epi16(s[0], coeffs[0]); - const __m128i d1 = _mm_madd_epi16(s[1], coeffs[1]); - const __m128i d2 = _mm_madd_epi16(s[2], coeffs[2]); - const __m128i d3 = _mm_madd_epi16(s[3], coeffs[3]); - const __m128i d = _mm_add_epi32(_mm_add_epi32(d0, d1), _mm_add_epi32(d2, d3)); - return d; -} - -static INLINE __m128i convolve_lo_x(const __m128i *const s, - 
const __m128i *const coeffs) { - __m128i ss[4]; - ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128()); - ss[1] = _mm_unpacklo_epi8(s[1], _mm_setzero_si128()); - ss[2] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128()); - ss[3] = _mm_unpacklo_epi8(s[3], _mm_setzero_si128()); - return convolve(ss, coeffs); -} - -static INLINE __m128i convolve_lo_y(const __m128i *const s, - const __m128i *const coeffs) { - __m128i ss[4]; - ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128()); - ss[1] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128()); - ss[2] = _mm_unpacklo_epi8(s[4], _mm_setzero_si128()); - ss[3] = _mm_unpacklo_epi8(s[6], _mm_setzero_si128()); - return convolve(ss, coeffs); -} - -static INLINE __m128i convolve_hi_y(const __m128i *const s, - const __m128i *const coeffs) { - __m128i ss[4]; - ss[0] = _mm_unpackhi_epi8(s[0], _mm_setzero_si128()); - ss[1] = _mm_unpackhi_epi8(s[2], _mm_setzero_si128()); - ss[2] = _mm_unpackhi_epi8(s[4], _mm_setzero_si128()); - ss[3] = _mm_unpackhi_epi8(s[6], _mm_setzero_si128()); - return convolve(ss, coeffs); -} - -void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int fo_vert = filter_params_y->taps / 2 - 1; - const uint8_t *src_ptr = src - fo_vert * src_stride; - const __m128i round_const = _mm_set1_epi32((1 << FILTER_BITS) >> 1); - const __m128i round_shift = _mm_cvtsi32_si128(FILTER_BITS); - __m128i coeffs[4]; - - (void)filter_params_x; - (void)subpel_x_q4; - (void)conv_params; - - assert(conv_params->round_0 <= FILTER_BITS); - assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) || - ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS))); - - prepare_coeffs(filter_params_y, subpel_y_q4, coeffs); - - if (w <= 4) { - __m128i s[8], src6, res, res_round, res16; - uint32_t 
res_int; - src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride)); - s[0] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride))); - s[1] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride))); - s[2] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride))); - s[3] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride))); - s[4] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride))); - s[5] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6); - - do { - s[6] = _mm_unpacklo_epi8( - src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride))); - src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride)); - s[7] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6); - - res = convolve_lo_y(s + 0, coeffs); - res_round = _mm_sra_epi32(_mm_add_epi32(res, round_const), round_shift); - res16 = _mm_packs_epi32(res_round, res_round); - res_int = _mm_cvtsi128_si32(_mm_packus_epi16(res16, res16)); - - if (w == 2) - *(uint16_t *)dst = res_int; - else - *(uint32_t *)dst = res_int; - - src_ptr += src_stride; - dst += dst_stride; - - res = convolve_lo_y(s + 1, coeffs); - res_round = _mm_sra_epi32(_mm_add_epi32(res, round_const), round_shift); - res16 = _mm_packs_epi32(res_round, res_round); - res_int = _mm_cvtsi128_si32(_mm_packus_epi16(res16, res16)); - - if (w == 2) - *(uint16_t *)dst = res_int; - else - *(uint32_t *)dst = res_int; - - src_ptr += src_stride; - dst += dst_stride; - - s[0] = s[2]; - s[1] = s[3]; - s[2] = s[4]; - s[3] = s[5]; - s[4] = 
s[6]; - s[5] = s[7]; - h -= 2; - } while (h); - } else { - assert(!(w % 8)); - int j = 0; - do { - __m128i s[8], src6, res_lo, res_hi; - __m128i res_lo_round, res_hi_round, res16, res; - const uint8_t *data = &src_ptr[j]; - - src6 = _mm_loadl_epi64((__m128i *)(data + 6 * src_stride)); - s[0] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 0 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 1 * src_stride))); - s[1] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 2 * src_stride))); - s[2] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 3 * src_stride))); - s[3] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 4 * src_stride))); - s[4] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 5 * src_stride))); - s[5] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)), src6); - - int i = 0; - do { - data = &src_ptr[i * src_stride + j]; - s[6] = _mm_unpacklo_epi8( - src6, _mm_loadl_epi64((__m128i *)(data + 7 * src_stride))); - src6 = _mm_loadl_epi64((__m128i *)(data + 8 * src_stride)); - s[7] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)), src6); - - res_lo = convolve_lo_y(s, coeffs); // Filter low index pixels - res_hi = convolve_hi_y(s, coeffs); // Filter high index pixels - - res_lo_round = - _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift); - res_hi_round = - _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift); - - res16 = _mm_packs_epi32(res_lo_round, res_hi_round); - res = _mm_packus_epi16(res16, res16); - - _mm_storel_epi64((__m128i *)(dst + i * dst_stride + j), res); - i++; - - res_lo = convolve_lo_y(s + 1, coeffs); // Filter low index pixels - res_hi = convolve_hi_y(s + 1, coeffs); // Filter high index pixels - - res_lo_round 
= - _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift); - res_hi_round = - _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift); - - res16 = _mm_packs_epi32(res_lo_round, res_hi_round); - res = _mm_packus_epi16(res16, res16); - - _mm_storel_epi64((__m128i *)(dst + i * dst_stride + j), res); - i++; - - s[0] = s[2]; - s[1] = s[3]; - s[2] = s[4]; - s[3] = s[5]; - s[4] = s[6]; - s[5] = s[7]; - } while (i < h); - j += 8; - } while (j < w); - } -} - -void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint8_t *src_ptr = src - fo_horiz; - const int bits = FILTER_BITS - conv_params->round_0; - const __m128i round_0_const = - _mm_set1_epi32((1 << conv_params->round_0) >> 1); - const __m128i round_const = _mm_set1_epi32((1 << bits) >> 1); - const __m128i round_0_shift = _mm_cvtsi32_si128(conv_params->round_0); - const __m128i round_shift = _mm_cvtsi32_si128(bits); - __m128i coeffs[4]; - - (void)filter_params_y; - (void)subpel_y_q4; - - assert(bits >= 0); - assert((FILTER_BITS - conv_params->round_1) >= 0 || - ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS)); - - prepare_coeffs(filter_params_x, subpel_x_q4, coeffs); - - if (w <= 4) { - do { - const __m128i data = _mm_loadu_si128((__m128i *)src_ptr); - __m128i s[4]; - - s[0] = _mm_unpacklo_epi8(data, _mm_srli_si128(data, 1)); - s[1] = - _mm_unpacklo_epi8(_mm_srli_si128(data, 2), _mm_srli_si128(data, 3)); - s[2] = - _mm_unpacklo_epi8(_mm_srli_si128(data, 4), _mm_srli_si128(data, 5)); - s[3] = - _mm_unpacklo_epi8(_mm_srli_si128(data, 6), _mm_srli_si128(data, 7)); - const __m128i res_lo = convolve_lo_x(s, coeffs); - __m128i res_lo_round = - _mm_sra_epi32(_mm_add_epi32(res_lo, round_0_const), 
round_0_shift); - res_lo_round = - _mm_sra_epi32(_mm_add_epi32(res_lo_round, round_const), round_shift); - - const __m128i res16 = _mm_packs_epi32(res_lo_round, res_lo_round); - const __m128i res = _mm_packus_epi16(res16, res16); - - uint32_t r = _mm_cvtsi128_si32(res); - if (w == 2) - *(uint16_t *)dst = r; - else - *(uint32_t *)dst = r; - - src_ptr += src_stride; - dst += dst_stride; - } while (--h); - } else { - assert(!(w % 8)); - int i = 0; - do { - int j = 0; - do { - const __m128i data = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); - __m128i s[4]; - - // Filter even-index pixels - s[0] = data; - s[1] = _mm_srli_si128(data, 2); - s[2] = _mm_srli_si128(data, 4); - s[3] = _mm_srli_si128(data, 6); - const __m128i res_even = convolve_lo_x(s, coeffs); - - // Filter odd-index pixels - s[0] = _mm_srli_si128(data, 1); - s[1] = _mm_srli_si128(data, 3); - s[2] = _mm_srli_si128(data, 5); - s[3] = _mm_srli_si128(data, 7); - const __m128i res_odd = convolve_lo_x(s, coeffs); - - // Rearrange pixels back into the order 0 ... 
7 - const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); - const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); - __m128i res_lo_round = - _mm_sra_epi32(_mm_add_epi32(res_lo, round_0_const), round_0_shift); - res_lo_round = _mm_sra_epi32(_mm_add_epi32(res_lo_round, round_const), - round_shift); - __m128i res_hi_round = - _mm_sra_epi32(_mm_add_epi32(res_hi, round_0_const), round_0_shift); - res_hi_round = _mm_sra_epi32(_mm_add_epi32(res_hi_round, round_const), - round_shift); - - const __m128i res16 = _mm_packs_epi32(res_lo_round, res_hi_round); - const __m128i res = _mm_packus_epi16(res16, res16); - - _mm_storel_epi64((__m128i *)(dst + i * dst_stride + j), res); - j += 8; - } while (j < w); - } while (++i < h); - } -} diff --git a/third_party/aom/av1/common/x86/filterintra_sse4.c b/third_party/aom/av1/common/x86/filterintra_sse4.c deleted file mode 100644 index c11edc1d4..000000000 --- a/third_party/aom/av1/common/x86/filterintra_sse4.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <smmintrin.h> - -#include "config/av1_rtcd.h" - -#include "aom_dsp/x86/synonyms.h" -#include "av1/common/enums.h" -#include "av1/common/reconintra.h" - -void av1_filter_intra_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, - TX_SIZE tx_size, const uint8_t *above, - const uint8_t *left, int mode) { - int r, c; - uint8_t buffer[33][33]; - const int bw = tx_size_wide[tx_size]; - const int bh = tx_size_high[tx_size]; - - assert(bw <= 32 && bh <= 32); - - // The initialization is just for silencing Jenkins static analysis warnings - for (r = 0; r < bh + 1; ++r) - memset(buffer[r], 0, (bw + 1) * sizeof(buffer[0][0])); - - for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r]; - memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t)); - - const __m128i f1f0 = xx_load_128(av1_filter_intra_taps[mode][0]); - const __m128i f3f2 = xx_load_128(av1_filter_intra_taps[mode][2]); - const __m128i f5f4 = xx_load_128(av1_filter_intra_taps[mode][4]); - const __m128i f7f6 = xx_load_128(av1_filter_intra_taps[mode][6]); - const __m128i filter_intra_scale_bits = - _mm_set1_epi16(1 << (15 - FILTER_INTRA_SCALE_BITS)); - - for (r = 1; r < bh + 1; r += 2) { - for (c = 1; c < bw + 1; c += 4) { - DECLARE_ALIGNED(16, uint8_t, p[8]); - memcpy(p, &buffer[r - 1][c - 1], 5 * sizeof(uint8_t)); - p[5] = buffer[r][c - 1]; - p[6] = buffer[r + 1][c - 1]; - p[7] = 0; - const __m128i p_b = xx_loadl_64(p); - const __m128i in = _mm_unpacklo_epi64(p_b, p_b); - const __m128i out_01 = _mm_maddubs_epi16(in, f1f0); - const __m128i out_23 = _mm_maddubs_epi16(in, f3f2); - const __m128i out_45 = _mm_maddubs_epi16(in, f5f4); - const __m128i out_67 = _mm_maddubs_epi16(in, f7f6); - const __m128i out_0123 = _mm_hadd_epi16(out_01, out_23); - const __m128i out_4567 = _mm_hadd_epi16(out_45, out_67); - const __m128i out_01234567 = _mm_hadd_epi16(out_0123, out_4567); - // Rounding - const __m128i round_w = - _mm_mulhrs_epi16(out_01234567, filter_intra_scale_bits); - const __m128i out_r = 
_mm_packus_epi16(round_w, round_w); - const __m128i out_r1 = _mm_srli_si128(out_r, 4); - // Storing - xx_storel_32(&buffer[r][c], out_r); - xx_storel_32(&buffer[r + 1][c], out_r1); - } - } - - for (r = 0; r < bh; ++r) { - memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t)); - dst += stride; - } -} diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c deleted file mode 100644 index ae68f0bbb..000000000 --- a/third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <immintrin.h> -#include <assert.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/x86/convolve_avx2.h" -#include "aom_dsp/x86/synonyms.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "av1/common/convolve.h" - -void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, - const int subpel_y_q4, - ConvolveParams *conv_params, int bd) { - DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); - int im_h = h + filter_params_y->taps - 1; - int im_stride = 8; - int i, j; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; - - // Check that, even with 12-bit input, the intermediate values will fit - // into an unsigned 16-bit intermediate array. - assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16); - - __m256i s[8], coeffs_y[4], coeffs_x[4]; - - const __m256i round_const_x = _mm256_set1_epi32( - ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1))); - const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0); - - const __m256i round_const_y = _mm256_set1_epi32( - ((1 << conv_params->round_1) >> 1) - - (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1))); - const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1); - - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - const __m128i round_shift_bits = _mm_cvtsi32_si128(bits); - const __m256i round_const_bits = _mm256_set1_epi32((1 << bits) >> 1); - const __m256i clip_pixel = - _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 
4095 : 255)); - const __m256i zero = _mm256_setzero_si256(); - - prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x); - prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y); - - for (j = 0; j < w; j += 8) { - /* Horizontal filter */ - { - for (i = 0; i < im_h; i += 2) { - const __m256i row0 = - _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]); - __m256i row1 = _mm256_set1_epi16(0); - if (i + 1 < im_h) - row1 = - _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]); - - const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); - const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); - - // even pixels - s[0] = _mm256_alignr_epi8(r1, r0, 0); - s[1] = _mm256_alignr_epi8(r1, r0, 4); - s[2] = _mm256_alignr_epi8(r1, r0, 8); - s[3] = _mm256_alignr_epi8(r1, r0, 12); - - __m256i res_even = convolve(s, coeffs_x); - res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x), - round_shift_x); - - // odd pixels - s[0] = _mm256_alignr_epi8(r1, r0, 2); - s[1] = _mm256_alignr_epi8(r1, r0, 6); - s[2] = _mm256_alignr_epi8(r1, r0, 10); - s[3] = _mm256_alignr_epi8(r1, r0, 14); - - __m256i res_odd = convolve(s, coeffs_x); - res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x), - round_shift_x); - - __m256i res_even1 = _mm256_packs_epi32(res_even, res_even); - __m256i res_odd1 = _mm256_packs_epi32(res_odd, res_odd); - __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1); - - _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); - } - } - - /* Vertical filter */ - { - __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); - __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); - __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); - __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); - __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); - __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); - - 
s[0] = _mm256_unpacklo_epi16(s0, s1); - s[1] = _mm256_unpacklo_epi16(s2, s3); - s[2] = _mm256_unpacklo_epi16(s4, s5); - - s[4] = _mm256_unpackhi_epi16(s0, s1); - s[5] = _mm256_unpackhi_epi16(s2, s3); - s[6] = _mm256_unpackhi_epi16(s4, s5); - - for (i = 0; i < h; i += 2) { - const int16_t *data = &im_block[i * im_stride]; - - const __m256i s6 = - _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); - const __m256i s7 = - _mm256_loadu_si256((__m256i *)(data + 7 * im_stride)); - - s[3] = _mm256_unpacklo_epi16(s6, s7); - s[7] = _mm256_unpackhi_epi16(s6, s7); - - const __m256i res_a = convolve(s, coeffs_y); - __m256i res_a_round = _mm256_sra_epi32( - _mm256_add_epi32(res_a, round_const_y), round_shift_y); - - res_a_round = _mm256_sra_epi32( - _mm256_add_epi32(res_a_round, round_const_bits), round_shift_bits); - - if (w - j > 4) { - const __m256i res_b = convolve(s + 4, coeffs_y); - __m256i res_b_round = _mm256_sra_epi32( - _mm256_add_epi32(res_b, round_const_y), round_shift_y); - res_b_round = - _mm256_sra_epi32(_mm256_add_epi32(res_b_round, round_const_bits), - round_shift_bits); - - __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round); - res_16bit = _mm256_min_epi16(res_16bit, clip_pixel); - res_16bit = _mm256_max_epi16(res_16bit, zero); - - _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], - _mm256_castsi256_si128(res_16bit)); - _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride], - _mm256_extracti128_si256(res_16bit, 1)); - } else if (w == 4) { - res_a_round = _mm256_packs_epi32(res_a_round, res_a_round); - res_a_round = _mm256_min_epi16(res_a_round, clip_pixel); - res_a_round = _mm256_max_epi16(res_a_round, zero); - - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], - _mm256_castsi256_si128(res_a_round)); - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride], - _mm256_extracti128_si256(res_a_round, 1)); - } else { - res_a_round = _mm256_packs_epi32(res_a_round, res_a_round); - res_a_round = 
_mm256_min_epi16(res_a_round, clip_pixel); - res_a_round = _mm256_max_epi16(res_a_round, zero); - - xx_storel_32((__m128i *)&dst[i * dst_stride + j], - _mm256_castsi256_si128(res_a_round)); - xx_storel_32((__m128i *)&dst[i * dst_stride + j + dst_stride], - _mm256_extracti128_si256(res_a_round, 1)); - } - - s[0] = s[1]; - s[1] = s[2]; - s[2] = s[3]; - - s[4] = s[5]; - s[5] = s[6]; - s[6] = s[7]; - } - } - } -} - -static INLINE void copy_64(const uint16_t *src, uint16_t *dst) { - __m256i s[4]; - s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16)); - s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 16)); - s[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 16)); - s[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 16)); - _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[0]); - _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[1]); - _mm256_storeu_si256((__m256i *)(dst + 2 * 16), s[2]); - _mm256_storeu_si256((__m256i *)(dst + 3 * 16), s[3]); -} - -static INLINE void copy_128(const uint16_t *src, uint16_t *dst) { - __m256i s[8]; - s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16)); - s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 16)); - s[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 16)); - s[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 16)); - s[4] = _mm256_loadu_si256((__m256i *)(src + 4 * 16)); - s[5] = _mm256_loadu_si256((__m256i *)(src + 5 * 16)); - s[6] = _mm256_loadu_si256((__m256i *)(src + 6 * 16)); - s[7] = _mm256_loadu_si256((__m256i *)(src + 7 * 16)); - - _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[0]); - _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[1]); - _mm256_storeu_si256((__m256i *)(dst + 2 * 16), s[2]); - _mm256_storeu_si256((__m256i *)(dst + 3 * 16), s[3]); - _mm256_storeu_si256((__m256i *)(dst + 4 * 16), s[4]); - _mm256_storeu_si256((__m256i *)(dst + 5 * 16), s[5]); - _mm256_storeu_si256((__m256i *)(dst + 6 * 16), s[6]); - _mm256_storeu_si256((__m256i *)(dst + 7 * 16), s[7]); -} - -void av1_highbd_convolve_2d_copy_sr_avx2( - const uint16_t 
*src, int src_stride, uint16_t *dst, int dst_stride, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - (void)conv_params; - (void)bd; - - if (w >= 16) { - assert(!((intptr_t)dst % 16)); - assert(!(dst_stride % 16)); - } - - if (w == 2) { - do { - memcpy(dst, src, 2 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - memcpy(dst, src, 2 * sizeof(*src)); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 4) { - do { - __m128i s[2]; - s[0] = _mm_loadl_epi64((__m128i *)src); - src += src_stride; - s[1] = _mm_loadl_epi64((__m128i *)src); - src += src_stride; - _mm_storel_epi64((__m128i *)dst, s[0]); - dst += dst_stride; - _mm_storel_epi64((__m128i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 8) { - do { - __m128i s[2]; - s[0] = _mm_loadu_si128((__m128i *)src); - src += src_stride; - s[1] = _mm_loadu_si128((__m128i *)src); - src += src_stride; - _mm_store_si128((__m128i *)dst, s[0]); - dst += dst_stride; - _mm_store_si128((__m128i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 16) { - do { - __m256i s[2]; - s[0] = _mm256_loadu_si256((__m256i *)src); - src += src_stride; - s[1] = _mm256_loadu_si256((__m256i *)src); - src += src_stride; - _mm256_storeu_si256((__m256i *)dst, s[0]); - dst += dst_stride; - _mm256_storeu_si256((__m256i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 32) { - do { - __m256i s[4]; - s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16)); - s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 16)); - src += src_stride; - s[2] = _mm256_loadu_si256((__m256i *)(src + 0 * 16)); - s[3] = _mm256_loadu_si256((__m256i *)(src + 1 * 16)); - src += src_stride; - _mm256_storeu_si256((__m256i *)(dst + 0 
* 16), s[0]); - _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[1]); - dst += dst_stride; - _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[2]); - _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[3]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 64) { - do { - copy_64(src, dst); - src += src_stride; - dst += dst_stride; - copy_64(src, dst); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } else { - do { - copy_128(src, dst); - src += src_stride; - dst += dst_stride; - copy_128(src, dst); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } -} diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c deleted file mode 100644 index 15f8872c1..000000000 --- a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ -#include <emmintrin.h> -#include <assert.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/aom_filter.h" - -static INLINE void copy_64(const uint16_t *src, uint16_t *dst) { - __m128i s[8]; - s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8)); - s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8)); - s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8)); - s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8)); - s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 8)); - s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 8)); - s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 8)); - s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 8)); - _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]); - _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]); - _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]); - _mm_store_si128((__m128i *)(dst + 4 * 8), s[4]); - _mm_store_si128((__m128i *)(dst + 5 * 8), s[5]); - _mm_store_si128((__m128i *)(dst + 6 * 8), s[6]); - _mm_store_si128((__m128i *)(dst + 7 * 8), s[7]); -} - -static INLINE void copy_128(const uint16_t *src, uint16_t *dst) { - __m128i s[16]; - s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8)); - s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8)); - s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8)); - s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8)); - s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 8)); - s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 8)); - s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 8)); - s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 8)); - s[8] = _mm_loadu_si128((__m128i *)(src + 8 * 8)); - s[9] = _mm_loadu_si128((__m128i *)(src + 9 * 8)); - s[10] = _mm_loadu_si128((__m128i *)(src + 10 * 8)); - s[11] = _mm_loadu_si128((__m128i *)(src + 11 * 8)); - s[12] = _mm_loadu_si128((__m128i *)(src + 12 * 8)); - s[13] = _mm_loadu_si128((__m128i *)(src + 13 * 8)); - s[14] = _mm_loadu_si128((__m128i *)(src + 14 * 8)); - s[15] = _mm_loadu_si128((__m128i *)(src + 15 * 8)); - _mm_store_si128((__m128i *)(dst + 0 
* 8), s[0]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]); - _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]); - _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]); - _mm_store_si128((__m128i *)(dst + 4 * 8), s[4]); - _mm_store_si128((__m128i *)(dst + 5 * 8), s[5]); - _mm_store_si128((__m128i *)(dst + 6 * 8), s[6]); - _mm_store_si128((__m128i *)(dst + 7 * 8), s[7]); - _mm_store_si128((__m128i *)(dst + 8 * 8), s[8]); - _mm_store_si128((__m128i *)(dst + 9 * 8), s[9]); - _mm_store_si128((__m128i *)(dst + 10 * 8), s[10]); - _mm_store_si128((__m128i *)(dst + 11 * 8), s[11]); - _mm_store_si128((__m128i *)(dst + 12 * 8), s[12]); - _mm_store_si128((__m128i *)(dst + 13 * 8), s[13]); - _mm_store_si128((__m128i *)(dst + 14 * 8), s[14]); - _mm_store_si128((__m128i *)(dst + 15 * 8), s[15]); -} - -void av1_highbd_convolve_2d_copy_sr_sse2( - const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - (void)conv_params; - (void)bd; - if (w >= 16) { - assert(!((intptr_t)dst % 16)); - assert(!(dst_stride % 16)); - } - - if (w == 2) { - do { - __m128i s = _mm_loadl_epi64((__m128i *)src); - *(uint32_t *)dst = _mm_cvtsi128_si32(s); - src += src_stride; - dst += dst_stride; - s = _mm_loadl_epi64((__m128i *)src); - *(uint32_t *)dst = _mm_cvtsi128_si32(s); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 4) { - do { - __m128i s[2]; - s[0] = _mm_loadl_epi64((__m128i *)src); - src += src_stride; - s[1] = _mm_loadl_epi64((__m128i *)src); - src += src_stride; - _mm_storel_epi64((__m128i *)dst, s[0]); - dst += dst_stride; - _mm_storel_epi64((__m128i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 8) { - do { - __m128i s[2]; - s[0] = 
_mm_loadu_si128((__m128i *)src); - src += src_stride; - s[1] = _mm_loadu_si128((__m128i *)src); - src += src_stride; - _mm_store_si128((__m128i *)dst, s[0]); - dst += dst_stride; - _mm_store_si128((__m128i *)dst, s[1]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 16) { - do { - __m128i s[4]; - s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8)); - s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8)); - src += src_stride; - s[2] = _mm_loadu_si128((__m128i *)(src + 0 * 8)); - s[3] = _mm_loadu_si128((__m128i *)(src + 1 * 8)); - src += src_stride; - _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]); - dst += dst_stride; - _mm_store_si128((__m128i *)(dst + 0 * 8), s[2]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[3]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 32) { - do { - __m128i s[8]; - s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8)); - s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8)); - s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8)); - s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8)); - src += src_stride; - s[4] = _mm_loadu_si128((__m128i *)(src + 0 * 8)); - s[5] = _mm_loadu_si128((__m128i *)(src + 1 * 8)); - s[6] = _mm_loadu_si128((__m128i *)(src + 2 * 8)); - s[7] = _mm_loadu_si128((__m128i *)(src + 3 * 8)); - src += src_stride; - _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]); - _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]); - _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]); - dst += dst_stride; - _mm_store_si128((__m128i *)(dst + 0 * 8), s[4]); - _mm_store_si128((__m128i *)(dst + 1 * 8), s[5]); - _mm_store_si128((__m128i *)(dst + 2 * 8), s[6]); - _mm_store_si128((__m128i *)(dst + 3 * 8), s[7]); - dst += dst_stride; - h -= 2; - } while (h); - } else if (w == 64) { - do { - copy_64(src, dst); - src += src_stride; - dst += dst_stride; - copy_64(src, dst); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } 
else { - do { - copy_128(src, dst); - src += src_stride; - dst += dst_stride; - copy_128(src, dst); - src += src_stride; - dst += dst_stride; - h -= 2; - } while (h); - } -} diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c deleted file mode 100644 index 3f8dafb4b..000000000 --- a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <tmmintrin.h> -#include <smmintrin.h> -#include <assert.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "aom_dsp/x86/convolve_sse2.h" -#include "aom_dsp/x86/convolve_sse4_1.h" -#include "av1/common/convolve.h" - -void av1_highbd_jnt_convolve_2d_copy_sse4_1( - const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - - const int bits = - FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; - const __m128i left_shift = _mm_cvtsi32_si128(bits); - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = 
conv_params->use_jnt_comp_avg; - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi32(w0); - const __m128i wt1 = _mm_set1_epi32(w1); - const __m128i zero = _mm_setzero_si128(); - int i, j; - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m128i offset_const = _mm_set1_epi32(offset); - const __m128i offset_const_16b = _mm_set1_epi16(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1); - const __m128i clip_pixel_to_bd = - _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255)); - - assert(bits <= 4); - - if (!(w % 8)) { - for (i = 0; i < h; i += 1) { - for (j = 0; j < w; j += 8) { - const __m128i src_16bit = - _mm_loadu_si128((__m128i *)(&src[i * src_stride + j])); - const __m128i res = _mm_sll_epi16(src_16bit, left_shift); - if (do_average) { - const __m128i data_0 = - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])); - - const __m128i data_ref_0_lo = _mm_unpacklo_epi16(data_0, zero); - const __m128i data_ref_0_hi = _mm_unpackhi_epi16(data_0, zero); - - const __m128i res_32b_lo = _mm_unpacklo_epi16(res, zero); - const __m128i res_unsigned_lo = - _mm_add_epi32(res_32b_lo, offset_const); - - const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1( - &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - - const __m128i res_32b_hi = _mm_unpackhi_epi16(res, zero); - const __m128i res_unsigned_hi = - _mm_add_epi32(res_32b_hi, offset_const); - - const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1( - &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg); - - const __m128i round_result_lo = highbd_convolve_rounding_sse2( - &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift); - const __m128i round_result_hi = 
highbd_convolve_rounding_sse2( - &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_16b = - _mm_packus_epi32(round_result_lo, round_result_hi); - const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd); - - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); - } else { - const __m128i res_unsigned_16b = - _mm_adds_epu16(res, offset_const_16b); - - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), - res_unsigned_16b); - } - } - } - } else if (!(w % 4)) { - for (i = 0; i < h; i += 2) { - for (j = 0; j < w; j += 4) { - const __m128i src_row_0 = - _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j])); - const __m128i src_row_1 = - _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j + src_stride])); - const __m128i src_10 = _mm_unpacklo_epi64(src_row_0, src_row_1); - - const __m128i res = _mm_sll_epi16(src_10, left_shift); - - if (do_average) { - const __m128i data_0 = - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])); - const __m128i data_1 = _mm_loadl_epi64( - (__m128i *)(&dst[i * dst_stride + j + dst_stride])); - - const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero); - const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero); - - const __m128i res_32b = _mm_unpacklo_epi16(res, zero); - const __m128i res_unsigned_lo = _mm_add_epi32(res_32b, offset_const); - - const __m128i res_32b_hi = _mm_unpackhi_epi16(res, zero); - const __m128i res_unsigned_hi = - _mm_add_epi32(res_32b_hi, offset_const); - - const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1( - &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1( - &data_ref_1, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg); - - const __m128i round_result_lo = highbd_convolve_rounding_sse2( - &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift); - const __m128i round_result_hi = highbd_convolve_rounding_sse2( - &comp_avg_res_hi, &offset_const, &rounding_const, 
rounding_shift); - - const __m128i res_16b = - _mm_packus_epi32(round_result_lo, round_result_hi); - const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd); - - const __m128i res_1 = _mm_srli_si128(res_clip, 8); - - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); - _mm_storel_epi64( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); - } else { - const __m128i res_unsigned_16b = - _mm_adds_epu16(res, offset_const_16b); - - const __m128i res_1 = _mm_srli_si128(res_unsigned_16b, 8); - - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), - res_unsigned_16b); - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } - } - } -} - -void av1_highbd_jnt_convolve_2d_sse4_1( - const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - DECLARE_ALIGNED(16, int16_t, - im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]); - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - int im_h = h + filter_params_y->taps - 1; - int im_stride = MAX_SB_SIZE; - int i, j; - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; - - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi32(w0); - const __m128i wt1 = _mm_set1_epi32(w1); - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m128i offset_const = _mm_set1_epi32(offset); - const int rounding_shift = - 2 * 
FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1); - const __m128i clip_pixel_to_bd = - _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255)); - - // Check that, even with 12-bit input, the intermediate values will fit - // into an unsigned 16-bit intermediate array. - assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16); - - /* Horizontal filter */ - { - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i round_const = _mm_set1_epi32( - ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1))); - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0); - - for (i = 0; i < im_h; ++i) { - for (j = 0; j < w; j += 8) { - const __m128i data = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); - const __m128i data2 = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]); - - // Filter even-index pixels - const __m128i res_0 = _mm_madd_epi16(data, coeff_01); - const __m128i res_2 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23); - const __m128i res_4 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45); - const __m128i res_6 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67); - - __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, 
res_4), - _mm_add_epi32(res_2, res_6)); - res_even = - _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift); - - // Filter odd-index pixels - const __m128i res_1 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01); - const __m128i res_3 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23); - const __m128i res_5 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45); - const __m128i res_7 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67); - - __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5), - _mm_add_epi32(res_3, res_7)); - res_odd = - _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift); - - // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7 - __m128i res = _mm_packs_epi32(res_even, res_odd); - _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res); - } - } - } - - /* Vertical filter */ - { - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i round_const = _mm_set1_epi32( - ((1 << conv_params->round_1) >> 1) - - (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1))); - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1); - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - // Filter even-index pixels - const int16_t *data = &im_block[i * im_stride + j]; - const __m128i src_0 = - 
_mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), - *(__m128i *)(data + 1 * im_stride)); - const __m128i src_2 = - _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), - *(__m128i *)(data + 3 * im_stride)); - const __m128i src_4 = - _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), - *(__m128i *)(data + 5 * im_stride)); - const __m128i src_6 = - _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), - *(__m128i *)(data + 7 * im_stride)); - - const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01); - const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); - const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); - const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67); - - const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2), - _mm_add_epi32(res_4, res_6)); - - // Filter odd-index pixels - const __m128i src_1 = - _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride), - *(__m128i *)(data + 1 * im_stride)); - const __m128i src_3 = - _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride), - *(__m128i *)(data + 3 * im_stride)); - const __m128i src_5 = - _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride), - *(__m128i *)(data + 5 * im_stride)); - const __m128i src_7 = - _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride), - *(__m128i *)(data + 7 * im_stride)); - - const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01); - const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23); - const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45); - const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67); - - const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3), - _mm_add_epi32(res_5, res_7)); - - // Rearrange pixels back into the order 0 ... 
7 - const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); - const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); - - const __m128i res_lo_round = - _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift); - - const __m128i res_unsigned_lo = - _mm_add_epi32(res_lo_round, offset_const); - - if (w < 8) { - if (do_average) { - const __m128i data_0 = - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])); - - const __m128i data_ref_0 = _mm_cvtepu16_epi32(data_0); - - const __m128i comp_avg_res = highbd_comp_avg_sse4_1( - &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - - const __m128i round_result = highbd_convolve_rounding_sse2( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_16b = - _mm_packus_epi32(round_result, round_result); - const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd); - - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); - } else { - const __m128i res_16b = - _mm_packus_epi32(res_unsigned_lo, res_unsigned_lo); - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_16b); - } - } else { - const __m128i res_hi_round = - _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift); - - const __m128i res_unsigned_hi = - _mm_add_epi32(res_hi_round, offset_const); - - if (do_average) { - const __m128i data_lo = - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])); - const __m128i data_hi = - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j + 4])); - - const __m128i data_ref_0_lo = _mm_cvtepu16_epi32(data_lo); - const __m128i data_ref_0_hi = _mm_cvtepu16_epi32(data_hi); - - const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1( - &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1( - &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg); - - const __m128i round_result_lo = - highbd_convolve_rounding_sse2(&comp_avg_res_lo, &offset_const, - &rounding_const, 
rounding_shift); - const __m128i round_result_hi = - highbd_convolve_rounding_sse2(&comp_avg_res_hi, &offset_const, - &rounding_const, rounding_shift); - - const __m128i res_16b = - _mm_packus_epi32(round_result_lo, round_result_hi); - const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd); - - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); - } else { - const __m128i res_16b = - _mm_packus_epi32(res_unsigned_lo, res_unsigned_hi); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16b); - } - } - } - } - } -} diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c deleted file mode 100644 index 1d029db39..000000000 --- a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <tmmintrin.h> -#include <assert.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "aom_dsp/x86/convolve_sse2.h" -#include "av1/common/convolve.h" - -void av1_highbd_convolve_2d_sr_ssse3( - const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); - int im_h = h + filter_params_y->taps - 1; - int im_stride = 8; - int i, j; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; - - // Check that, even with 12-bit input, the intermediate values will fit - // into an unsigned 16-bit intermediate array. - assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16); - __m128i coeffs_x[4], coeffs_y[4], s[16]; - - const __m128i round_const_x = _mm_set1_epi32( - ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1))); - const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0); - - const __m128i round_const_y = - _mm_set1_epi32(((1 << conv_params->round_1) >> 1) - - (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1))); - const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1); - - const int bits = - FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1; - const __m128i round_shift_bits = _mm_cvtsi32_si128(bits); - const __m128i round_const_bits = _mm_set1_epi32((1 << bits) >> 1); - const __m128i clip_pixel = - _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 
4095 : 255)); - const __m128i zero = _mm_setzero_si128(); - - prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x); - prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y); - - for (j = 0; j < w; j += 8) { - /* Horizontal filter */ - { - for (i = 0; i < im_h; i += 1) { - const __m128i row00 = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); - const __m128i row01 = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + (j + 8)]); - - // even pixels - s[0] = _mm_alignr_epi8(row01, row00, 0); - s[1] = _mm_alignr_epi8(row01, row00, 4); - s[2] = _mm_alignr_epi8(row01, row00, 8); - s[3] = _mm_alignr_epi8(row01, row00, 12); - - __m128i res_even = convolve(s, coeffs_x); - res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const_x), - round_shift_x); - - // odd pixels - s[0] = _mm_alignr_epi8(row01, row00, 2); - s[1] = _mm_alignr_epi8(row01, row00, 6); - s[2] = _mm_alignr_epi8(row01, row00, 10); - s[3] = _mm_alignr_epi8(row01, row00, 14); - - __m128i res_odd = convolve(s, coeffs_x); - res_odd = - _mm_sra_epi32(_mm_add_epi32(res_odd, round_const_x), round_shift_x); - - __m128i res_even1 = _mm_packs_epi32(res_even, res_even); - __m128i res_odd1 = _mm_packs_epi32(res_odd, res_odd); - __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1); - - _mm_store_si128((__m128i *)&im_block[i * im_stride], res); - } - } - /* Vertical filter */ - { - __m128i s0 = _mm_loadu_si128((__m128i *)(im_block + 0 * im_stride)); - __m128i s1 = _mm_loadu_si128((__m128i *)(im_block + 1 * im_stride)); - __m128i s2 = _mm_loadu_si128((__m128i *)(im_block + 2 * im_stride)); - __m128i s3 = _mm_loadu_si128((__m128i *)(im_block + 3 * im_stride)); - __m128i s4 = _mm_loadu_si128((__m128i *)(im_block + 4 * im_stride)); - __m128i s5 = _mm_loadu_si128((__m128i *)(im_block + 5 * im_stride)); - __m128i s6 = _mm_loadu_si128((__m128i *)(im_block + 6 * im_stride)); - - s[0] = _mm_unpacklo_epi16(s0, s1); - s[1] = _mm_unpacklo_epi16(s2, s3); - s[2] = _mm_unpacklo_epi16(s4, s5); - - s[4] = 
_mm_unpackhi_epi16(s0, s1); - s[5] = _mm_unpackhi_epi16(s2, s3); - s[6] = _mm_unpackhi_epi16(s4, s5); - - s[0 + 8] = _mm_unpacklo_epi16(s1, s2); - s[1 + 8] = _mm_unpacklo_epi16(s3, s4); - s[2 + 8] = _mm_unpacklo_epi16(s5, s6); - - s[4 + 8] = _mm_unpackhi_epi16(s1, s2); - s[5 + 8] = _mm_unpackhi_epi16(s3, s4); - s[6 + 8] = _mm_unpackhi_epi16(s5, s6); - - for (i = 0; i < h; i += 2) { - const int16_t *data = &im_block[i * im_stride]; - - __m128i s7 = _mm_loadu_si128((__m128i *)(data + 7 * im_stride)); - __m128i s8 = _mm_loadu_si128((__m128i *)(data + 8 * im_stride)); - - s[3] = _mm_unpacklo_epi16(s6, s7); - s[7] = _mm_unpackhi_epi16(s6, s7); - - s[3 + 8] = _mm_unpacklo_epi16(s7, s8); - s[7 + 8] = _mm_unpackhi_epi16(s7, s8); - - const __m128i res_a0 = convolve(s, coeffs_y); - __m128i res_a_round0 = - _mm_sra_epi32(_mm_add_epi32(res_a0, round_const_y), round_shift_y); - res_a_round0 = _mm_sra_epi32( - _mm_add_epi32(res_a_round0, round_const_bits), round_shift_bits); - - const __m128i res_a1 = convolve(s + 8, coeffs_y); - __m128i res_a_round1 = - _mm_sra_epi32(_mm_add_epi32(res_a1, round_const_y), round_shift_y); - res_a_round1 = _mm_sra_epi32( - _mm_add_epi32(res_a_round1, round_const_bits), round_shift_bits); - - if (w - j > 4) { - const __m128i res_b0 = convolve(s + 4, coeffs_y); - __m128i res_b_round0 = _mm_sra_epi32( - _mm_add_epi32(res_b0, round_const_y), round_shift_y); - res_b_round0 = _mm_sra_epi32( - _mm_add_epi32(res_b_round0, round_const_bits), round_shift_bits); - - const __m128i res_b1 = convolve(s + 4 + 8, coeffs_y); - __m128i res_b_round1 = _mm_sra_epi32( - _mm_add_epi32(res_b1, round_const_y), round_shift_y); - res_b_round1 = _mm_sra_epi32( - _mm_add_epi32(res_b_round1, round_const_bits), round_shift_bits); - - __m128i res_16bit0 = _mm_packs_epi32(res_a_round0, res_b_round0); - res_16bit0 = _mm_min_epi16(res_16bit0, clip_pixel); - res_16bit0 = _mm_max_epi16(res_16bit0, zero); - - __m128i res_16bit1 = _mm_packs_epi32(res_a_round1, res_b_round1); - 
res_16bit1 = _mm_min_epi16(res_16bit1, clip_pixel); - res_16bit1 = _mm_max_epi16(res_16bit1, zero); - - _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res_16bit0); - _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride], - res_16bit1); - } else if (w == 4) { - res_a_round0 = _mm_packs_epi32(res_a_round0, res_a_round0); - res_a_round0 = _mm_min_epi16(res_a_round0, clip_pixel); - res_a_round0 = _mm_max_epi16(res_a_round0, zero); - - res_a_round1 = _mm_packs_epi32(res_a_round1, res_a_round1); - res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel); - res_a_round1 = _mm_max_epi16(res_a_round1, zero); - - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_a_round0); - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride], - res_a_round1); - } else { - res_a_round0 = _mm_packs_epi32(res_a_round0, res_a_round0); - res_a_round0 = _mm_min_epi16(res_a_round0, clip_pixel); - res_a_round0 = _mm_max_epi16(res_a_round0, zero); - - res_a_round1 = _mm_packs_epi32(res_a_round1, res_a_round1); - res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel); - res_a_round1 = _mm_max_epi16(res_a_round1, zero); - - *((uint32_t *)(&dst[i * dst_stride + j])) = - _mm_cvtsi128_si32(res_a_round0); - - *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) = - _mm_cvtsi128_si32(res_a_round1); - } - s[0] = s[1]; - s[1] = s[2]; - s[2] = s[3]; - - s[4] = s[5]; - s[5] = s[6]; - s[6] = s[7]; - - s[0 + 8] = s[1 + 8]; - s[1 + 8] = s[2 + 8]; - s[2 + 8] = s[3 + 8]; - - s[4 + 8] = s[5 + 8]; - s[5 + 8] = s[6 + 8]; - s[6 + 8] = s[7 + 8]; - - s6 = s8; - } - } - } -} diff --git a/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c deleted file mode 100644 index ade2af03e..000000000 --- a/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c +++ /dev/null @@ -1,1349 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#include <assert.h> -#include <immintrin.h> - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "av1/common/av1_inv_txfm1d_cfg.h" -#include "av1/common/idct.h" -#include "av1/common/x86/av1_inv_txfm_ssse3.h" -#include "av1/common/x86/highbd_txfm_utility_sse4.h" - -// Note: -// Total 32x4 registers to represent 32x32 block coefficients. -// For high bit depth, each coefficient is 4-byte. -// Each __m256i register holds 8 coefficients. -// So each "row" we needs 4 register. Totally 32 rows -// Register layout: -// v0, v1, v2, v3, -// v4, v5, v6, v7, -// ... ... 
-// v124, v125, v126, v127 - -static INLINE __m256i highbd_clamp_epi16_avx2(__m256i u, int bd) { - const __m256i zero = _mm256_setzero_si256(); - const __m256i one = _mm256_set1_epi16(1); - const __m256i max = _mm256_sub_epi16(_mm256_slli_epi16(one, bd), one); - __m256i clamped, mask; - - mask = _mm256_cmpgt_epi16(u, max); - clamped = _mm256_andnot_si256(mask, u); - mask = _mm256_and_si256(mask, max); - clamped = _mm256_or_si256(mask, clamped); - mask = _mm256_cmpgt_epi16(clamped, zero); - clamped = _mm256_and_si256(clamped, mask); - - return clamped; -} - -static INLINE __m256i highbd_get_recon_16x8_avx2(const __m256i pred, - __m256i res0, __m256i res1, - const int bd) { - __m256i x0 = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(pred)); - __m256i x1 = _mm256_cvtepi16_epi32(_mm256_extractf128_si256(pred, 1)); - - x0 = _mm256_add_epi32(res0, x0); - x1 = _mm256_add_epi32(res1, x1); - x0 = _mm256_packus_epi32(x0, x1); - x0 = _mm256_permute4x64_epi64(x0, 0xd8); - x0 = highbd_clamp_epi16_avx2(x0, bd); - return x0; -} - -static INLINE void highbd_write_buffer_16xn_avx2(__m256i *in, uint16_t *output, - int stride, int flipud, - int height, const int bd) { - int j = flipud ? (height - 1) : 0; - const int step = flipud ? 
-1 : 1; - for (int i = 0; i < height; ++i, j += step) { - __m256i v = _mm256_loadu_si256((__m256i const *)(output + i * stride)); - __m256i u = highbd_get_recon_16x8_avx2(v, in[j], in[j + height], bd); - - _mm256_storeu_si256((__m256i *)(output + i * stride), u); - } -} - -static INLINE __m256i av1_round_shift_32_avx2(__m256i vec, int bit) { - __m256i tmp, round; - round = _mm256_set1_epi32(1 << (bit - 1)); - tmp = _mm256_add_epi32(vec, round); - return _mm256_srai_epi32(tmp, bit); -} - -static INLINE void av1_round_shift_array_32_avx2(__m256i *input, - __m256i *output, - const int size, - const int bit) { - if (bit > 0) { - int i; - for (i = 0; i < size; i++) { - output[i] = av1_round_shift_32_avx2(input[i], bit); - } - } else { - int i; - for (i = 0; i < size; i++) { - output[i] = _mm256_slli_epi32(input[i], -bit); - } - } -} - -static void transpose_8x8_avx2(const __m256i *in, __m256i *out) { - __m256i u0, u1, u2, u3, u4, u5, u6, u7; - __m256i x0, x1; - - u0 = _mm256_unpacklo_epi32(in[0], in[1]); - u1 = _mm256_unpackhi_epi32(in[0], in[1]); - - u2 = _mm256_unpacklo_epi32(in[2], in[3]); - u3 = _mm256_unpackhi_epi32(in[2], in[3]); - - u4 = _mm256_unpacklo_epi32(in[4], in[5]); - u5 = _mm256_unpackhi_epi32(in[4], in[5]); - - u6 = _mm256_unpacklo_epi32(in[6], in[7]); - u7 = _mm256_unpackhi_epi32(in[6], in[7]); - - x0 = _mm256_unpacklo_epi64(u0, u2); - x1 = _mm256_unpacklo_epi64(u4, u6); - out[0] = _mm256_permute2f128_si256(x0, x1, 0x20); - out[4] = _mm256_permute2f128_si256(x0, x1, 0x31); - - x0 = _mm256_unpackhi_epi64(u0, u2); - x1 = _mm256_unpackhi_epi64(u4, u6); - out[1] = _mm256_permute2f128_si256(x0, x1, 0x20); - out[5] = _mm256_permute2f128_si256(x0, x1, 0x31); - - x0 = _mm256_unpacklo_epi64(u1, u3); - x1 = _mm256_unpacklo_epi64(u5, u7); - out[2] = _mm256_permute2f128_si256(x0, x1, 0x20); - out[6] = _mm256_permute2f128_si256(x0, x1, 0x31); - - x0 = _mm256_unpackhi_epi64(u1, u3); - x1 = _mm256_unpackhi_epi64(u5, u7); - out[3] = _mm256_permute2f128_si256(x0, x1, 
0x20); - out[7] = _mm256_permute2f128_si256(x0, x1, 0x31); -} - -static void load_buffer_32x32(const int32_t *coeff, __m256i *in, - int input_stiride, int size) { - int i; - for (i = 0; i < size; ++i) { - in[i] = _mm256_loadu_si256((const __m256i *)(coeff + i * input_stiride)); - } -} - -static INLINE __m256i half_btf_0_avx2(const __m256i *w0, const __m256i *n0, - const __m256i *rounding, int bit) { - __m256i x; - x = _mm256_mullo_epi32(*w0, *n0); - x = _mm256_add_epi32(x, *rounding); - x = _mm256_srai_epi32(x, bit); - return x; -} - -static INLINE __m256i half_btf_avx2(const __m256i *w0, const __m256i *n0, - const __m256i *w1, const __m256i *n1, - const __m256i *rounding, int bit) { - __m256i x, y; - - x = _mm256_mullo_epi32(*w0, *n0); - y = _mm256_mullo_epi32(*w1, *n1); - x = _mm256_add_epi32(x, y); - x = _mm256_add_epi32(x, *rounding); - x = _mm256_srai_epi32(x, bit); - return x; -} - -static void addsub_avx2(const __m256i in0, const __m256i in1, __m256i *out0, - __m256i *out1, const __m256i *clamp_lo, - const __m256i *clamp_hi) { - __m256i a0 = _mm256_add_epi32(in0, in1); - __m256i a1 = _mm256_sub_epi32(in0, in1); - - a0 = _mm256_max_epi32(a0, *clamp_lo); - a0 = _mm256_min_epi32(a0, *clamp_hi); - a1 = _mm256_max_epi32(a1, *clamp_lo); - a1 = _mm256_min_epi32(a1, *clamp_hi); - - *out0 = a0; - *out1 = a1; -} - -static void addsub_no_clamp_avx2(const __m256i in0, const __m256i in1, - __m256i *out0, __m256i *out1) { - __m256i a0 = _mm256_add_epi32(in0, in1); - __m256i a1 = _mm256_sub_epi32(in0, in1); - - *out0 = a0; - *out1 = a1; -} - -static void addsub_shift_avx2(const __m256i in0, const __m256i in1, - __m256i *out0, __m256i *out1, - const __m256i *clamp_lo, const __m256i *clamp_hi, - int shift) { - __m256i offset = _mm256_set1_epi32((1 << shift) >> 1); - __m256i in0_w_offset = _mm256_add_epi32(in0, offset); - __m256i a0 = _mm256_add_epi32(in0_w_offset, in1); - __m256i a1 = _mm256_sub_epi32(in0_w_offset, in1); - - a0 = _mm256_sra_epi32(a0, 
_mm_cvtsi32_si128(shift)); - a1 = _mm256_sra_epi32(a1, _mm_cvtsi32_si128(shift)); - - a0 = _mm256_max_epi32(a0, *clamp_lo); - a0 = _mm256_min_epi32(a0, *clamp_hi); - a1 = _mm256_max_epi32(a1, *clamp_lo); - a1 = _mm256_min_epi32(a1, *clamp_hi); - - *out0 = a0; - *out1 = a1; -} - -static INLINE void idct32_stage4_avx2( - __m256i *bf1, const __m256i *cospim8, const __m256i *cospi56, - const __m256i *cospi8, const __m256i *cospim56, const __m256i *cospim40, - const __m256i *cospi24, const __m256i *cospi40, const __m256i *cospim24, - const __m256i *rounding, int bit) { - __m256i temp1, temp2; - temp1 = half_btf_avx2(cospim8, &bf1[17], cospi56, &bf1[30], rounding, bit); - bf1[30] = half_btf_avx2(cospi56, &bf1[17], cospi8, &bf1[30], rounding, bit); - bf1[17] = temp1; - - temp2 = half_btf_avx2(cospim56, &bf1[18], cospim8, &bf1[29], rounding, bit); - bf1[29] = half_btf_avx2(cospim8, &bf1[18], cospi56, &bf1[29], rounding, bit); - bf1[18] = temp2; - - temp1 = half_btf_avx2(cospim40, &bf1[21], cospi24, &bf1[26], rounding, bit); - bf1[26] = half_btf_avx2(cospi24, &bf1[21], cospi40, &bf1[26], rounding, bit); - bf1[21] = temp1; - - temp2 = half_btf_avx2(cospim24, &bf1[22], cospim40, &bf1[25], rounding, bit); - bf1[25] = half_btf_avx2(cospim40, &bf1[22], cospi24, &bf1[25], rounding, bit); - bf1[22] = temp2; -} - -static INLINE void idct32_stage5_avx2( - __m256i *bf1, const __m256i *cospim16, const __m256i *cospi48, - const __m256i *cospi16, const __m256i *cospim48, const __m256i *clamp_lo, - const __m256i *clamp_hi, const __m256i *rounding, int bit) { - __m256i temp1, temp2; - temp1 = half_btf_avx2(cospim16, &bf1[9], cospi48, &bf1[14], rounding, bit); - bf1[14] = half_btf_avx2(cospi48, &bf1[9], cospi16, &bf1[14], rounding, bit); - bf1[9] = temp1; - - temp2 = half_btf_avx2(cospim48, &bf1[10], cospim16, &bf1[13], rounding, bit); - bf1[13] = half_btf_avx2(cospim16, &bf1[10], cospi48, &bf1[13], rounding, bit); - bf1[10] = temp2; - - addsub_avx2(bf1[16], bf1[19], bf1 + 16, bf1 + 19, 
clamp_lo, clamp_hi); - addsub_avx2(bf1[17], bf1[18], bf1 + 17, bf1 + 18, clamp_lo, clamp_hi); - addsub_avx2(bf1[23], bf1[20], bf1 + 23, bf1 + 20, clamp_lo, clamp_hi); - addsub_avx2(bf1[22], bf1[21], bf1 + 22, bf1 + 21, clamp_lo, clamp_hi); - addsub_avx2(bf1[24], bf1[27], bf1 + 24, bf1 + 27, clamp_lo, clamp_hi); - addsub_avx2(bf1[25], bf1[26], bf1 + 25, bf1 + 26, clamp_lo, clamp_hi); - addsub_avx2(bf1[31], bf1[28], bf1 + 31, bf1 + 28, clamp_lo, clamp_hi); - addsub_avx2(bf1[30], bf1[29], bf1 + 30, bf1 + 29, clamp_lo, clamp_hi); -} - -static INLINE void idct32_stage6_avx2( - __m256i *bf1, const __m256i *cospim32, const __m256i *cospi32, - const __m256i *cospim16, const __m256i *cospi48, const __m256i *cospi16, - const __m256i *cospim48, const __m256i *clamp_lo, const __m256i *clamp_hi, - const __m256i *rounding, int bit) { - __m256i temp1, temp2; - temp1 = half_btf_avx2(cospim32, &bf1[5], cospi32, &bf1[6], rounding, bit); - bf1[6] = half_btf_avx2(cospi32, &bf1[5], cospi32, &bf1[6], rounding, bit); - bf1[5] = temp1; - - addsub_avx2(bf1[8], bf1[11], bf1 + 8, bf1 + 11, clamp_lo, clamp_hi); - addsub_avx2(bf1[9], bf1[10], bf1 + 9, bf1 + 10, clamp_lo, clamp_hi); - addsub_avx2(bf1[15], bf1[12], bf1 + 15, bf1 + 12, clamp_lo, clamp_hi); - addsub_avx2(bf1[14], bf1[13], bf1 + 14, bf1 + 13, clamp_lo, clamp_hi); - - temp1 = half_btf_avx2(cospim16, &bf1[18], cospi48, &bf1[29], rounding, bit); - bf1[29] = half_btf_avx2(cospi48, &bf1[18], cospi16, &bf1[29], rounding, bit); - bf1[18] = temp1; - temp2 = half_btf_avx2(cospim16, &bf1[19], cospi48, &bf1[28], rounding, bit); - bf1[28] = half_btf_avx2(cospi48, &bf1[19], cospi16, &bf1[28], rounding, bit); - bf1[19] = temp2; - temp1 = half_btf_avx2(cospim48, &bf1[20], cospim16, &bf1[27], rounding, bit); - bf1[27] = half_btf_avx2(cospim16, &bf1[20], cospi48, &bf1[27], rounding, bit); - bf1[20] = temp1; - temp2 = half_btf_avx2(cospim48, &bf1[21], cospim16, &bf1[26], rounding, bit); - bf1[26] = half_btf_avx2(cospim16, &bf1[21], cospi48, 
&bf1[26], rounding, bit); - bf1[21] = temp2; -} - -static INLINE void idct32_stage7_avx2(__m256i *bf1, const __m256i *cospim32, - const __m256i *cospi32, - const __m256i *clamp_lo, - const __m256i *clamp_hi, - const __m256i *rounding, int bit) { - __m256i temp1, temp2; - addsub_avx2(bf1[0], bf1[7], bf1 + 0, bf1 + 7, clamp_lo, clamp_hi); - addsub_avx2(bf1[1], bf1[6], bf1 + 1, bf1 + 6, clamp_lo, clamp_hi); - addsub_avx2(bf1[2], bf1[5], bf1 + 2, bf1 + 5, clamp_lo, clamp_hi); - addsub_avx2(bf1[3], bf1[4], bf1 + 3, bf1 + 4, clamp_lo, clamp_hi); - - temp1 = half_btf_avx2(cospim32, &bf1[10], cospi32, &bf1[13], rounding, bit); - bf1[13] = half_btf_avx2(cospi32, &bf1[10], cospi32, &bf1[13], rounding, bit); - bf1[10] = temp1; - temp2 = half_btf_avx2(cospim32, &bf1[11], cospi32, &bf1[12], rounding, bit); - bf1[12] = half_btf_avx2(cospi32, &bf1[11], cospi32, &bf1[12], rounding, bit); - bf1[11] = temp2; - - addsub_avx2(bf1[16], bf1[23], bf1 + 16, bf1 + 23, clamp_lo, clamp_hi); - addsub_avx2(bf1[17], bf1[22], bf1 + 17, bf1 + 22, clamp_lo, clamp_hi); - addsub_avx2(bf1[18], bf1[21], bf1 + 18, bf1 + 21, clamp_lo, clamp_hi); - addsub_avx2(bf1[19], bf1[20], bf1 + 19, bf1 + 20, clamp_lo, clamp_hi); - addsub_avx2(bf1[31], bf1[24], bf1 + 31, bf1 + 24, clamp_lo, clamp_hi); - addsub_avx2(bf1[30], bf1[25], bf1 + 30, bf1 + 25, clamp_lo, clamp_hi); - addsub_avx2(bf1[29], bf1[26], bf1 + 29, bf1 + 26, clamp_lo, clamp_hi); - addsub_avx2(bf1[28], bf1[27], bf1 + 28, bf1 + 27, clamp_lo, clamp_hi); -} - -static INLINE void idct32_stage8_avx2(__m256i *bf1, const __m256i *cospim32, - const __m256i *cospi32, - const __m256i *clamp_lo, - const __m256i *clamp_hi, - const __m256i *rounding, int bit) { - __m256i temp1, temp2; - addsub_avx2(bf1[0], bf1[15], bf1 + 0, bf1 + 15, clamp_lo, clamp_hi); - addsub_avx2(bf1[1], bf1[14], bf1 + 1, bf1 + 14, clamp_lo, clamp_hi); - addsub_avx2(bf1[2], bf1[13], bf1 + 2, bf1 + 13, clamp_lo, clamp_hi); - addsub_avx2(bf1[3], bf1[12], bf1 + 3, bf1 + 12, clamp_lo, clamp_hi); 
- addsub_avx2(bf1[4], bf1[11], bf1 + 4, bf1 + 11, clamp_lo, clamp_hi); - addsub_avx2(bf1[5], bf1[10], bf1 + 5, bf1 + 10, clamp_lo, clamp_hi); - addsub_avx2(bf1[6], bf1[9], bf1 + 6, bf1 + 9, clamp_lo, clamp_hi); - addsub_avx2(bf1[7], bf1[8], bf1 + 7, bf1 + 8, clamp_lo, clamp_hi); - - temp1 = half_btf_avx2(cospim32, &bf1[20], cospi32, &bf1[27], rounding, bit); - bf1[27] = half_btf_avx2(cospi32, &bf1[20], cospi32, &bf1[27], rounding, bit); - bf1[20] = temp1; - temp2 = half_btf_avx2(cospim32, &bf1[21], cospi32, &bf1[26], rounding, bit); - bf1[26] = half_btf_avx2(cospi32, &bf1[21], cospi32, &bf1[26], rounding, bit); - bf1[21] = temp2; - temp1 = half_btf_avx2(cospim32, &bf1[22], cospi32, &bf1[25], rounding, bit); - bf1[25] = half_btf_avx2(cospi32, &bf1[22], cospi32, &bf1[25], rounding, bit); - bf1[22] = temp1; - temp2 = half_btf_avx2(cospim32, &bf1[23], cospi32, &bf1[24], rounding, bit); - bf1[24] = half_btf_avx2(cospi32, &bf1[23], cospi32, &bf1[24], rounding, bit); - bf1[23] = temp2; -} - -static INLINE void idct32_stage9_avx2(__m256i *bf1, __m256i *out, - const int do_cols, const int bd, - const int out_shift, - const int log_range) { - if (do_cols) { - addsub_no_clamp_avx2(bf1[0], bf1[31], out + 0, out + 31); - addsub_no_clamp_avx2(bf1[1], bf1[30], out + 1, out + 30); - addsub_no_clamp_avx2(bf1[2], bf1[29], out + 2, out + 29); - addsub_no_clamp_avx2(bf1[3], bf1[28], out + 3, out + 28); - addsub_no_clamp_avx2(bf1[4], bf1[27], out + 4, out + 27); - addsub_no_clamp_avx2(bf1[5], bf1[26], out + 5, out + 26); - addsub_no_clamp_avx2(bf1[6], bf1[25], out + 6, out + 25); - addsub_no_clamp_avx2(bf1[7], bf1[24], out + 7, out + 24); - addsub_no_clamp_avx2(bf1[8], bf1[23], out + 8, out + 23); - addsub_no_clamp_avx2(bf1[9], bf1[22], out + 9, out + 22); - addsub_no_clamp_avx2(bf1[10], bf1[21], out + 10, out + 21); - addsub_no_clamp_avx2(bf1[11], bf1[20], out + 11, out + 20); - addsub_no_clamp_avx2(bf1[12], bf1[19], out + 12, out + 19); - addsub_no_clamp_avx2(bf1[13], bf1[18], out + 
13, out + 18); - addsub_no_clamp_avx2(bf1[14], bf1[17], out + 14, out + 17); - addsub_no_clamp_avx2(bf1[15], bf1[16], out + 15, out + 16); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m256i clamp_lo_out = _mm256_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m256i clamp_hi_out = _mm256_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - addsub_shift_avx2(bf1[0], bf1[31], out + 0, out + 31, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[1], bf1[30], out + 1, out + 30, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[2], bf1[29], out + 2, out + 29, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[3], bf1[28], out + 3, out + 28, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[4], bf1[27], out + 4, out + 27, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[5], bf1[26], out + 5, out + 26, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[6], bf1[25], out + 6, out + 25, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[7], bf1[24], out + 7, out + 24, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[8], bf1[23], out + 8, out + 23, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[9], bf1[22], out + 9, out + 22, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[10], bf1[21], out + 10, out + 21, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[11], bf1[20], out + 11, out + 20, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[12], bf1[19], out + 12, out + 19, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[13], bf1[18], out + 13, out + 18, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[14], bf1[17], out + 14, out + 17, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf1[15], 
bf1[16], out + 15, out + 16, &clamp_lo_out, - &clamp_hi_out, out_shift); - } -} - -static void idct32_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m256i cospi32 = _mm256_set1_epi32(cospi[32]); - const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1))); - const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1); - __m256i x; - // stage 0 - // stage 1 - // stage 2 - // stage 3 - // stage 4 - // stage 5 - x = _mm256_mullo_epi32(in[0], cospi32); - x = _mm256_add_epi32(x, rounding); - x = _mm256_srai_epi32(x, bit); - - // stage 6 - // stage 7 - // stage 8 - // stage 9 - if (do_cols) { - x = _mm256_max_epi32(x, clamp_lo); - x = _mm256_min_epi32(x, clamp_hi); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m256i clamp_lo_out = _mm256_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m256i clamp_hi_out = _mm256_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - __m256i offset = _mm256_set1_epi32((1 << out_shift) >> 1); - x = _mm256_add_epi32(offset, x); - x = _mm256_sra_epi32(x, _mm_cvtsi32_si128(out_shift)); - x = _mm256_max_epi32(x, clamp_lo_out); - x = _mm256_min_epi32(x, clamp_hi_out); - } - - out[0] = x; - out[1] = x; - out[2] = x; - out[3] = x; - out[4] = x; - out[5] = x; - out[6] = x; - out[7] = x; - out[8] = x; - out[9] = x; - out[10] = x; - out[11] = x; - out[12] = x; - out[13] = x; - out[14] = x; - out[15] = x; - out[16] = x; - out[17] = x; - out[18] = x; - out[19] = x; - out[20] = x; - out[21] = x; - out[22] = x; - out[23] = x; - out[24] = x; - out[25] = x; - out[26] = x; - out[27] = x; - out[28] = x; - out[29] = x; - out[30] = x; - out[31] = x; -} - -static void idct32_low8_avx2(__m256i *in, __m256i *out, int bit, 
int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m256i cospi62 = _mm256_set1_epi32(cospi[62]); - const __m256i cospi14 = _mm256_set1_epi32(cospi[14]); - const __m256i cospi54 = _mm256_set1_epi32(cospi[54]); - const __m256i cospi6 = _mm256_set1_epi32(cospi[6]); - const __m256i cospi10 = _mm256_set1_epi32(cospi[10]); - const __m256i cospi2 = _mm256_set1_epi32(cospi[2]); - const __m256i cospim58 = _mm256_set1_epi32(-cospi[58]); - const __m256i cospim50 = _mm256_set1_epi32(-cospi[50]); - const __m256i cospi60 = _mm256_set1_epi32(cospi[60]); - const __m256i cospi12 = _mm256_set1_epi32(cospi[12]); - const __m256i cospi4 = _mm256_set1_epi32(cospi[4]); - const __m256i cospim52 = _mm256_set1_epi32(-cospi[52]); - const __m256i cospi56 = _mm256_set1_epi32(cospi[56]); - const __m256i cospi24 = _mm256_set1_epi32(cospi[24]); - const __m256i cospi40 = _mm256_set1_epi32(cospi[40]); - const __m256i cospi8 = _mm256_set1_epi32(cospi[8]); - const __m256i cospim40 = _mm256_set1_epi32(-cospi[40]); - const __m256i cospim8 = _mm256_set1_epi32(-cospi[8]); - const __m256i cospim56 = _mm256_set1_epi32(-cospi[56]); - const __m256i cospim24 = _mm256_set1_epi32(-cospi[24]); - const __m256i cospi32 = _mm256_set1_epi32(cospi[32]); - const __m256i cospim32 = _mm256_set1_epi32(-cospi[32]); - const __m256i cospi48 = _mm256_set1_epi32(cospi[48]); - const __m256i cospim48 = _mm256_set1_epi32(-cospi[48]); - const __m256i cospi16 = _mm256_set1_epi32(cospi[16]); - const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]); - const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 
6 : 8)); - const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1))); - const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1); - __m256i bf1[32]; - - { - // stage 0 - // stage 1 - bf1[0] = in[0]; - bf1[4] = in[4]; - bf1[8] = in[2]; - bf1[12] = in[6]; - bf1[16] = in[1]; - bf1[20] = in[5]; - bf1[24] = in[3]; - bf1[28] = in[7]; - - // stage 2 - bf1[31] = half_btf_0_avx2(&cospi2, &bf1[16], &rounding, bit); - bf1[16] = half_btf_0_avx2(&cospi62, &bf1[16], &rounding, bit); - bf1[19] = half_btf_0_avx2(&cospim50, &bf1[28], &rounding, bit); - bf1[28] = half_btf_0_avx2(&cospi14, &bf1[28], &rounding, bit); - bf1[27] = half_btf_0_avx2(&cospi10, &bf1[20], &rounding, bit); - bf1[20] = half_btf_0_avx2(&cospi54, &bf1[20], &rounding, bit); - bf1[23] = half_btf_0_avx2(&cospim58, &bf1[24], &rounding, bit); - bf1[24] = half_btf_0_avx2(&cospi6, &bf1[24], &rounding, bit); - - // stage 3 - bf1[15] = half_btf_0_avx2(&cospi4, &bf1[8], &rounding, bit); - bf1[8] = half_btf_0_avx2(&cospi60, &bf1[8], &rounding, bit); - - bf1[11] = half_btf_0_avx2(&cospim52, &bf1[12], &rounding, bit); - bf1[12] = half_btf_0_avx2(&cospi12, &bf1[12], &rounding, bit); - bf1[17] = bf1[16]; - bf1[18] = bf1[19]; - bf1[21] = bf1[20]; - bf1[22] = bf1[23]; - bf1[25] = bf1[24]; - bf1[26] = bf1[27]; - bf1[29] = bf1[28]; - bf1[30] = bf1[31]; - - // stage 4 - bf1[7] = half_btf_0_avx2(&cospi8, &bf1[4], &rounding, bit); - bf1[4] = half_btf_0_avx2(&cospi56, &bf1[4], &rounding, bit); - - bf1[9] = bf1[8]; - bf1[10] = bf1[11]; - bf1[13] = bf1[12]; - bf1[14] = bf1[15]; - - idct32_stage4_avx2(bf1, &cospim8, &cospi56, &cospi8, &cospim56, &cospim40, - &cospi24, &cospi40, &cospim24, &rounding, bit); - - // stage 5 - bf1[0] = half_btf_0_avx2(&cospi32, &bf1[0], &rounding, bit); - bf1[1] = bf1[0]; - bf1[5] = bf1[4]; - bf1[6] = bf1[7]; - - idct32_stage5_avx2(bf1, &cospim16, &cospi48, &cospi16, &cospim48, &clamp_lo, - &clamp_hi, &rounding, bit); - - // stage 6 - bf1[3] = bf1[0]; - bf1[2] = bf1[1]; - - 
idct32_stage6_avx2(bf1, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16, - &cospim48, &clamp_lo, &clamp_hi, &rounding, bit); - - // stage 7 - idct32_stage7_avx2(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi, - &rounding, bit); - - // stage 8 - idct32_stage8_avx2(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi, - &rounding, bit); - - // stage 9 - idct32_stage9_avx2(bf1, out, do_cols, bd, out_shift, log_range); - } -} - -static void idct32_low16_avx2(__m256i *in, __m256i *out, int bit, int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m256i cospi62 = _mm256_set1_epi32(cospi[62]); - const __m256i cospi30 = _mm256_set1_epi32(cospi[30]); - const __m256i cospi46 = _mm256_set1_epi32(cospi[46]); - const __m256i cospi14 = _mm256_set1_epi32(cospi[14]); - const __m256i cospi54 = _mm256_set1_epi32(cospi[54]); - const __m256i cospi22 = _mm256_set1_epi32(cospi[22]); - const __m256i cospi38 = _mm256_set1_epi32(cospi[38]); - const __m256i cospi6 = _mm256_set1_epi32(cospi[6]); - const __m256i cospi26 = _mm256_set1_epi32(cospi[26]); - const __m256i cospi10 = _mm256_set1_epi32(cospi[10]); - const __m256i cospi18 = _mm256_set1_epi32(cospi[18]); - const __m256i cospi2 = _mm256_set1_epi32(cospi[2]); - const __m256i cospim58 = _mm256_set1_epi32(-cospi[58]); - const __m256i cospim42 = _mm256_set1_epi32(-cospi[42]); - const __m256i cospim50 = _mm256_set1_epi32(-cospi[50]); - const __m256i cospim34 = _mm256_set1_epi32(-cospi[34]); - const __m256i cospi60 = _mm256_set1_epi32(cospi[60]); - const __m256i cospi28 = _mm256_set1_epi32(cospi[28]); - const __m256i cospi44 = _mm256_set1_epi32(cospi[44]); - const __m256i cospi12 = _mm256_set1_epi32(cospi[12]); - const __m256i cospi20 = _mm256_set1_epi32(cospi[20]); - const __m256i cospi4 = _mm256_set1_epi32(cospi[4]); - const __m256i cospim52 = _mm256_set1_epi32(-cospi[52]); - const __m256i cospim36 = _mm256_set1_epi32(-cospi[36]); - const __m256i cospi56 = _mm256_set1_epi32(cospi[56]); - const __m256i 
cospi24 = _mm256_set1_epi32(cospi[24]); - const __m256i cospi40 = _mm256_set1_epi32(cospi[40]); - const __m256i cospi8 = _mm256_set1_epi32(cospi[8]); - const __m256i cospim40 = _mm256_set1_epi32(-cospi[40]); - const __m256i cospim8 = _mm256_set1_epi32(-cospi[8]); - const __m256i cospim56 = _mm256_set1_epi32(-cospi[56]); - const __m256i cospim24 = _mm256_set1_epi32(-cospi[24]); - const __m256i cospi32 = _mm256_set1_epi32(cospi[32]); - const __m256i cospim32 = _mm256_set1_epi32(-cospi[32]); - const __m256i cospi48 = _mm256_set1_epi32(cospi[48]); - const __m256i cospim48 = _mm256_set1_epi32(-cospi[48]); - const __m256i cospi16 = _mm256_set1_epi32(cospi[16]); - const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]); - const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1))); - const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1); - __m256i bf1[32]; - - { - // stage 0 - // stage 1 - bf1[0] = in[0]; - bf1[2] = in[8]; - bf1[4] = in[4]; - bf1[6] = in[12]; - bf1[8] = in[2]; - bf1[10] = in[10]; - bf1[12] = in[6]; - bf1[14] = in[14]; - bf1[16] = in[1]; - bf1[18] = in[9]; - bf1[20] = in[5]; - bf1[22] = in[13]; - bf1[24] = in[3]; - bf1[26] = in[11]; - bf1[28] = in[7]; - bf1[30] = in[15]; - - // stage 2 - bf1[31] = half_btf_0_avx2(&cospi2, &bf1[16], &rounding, bit); - bf1[16] = half_btf_0_avx2(&cospi62, &bf1[16], &rounding, bit); - bf1[17] = half_btf_0_avx2(&cospim34, &bf1[30], &rounding, bit); - bf1[30] = half_btf_0_avx2(&cospi30, &bf1[30], &rounding, bit); - bf1[29] = half_btf_0_avx2(&cospi18, &bf1[18], &rounding, bit); - bf1[18] = half_btf_0_avx2(&cospi46, &bf1[18], &rounding, bit); - bf1[19] = half_btf_0_avx2(&cospim50, &bf1[28], &rounding, bit); - bf1[28] = half_btf_0_avx2(&cospi14, &bf1[28], &rounding, bit); - bf1[27] = half_btf_0_avx2(&cospi10, &bf1[20], &rounding, bit); - bf1[20] = half_btf_0_avx2(&cospi54, &bf1[20], 
&rounding, bit); - bf1[21] = half_btf_0_avx2(&cospim42, &bf1[26], &rounding, bit); - bf1[26] = half_btf_0_avx2(&cospi22, &bf1[26], &rounding, bit); - bf1[25] = half_btf_0_avx2(&cospi26, &bf1[22], &rounding, bit); - bf1[22] = half_btf_0_avx2(&cospi38, &bf1[22], &rounding, bit); - bf1[23] = half_btf_0_avx2(&cospim58, &bf1[24], &rounding, bit); - bf1[24] = half_btf_0_avx2(&cospi6, &bf1[24], &rounding, bit); - - // stage 3 - bf1[15] = half_btf_0_avx2(&cospi4, &bf1[8], &rounding, bit); - bf1[8] = half_btf_0_avx2(&cospi60, &bf1[8], &rounding, bit); - bf1[9] = half_btf_0_avx2(&cospim36, &bf1[14], &rounding, bit); - bf1[14] = half_btf_0_avx2(&cospi28, &bf1[14], &rounding, bit); - bf1[13] = half_btf_0_avx2(&cospi20, &bf1[10], &rounding, bit); - bf1[10] = half_btf_0_avx2(&cospi44, &bf1[10], &rounding, bit); - bf1[11] = half_btf_0_avx2(&cospim52, &bf1[12], &rounding, bit); - bf1[12] = half_btf_0_avx2(&cospi12, &bf1[12], &rounding, bit); - - addsub_avx2(bf1[16], bf1[17], bf1 + 16, bf1 + 17, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[19], bf1[18], bf1 + 19, bf1 + 18, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[20], bf1[21], bf1 + 20, bf1 + 21, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[23], bf1[22], bf1 + 23, bf1 + 22, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[24], bf1[25], bf1 + 24, bf1 + 25, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[27], bf1[26], bf1 + 27, bf1 + 26, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[28], bf1[29], bf1 + 28, bf1 + 29, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[31], bf1[30], bf1 + 31, bf1 + 30, &clamp_lo, &clamp_hi); - - // stage 4 - bf1[7] = half_btf_0_avx2(&cospi8, &bf1[4], &rounding, bit); - bf1[4] = half_btf_0_avx2(&cospi56, &bf1[4], &rounding, bit); - bf1[5] = half_btf_0_avx2(&cospim40, &bf1[6], &rounding, bit); - bf1[6] = half_btf_0_avx2(&cospi24, &bf1[6], &rounding, bit); - - addsub_avx2(bf1[8], bf1[9], bf1 + 8, bf1 + 9, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[11], bf1[10], bf1 + 11, bf1 + 10, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[12], bf1[13], bf1 + 12, 
bf1 + 13, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[15], bf1[14], bf1 + 15, bf1 + 14, &clamp_lo, &clamp_hi); - - idct32_stage4_avx2(bf1, &cospim8, &cospi56, &cospi8, &cospim56, &cospim40, - &cospi24, &cospi40, &cospim24, &rounding, bit); - - // stage 5 - bf1[0] = half_btf_0_avx2(&cospi32, &bf1[0], &rounding, bit); - bf1[1] = bf1[0]; - bf1[3] = half_btf_0_avx2(&cospi16, &bf1[2], &rounding, bit); - bf1[2] = half_btf_0_avx2(&cospi48, &bf1[2], &rounding, bit); - - addsub_avx2(bf1[4], bf1[5], bf1 + 4, bf1 + 5, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[7], bf1[6], bf1 + 7, bf1 + 6, &clamp_lo, &clamp_hi); - - idct32_stage5_avx2(bf1, &cospim16, &cospi48, &cospi16, &cospim48, &clamp_lo, - &clamp_hi, &rounding, bit); - - // stage 6 - addsub_avx2(bf1[0], bf1[3], bf1 + 0, bf1 + 3, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[1], bf1[2], bf1 + 1, bf1 + 2, &clamp_lo, &clamp_hi); - - idct32_stage6_avx2(bf1, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16, - &cospim48, &clamp_lo, &clamp_hi, &rounding, bit); - - // stage 7 - idct32_stage7_avx2(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi, - &rounding, bit); - - // stage 8 - idct32_stage8_avx2(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi, - &rounding, bit); - - // stage 9 - idct32_stage9_avx2(bf1, out, do_cols, bd, out_shift, log_range); - } -} - -static void idct32_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd, - int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m256i cospi62 = _mm256_set1_epi32(cospi[62]); - const __m256i cospi30 = _mm256_set1_epi32(cospi[30]); - const __m256i cospi46 = _mm256_set1_epi32(cospi[46]); - const __m256i cospi14 = _mm256_set1_epi32(cospi[14]); - const __m256i cospi54 = _mm256_set1_epi32(cospi[54]); - const __m256i cospi22 = _mm256_set1_epi32(cospi[22]); - const __m256i cospi38 = _mm256_set1_epi32(cospi[38]); - const __m256i cospi6 = _mm256_set1_epi32(cospi[6]); - const __m256i cospi58 = _mm256_set1_epi32(cospi[58]); - const __m256i cospi26 = 
_mm256_set1_epi32(cospi[26]); - const __m256i cospi42 = _mm256_set1_epi32(cospi[42]); - const __m256i cospi10 = _mm256_set1_epi32(cospi[10]); - const __m256i cospi50 = _mm256_set1_epi32(cospi[50]); - const __m256i cospi18 = _mm256_set1_epi32(cospi[18]); - const __m256i cospi34 = _mm256_set1_epi32(cospi[34]); - const __m256i cospi2 = _mm256_set1_epi32(cospi[2]); - const __m256i cospim58 = _mm256_set1_epi32(-cospi[58]); - const __m256i cospim26 = _mm256_set1_epi32(-cospi[26]); - const __m256i cospim42 = _mm256_set1_epi32(-cospi[42]); - const __m256i cospim10 = _mm256_set1_epi32(-cospi[10]); - const __m256i cospim50 = _mm256_set1_epi32(-cospi[50]); - const __m256i cospim18 = _mm256_set1_epi32(-cospi[18]); - const __m256i cospim34 = _mm256_set1_epi32(-cospi[34]); - const __m256i cospim2 = _mm256_set1_epi32(-cospi[2]); - const __m256i cospi60 = _mm256_set1_epi32(cospi[60]); - const __m256i cospi28 = _mm256_set1_epi32(cospi[28]); - const __m256i cospi44 = _mm256_set1_epi32(cospi[44]); - const __m256i cospi12 = _mm256_set1_epi32(cospi[12]); - const __m256i cospi52 = _mm256_set1_epi32(cospi[52]); - const __m256i cospi20 = _mm256_set1_epi32(cospi[20]); - const __m256i cospi36 = _mm256_set1_epi32(cospi[36]); - const __m256i cospi4 = _mm256_set1_epi32(cospi[4]); - const __m256i cospim52 = _mm256_set1_epi32(-cospi[52]); - const __m256i cospim20 = _mm256_set1_epi32(-cospi[20]); - const __m256i cospim36 = _mm256_set1_epi32(-cospi[36]); - const __m256i cospim4 = _mm256_set1_epi32(-cospi[4]); - const __m256i cospi56 = _mm256_set1_epi32(cospi[56]); - const __m256i cospi24 = _mm256_set1_epi32(cospi[24]); - const __m256i cospi40 = _mm256_set1_epi32(cospi[40]); - const __m256i cospi8 = _mm256_set1_epi32(cospi[8]); - const __m256i cospim40 = _mm256_set1_epi32(-cospi[40]); - const __m256i cospim8 = _mm256_set1_epi32(-cospi[8]); - const __m256i cospim56 = _mm256_set1_epi32(-cospi[56]); - const __m256i cospim24 = _mm256_set1_epi32(-cospi[24]); - const __m256i cospi32 = 
_mm256_set1_epi32(cospi[32]); - const __m256i cospim32 = _mm256_set1_epi32(-cospi[32]); - const __m256i cospi48 = _mm256_set1_epi32(cospi[48]); - const __m256i cospim48 = _mm256_set1_epi32(-cospi[48]); - const __m256i cospi16 = _mm256_set1_epi32(cospi[16]); - const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]); - const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1))); - const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1); - __m256i bf1[32], bf0[32]; - - { - // stage 0 - // stage 1 - bf1[0] = in[0]; - bf1[1] = in[16]; - bf1[2] = in[8]; - bf1[3] = in[24]; - bf1[4] = in[4]; - bf1[5] = in[20]; - bf1[6] = in[12]; - bf1[7] = in[28]; - bf1[8] = in[2]; - bf1[9] = in[18]; - bf1[10] = in[10]; - bf1[11] = in[26]; - bf1[12] = in[6]; - bf1[13] = in[22]; - bf1[14] = in[14]; - bf1[15] = in[30]; - bf1[16] = in[1]; - bf1[17] = in[17]; - bf1[18] = in[9]; - bf1[19] = in[25]; - bf1[20] = in[5]; - bf1[21] = in[21]; - bf1[22] = in[13]; - bf1[23] = in[29]; - bf1[24] = in[3]; - bf1[25] = in[19]; - bf1[26] = in[11]; - bf1[27] = in[27]; - bf1[28] = in[7]; - bf1[29] = in[23]; - bf1[30] = in[15]; - bf1[31] = in[31]; - - // stage 2 - bf0[0] = bf1[0]; - bf0[1] = bf1[1]; - bf0[2] = bf1[2]; - bf0[3] = bf1[3]; - bf0[4] = bf1[4]; - bf0[5] = bf1[5]; - bf0[6] = bf1[6]; - bf0[7] = bf1[7]; - bf0[8] = bf1[8]; - bf0[9] = bf1[9]; - bf0[10] = bf1[10]; - bf0[11] = bf1[11]; - bf0[12] = bf1[12]; - bf0[13] = bf1[13]; - bf0[14] = bf1[14]; - bf0[15] = bf1[15]; - bf0[16] = - half_btf_avx2(&cospi62, &bf1[16], &cospim2, &bf1[31], &rounding, bit); - bf0[17] = - half_btf_avx2(&cospi30, &bf1[17], &cospim34, &bf1[30], &rounding, bit); - bf0[18] = - half_btf_avx2(&cospi46, &bf1[18], &cospim18, &bf1[29], &rounding, bit); - bf0[19] = - half_btf_avx2(&cospi14, &bf1[19], &cospim50, &bf1[28], &rounding, bit); - bf0[20] = - half_btf_avx2(&cospi54, &bf1[20], &cospim10, 
&bf1[27], &rounding, bit); - bf0[21] = - half_btf_avx2(&cospi22, &bf1[21], &cospim42, &bf1[26], &rounding, bit); - bf0[22] = - half_btf_avx2(&cospi38, &bf1[22], &cospim26, &bf1[25], &rounding, bit); - bf0[23] = - half_btf_avx2(&cospi6, &bf1[23], &cospim58, &bf1[24], &rounding, bit); - bf0[24] = - half_btf_avx2(&cospi58, &bf1[23], &cospi6, &bf1[24], &rounding, bit); - bf0[25] = - half_btf_avx2(&cospi26, &bf1[22], &cospi38, &bf1[25], &rounding, bit); - bf0[26] = - half_btf_avx2(&cospi42, &bf1[21], &cospi22, &bf1[26], &rounding, bit); - bf0[27] = - half_btf_avx2(&cospi10, &bf1[20], &cospi54, &bf1[27], &rounding, bit); - bf0[28] = - half_btf_avx2(&cospi50, &bf1[19], &cospi14, &bf1[28], &rounding, bit); - bf0[29] = - half_btf_avx2(&cospi18, &bf1[18], &cospi46, &bf1[29], &rounding, bit); - bf0[30] = - half_btf_avx2(&cospi34, &bf1[17], &cospi30, &bf1[30], &rounding, bit); - bf0[31] = - half_btf_avx2(&cospi2, &bf1[16], &cospi62, &bf1[31], &rounding, bit); - - // stage 3 - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = - half_btf_avx2(&cospi60, &bf0[8], &cospim4, &bf0[15], &rounding, bit); - bf1[9] = - half_btf_avx2(&cospi28, &bf0[9], &cospim36, &bf0[14], &rounding, bit); - bf1[10] = - half_btf_avx2(&cospi44, &bf0[10], &cospim20, &bf0[13], &rounding, bit); - bf1[11] = - half_btf_avx2(&cospi12, &bf0[11], &cospim52, &bf0[12], &rounding, bit); - bf1[12] = - half_btf_avx2(&cospi52, &bf0[11], &cospi12, &bf0[12], &rounding, bit); - bf1[13] = - half_btf_avx2(&cospi20, &bf0[10], &cospi44, &bf0[13], &rounding, bit); - bf1[14] = - half_btf_avx2(&cospi36, &bf0[9], &cospi28, &bf0[14], &rounding, bit); - bf1[15] = - half_btf_avx2(&cospi4, &bf0[8], &cospi60, &bf0[15], &rounding, bit); - - addsub_avx2(bf0[16], bf0[17], bf1 + 16, bf1 + 17, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[19], bf0[18], bf1 + 19, bf1 + 18, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[20], bf0[21], bf1 + 20, 
bf1 + 21, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[23], bf0[22], bf1 + 23, bf1 + 22, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[24], bf0[25], bf1 + 24, bf1 + 25, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[27], bf0[26], bf1 + 27, bf1 + 26, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[28], bf0[29], bf1 + 28, bf1 + 29, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[31], bf0[30], bf1 + 31, bf1 + 30, &clamp_lo, &clamp_hi); - - // stage 4 - bf0[0] = bf1[0]; - bf0[1] = bf1[1]; - bf0[2] = bf1[2]; - bf0[3] = bf1[3]; - bf0[4] = - half_btf_avx2(&cospi56, &bf1[4], &cospim8, &bf1[7], &rounding, bit); - bf0[5] = - half_btf_avx2(&cospi24, &bf1[5], &cospim40, &bf1[6], &rounding, bit); - bf0[6] = - half_btf_avx2(&cospi40, &bf1[5], &cospi24, &bf1[6], &rounding, bit); - bf0[7] = half_btf_avx2(&cospi8, &bf1[4], &cospi56, &bf1[7], &rounding, bit); - - addsub_avx2(bf1[8], bf1[9], bf0 + 8, bf0 + 9, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[11], bf1[10], bf0 + 11, bf0 + 10, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[12], bf1[13], bf0 + 12, bf0 + 13, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[15], bf1[14], bf0 + 15, bf0 + 14, &clamp_lo, &clamp_hi); - - bf0[16] = bf1[16]; - bf0[17] = - half_btf_avx2(&cospim8, &bf1[17], &cospi56, &bf1[30], &rounding, bit); - bf0[18] = - half_btf_avx2(&cospim56, &bf1[18], &cospim8, &bf1[29], &rounding, bit); - bf0[19] = bf1[19]; - bf0[20] = bf1[20]; - bf0[21] = - half_btf_avx2(&cospim40, &bf1[21], &cospi24, &bf1[26], &rounding, bit); - bf0[22] = - half_btf_avx2(&cospim24, &bf1[22], &cospim40, &bf1[25], &rounding, bit); - bf0[23] = bf1[23]; - bf0[24] = bf1[24]; - bf0[25] = - half_btf_avx2(&cospim40, &bf1[22], &cospi24, &bf1[25], &rounding, bit); - bf0[26] = - half_btf_avx2(&cospi24, &bf1[21], &cospi40, &bf1[26], &rounding, bit); - bf0[27] = bf1[27]; - bf0[28] = bf1[28]; - bf0[29] = - half_btf_avx2(&cospim8, &bf1[18], &cospi56, &bf1[29], &rounding, bit); - bf0[30] = - half_btf_avx2(&cospi56, &bf1[17], &cospi8, &bf1[30], &rounding, bit); - bf0[31] = bf1[31]; - - // stage 5 - 
bf1[0] = - half_btf_avx2(&cospi32, &bf0[0], &cospi32, &bf0[1], &rounding, bit); - bf1[1] = - half_btf_avx2(&cospi32, &bf0[0], &cospim32, &bf0[1], &rounding, bit); - bf1[2] = - half_btf_avx2(&cospi48, &bf0[2], &cospim16, &bf0[3], &rounding, bit); - bf1[3] = - half_btf_avx2(&cospi16, &bf0[2], &cospi48, &bf0[3], &rounding, bit); - addsub_avx2(bf0[4], bf0[5], bf1 + 4, bf1 + 5, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[7], bf0[6], bf1 + 7, bf1 + 6, &clamp_lo, &clamp_hi); - bf1[8] = bf0[8]; - bf1[9] = - half_btf_avx2(&cospim16, &bf0[9], &cospi48, &bf0[14], &rounding, bit); - bf1[10] = - half_btf_avx2(&cospim48, &bf0[10], &cospim16, &bf0[13], &rounding, bit); - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = - half_btf_avx2(&cospim16, &bf0[10], &cospi48, &bf0[13], &rounding, bit); - bf1[14] = - half_btf_avx2(&cospi48, &bf0[9], &cospi16, &bf0[14], &rounding, bit); - bf1[15] = bf0[15]; - addsub_avx2(bf0[16], bf0[19], bf1 + 16, bf1 + 19, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[17], bf0[18], bf1 + 17, bf1 + 18, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[23], bf0[20], bf1 + 23, bf1 + 20, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[22], bf0[21], bf1 + 22, bf1 + 21, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[24], bf0[27], bf1 + 24, bf1 + 27, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[25], bf0[26], bf1 + 25, bf1 + 26, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[31], bf0[28], bf1 + 31, bf1 + 28, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[30], bf0[29], bf1 + 30, bf1 + 29, &clamp_lo, &clamp_hi); - - // stage 6 - addsub_avx2(bf1[0], bf1[3], bf0 + 0, bf0 + 3, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[1], bf1[2], bf0 + 1, bf0 + 2, &clamp_lo, &clamp_hi); - bf0[4] = bf1[4]; - bf0[5] = - half_btf_avx2(&cospim32, &bf1[5], &cospi32, &bf1[6], &rounding, bit); - bf0[6] = - half_btf_avx2(&cospi32, &bf1[5], &cospi32, &bf1[6], &rounding, bit); - bf0[7] = bf1[7]; - addsub_avx2(bf1[8], bf1[11], bf0 + 8, bf0 + 11, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[9], bf1[10], bf0 + 9, bf0 + 10, &clamp_lo, &clamp_hi); 
- addsub_avx2(bf1[15], bf1[12], bf0 + 15, bf0 + 12, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[14], bf1[13], bf0 + 14, bf0 + 13, &clamp_lo, &clamp_hi); - bf0[16] = bf1[16]; - bf0[17] = bf1[17]; - bf0[18] = - half_btf_avx2(&cospim16, &bf1[18], &cospi48, &bf1[29], &rounding, bit); - bf0[19] = - half_btf_avx2(&cospim16, &bf1[19], &cospi48, &bf1[28], &rounding, bit); - bf0[20] = - half_btf_avx2(&cospim48, &bf1[20], &cospim16, &bf1[27], &rounding, bit); - bf0[21] = - half_btf_avx2(&cospim48, &bf1[21], &cospim16, &bf1[26], &rounding, bit); - bf0[22] = bf1[22]; - bf0[23] = bf1[23]; - bf0[24] = bf1[24]; - bf0[25] = bf1[25]; - bf0[26] = - half_btf_avx2(&cospim16, &bf1[21], &cospi48, &bf1[26], &rounding, bit); - bf0[27] = - half_btf_avx2(&cospim16, &bf1[20], &cospi48, &bf1[27], &rounding, bit); - bf0[28] = - half_btf_avx2(&cospi48, &bf1[19], &cospi16, &bf1[28], &rounding, bit); - bf0[29] = - half_btf_avx2(&cospi48, &bf1[18], &cospi16, &bf1[29], &rounding, bit); - bf0[30] = bf1[30]; - bf0[31] = bf1[31]; - - // stage 7 - addsub_avx2(bf0[0], bf0[7], bf1 + 0, bf1 + 7, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[1], bf0[6], bf1 + 1, bf1 + 6, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[2], bf0[5], bf1 + 2, bf1 + 5, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[3], bf0[4], bf1 + 3, bf1 + 4, &clamp_lo, &clamp_hi); - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = - half_btf_avx2(&cospim32, &bf0[10], &cospi32, &bf0[13], &rounding, bit); - bf1[11] = - half_btf_avx2(&cospim32, &bf0[11], &cospi32, &bf0[12], &rounding, bit); - bf1[12] = - half_btf_avx2(&cospi32, &bf0[11], &cospi32, &bf0[12], &rounding, bit); - bf1[13] = - half_btf_avx2(&cospi32, &bf0[10], &cospi32, &bf0[13], &rounding, bit); - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - addsub_avx2(bf0[16], bf0[23], bf1 + 16, bf1 + 23, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[17], bf0[22], bf1 + 17, bf1 + 22, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[18], bf0[21], bf1 + 18, bf1 + 21, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[19], bf0[20], bf1 + 19, bf1 + 
20, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[31], bf0[24], bf1 + 31, bf1 + 24, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[30], bf0[25], bf1 + 30, bf1 + 25, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[29], bf0[26], bf1 + 29, bf1 + 26, &clamp_lo, &clamp_hi); - addsub_avx2(bf0[28], bf0[27], bf1 + 28, bf1 + 27, &clamp_lo, &clamp_hi); - - // stage 8 - addsub_avx2(bf1[0], bf1[15], bf0 + 0, bf0 + 15, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[1], bf1[14], bf0 + 1, bf0 + 14, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[2], bf1[13], bf0 + 2, bf0 + 13, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[3], bf1[12], bf0 + 3, bf0 + 12, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[4], bf1[11], bf0 + 4, bf0 + 11, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[5], bf1[10], bf0 + 5, bf0 + 10, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[6], bf1[9], bf0 + 6, bf0 + 9, &clamp_lo, &clamp_hi); - addsub_avx2(bf1[7], bf1[8], bf0 + 7, bf0 + 8, &clamp_lo, &clamp_hi); - bf0[16] = bf1[16]; - bf0[17] = bf1[17]; - bf0[18] = bf1[18]; - bf0[19] = bf1[19]; - bf0[20] = - half_btf_avx2(&cospim32, &bf1[20], &cospi32, &bf1[27], &rounding, bit); - bf0[21] = - half_btf_avx2(&cospim32, &bf1[21], &cospi32, &bf1[26], &rounding, bit); - bf0[22] = - half_btf_avx2(&cospim32, &bf1[22], &cospi32, &bf1[25], &rounding, bit); - bf0[23] = - half_btf_avx2(&cospim32, &bf1[23], &cospi32, &bf1[24], &rounding, bit); - bf0[24] = - half_btf_avx2(&cospi32, &bf1[23], &cospi32, &bf1[24], &rounding, bit); - bf0[25] = - half_btf_avx2(&cospi32, &bf1[22], &cospi32, &bf1[25], &rounding, bit); - bf0[26] = - half_btf_avx2(&cospi32, &bf1[21], &cospi32, &bf1[26], &rounding, bit); - bf0[27] = - half_btf_avx2(&cospi32, &bf1[20], &cospi32, &bf1[27], &rounding, bit); - bf0[28] = bf1[28]; - bf0[29] = bf1[29]; - bf0[30] = bf1[30]; - bf0[31] = bf1[31]; - - // stage 9 - if (do_cols) { - addsub_no_clamp_avx2(bf0[0], bf0[31], out + 0, out + 31); - addsub_no_clamp_avx2(bf0[1], bf0[30], out + 1, out + 30); - addsub_no_clamp_avx2(bf0[2], bf0[29], out + 2, out + 29); - 
addsub_no_clamp_avx2(bf0[3], bf0[28], out + 3, out + 28); - addsub_no_clamp_avx2(bf0[4], bf0[27], out + 4, out + 27); - addsub_no_clamp_avx2(bf0[5], bf0[26], out + 5, out + 26); - addsub_no_clamp_avx2(bf0[6], bf0[25], out + 6, out + 25); - addsub_no_clamp_avx2(bf0[7], bf0[24], out + 7, out + 24); - addsub_no_clamp_avx2(bf0[8], bf0[23], out + 8, out + 23); - addsub_no_clamp_avx2(bf0[9], bf0[22], out + 9, out + 22); - addsub_no_clamp_avx2(bf0[10], bf0[21], out + 10, out + 21); - addsub_no_clamp_avx2(bf0[11], bf0[20], out + 11, out + 20); - addsub_no_clamp_avx2(bf0[12], bf0[19], out + 12, out + 19); - addsub_no_clamp_avx2(bf0[13], bf0[18], out + 13, out + 18); - addsub_no_clamp_avx2(bf0[14], bf0[17], out + 14, out + 17); - addsub_no_clamp_avx2(bf0[15], bf0[16], out + 15, out + 16); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m256i clamp_lo_out = _mm256_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m256i clamp_hi_out = _mm256_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - addsub_shift_avx2(bf0[0], bf0[31], out + 0, out + 31, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[1], bf0[30], out + 1, out + 30, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[2], bf0[29], out + 2, out + 29, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[3], bf0[28], out + 3, out + 28, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[4], bf0[27], out + 4, out + 27, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[5], bf0[26], out + 5, out + 26, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[6], bf0[25], out + 6, out + 25, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[7], bf0[24], out + 7, out + 24, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[8], bf0[23], out + 8, out + 23, &clamp_lo_out, - &clamp_hi_out, 
out_shift); - addsub_shift_avx2(bf0[9], bf0[22], out + 9, out + 22, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[10], bf0[21], out + 10, out + 21, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[11], bf0[20], out + 11, out + 20, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[12], bf0[19], out + 12, out + 19, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[13], bf0[18], out + 13, out + 18, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[14], bf0[17], out + 14, out + 17, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_avx2(bf0[15], bf0[16], out + 15, out + 16, &clamp_lo_out, - &clamp_hi_out, out_shift); - } - } -} - -typedef void (*transform_1d_avx2)(__m256i *in, __m256i *out, int bit, - int do_cols, int bd, int out_shift); - -static const transform_1d_avx2 - highbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = { - { - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - }, - { { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } }, - { - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - }, - { { idct32_low1_avx2, idct32_low8_avx2, idct32_low16_avx2, idct32_avx2 }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } }, - - { { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } } - }; - -static void highbd_inv_txfm2d_add_no_identity_avx2(const int32_t *input, - uint16_t *output, int stride, - TX_TYPE tx_type, - TX_SIZE tx_size, int eob, - const int bd) { - __m256i buf1[64 * 2]; - int eobx, eoby; - get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const 
int buf_size_w_div8 = txfm_size_col >> 3; - const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3; - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - const int input_stride = AOMMIN(32, txfm_size_col); - - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const transform_1d_avx2 row_txfm = - highbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_1d_avx2 col_txfm = - highbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - // 1st stage: column transform - for (int i = 0; i < buf_size_nonzero_h_div8; i++) { - __m256i buf0[32]; - const int32_t *input_row = input + i * input_stride * 8; - for (int j = 0; j < buf_size_nonzero_w_div8; ++j) { - __m256i *buf0_cur = buf0 + j * 8; - load_buffer_32x32(input_row + j * 8, buf0_cur, input_stride, 8); - - transpose_8x8_avx2(&buf0_cur[0], &buf0_cur[0]); - } - - row_txfm(buf0, buf0, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, -shift[0]); - - __m256i *_buf1 = buf1 + i * 8; - for (int j = 0; j < buf_size_w_div8; ++j) { - transpose_8x8_avx2(&buf0[j * 8], &_buf1[j * txfm_size_row]); - } - } - // 2nd stage: column transform - for (int i = 0; i < buf_size_w_div8; i++) { - col_txfm(buf1 + i * txfm_size_row, buf1 + i * txfm_size_row, - inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - - av1_round_shift_array_32_avx2(buf1 + i * txfm_size_row, - buf1 + i * txfm_size_row, txfm_size_row, - -shift[1]); - } - - // write to buffer - { - for (int i = 0; i < (txfm_size_col >> 4); i++) { - highbd_write_buffer_16xn_avx2(buf1 + i * txfm_size_row * 2, - output + 16 * i, stride, ud_flip, - txfm_size_row, bd); - } - } -} - -void av1_highbd_inv_txfm2d_add_universe_avx2(const int32_t *input, - uint8_t *output, int stride, - TX_TYPE tx_type, TX_SIZE tx_size, - int eob, const int bd) { - switch (tx_type) { - 
case DCT_DCT: - highbd_inv_txfm2d_add_no_identity_avx2(input, CONVERT_TO_SHORTPTR(output), - stride, tx_type, tx_size, eob, bd); - break; - default: assert(0); break; - } -} - -void av1_highbd_inv_txfm_add_32x32_avx2(const tran_low_t *input, uint8_t *dest, - int stride, - const TxfmParam *txfm_param) { - const int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - switch (tx_type) { - case DCT_DCT: - av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type, - txfm_param->tx_size, - txfm_param->eob, bd); - break; - // Assembly version doesn't support IDTX, so use C version for it. - case IDTX: - av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, - tx_type, bd); - break; - - default: assert(0); - } -} - -void av1_highbd_inv_txfm_add_avx2(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); - const TX_SIZE tx_size = txfm_param->tx_size; - switch (tx_size) { - case TX_32X32: - av1_highbd_inv_txfm_add_32x32_avx2(input, dest, stride, txfm_param); - break; - case TX_16X16: - av1_highbd_inv_txfm_add_16x16_sse4_1(input, dest, stride, txfm_param); - break; - case TX_8X8: - av1_highbd_inv_txfm_add_8x8_sse4_1(input, dest, stride, txfm_param); - break; - case TX_4X8: - av1_highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param); - break; - case TX_8X4: - av1_highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param); - break; - case TX_8X16: - av1_highbd_inv_txfm_add_8x16_sse4_1(input, dest, stride, txfm_param); - break; - case TX_16X8: - av1_highbd_inv_txfm_add_16x8_sse4_1(input, dest, stride, txfm_param); - break; - case TX_16X32: - av1_highbd_inv_txfm_add_16x32(input, dest, stride, txfm_param); - break; - case TX_32X16: - av1_highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param); - break; - case TX_32X64: - av1_highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param); - 
break; - case TX_64X32: - av1_highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param); - break; - case TX_4X4: - av1_highbd_inv_txfm_add_4x4_sse4_1(input, dest, stride, txfm_param); - break; - case TX_16X4: - av1_highbd_inv_txfm_add_16x4(input, dest, stride, txfm_param); - break; - case TX_4X16: - av1_highbd_inv_txfm_add_4x16(input, dest, stride, txfm_param); - break; - case TX_8X32: - av1_highbd_inv_txfm_add_8x32(input, dest, stride, txfm_param); - break; - case TX_32X8: - av1_highbd_inv_txfm_add_32x8(input, dest, stride, txfm_param); - break; - case TX_64X64: - case TX_16X64: - case TX_64X16: - av1_highbd_inv_txfm2d_add_universe_sse4_1( - input, dest, stride, txfm_param->tx_type, txfm_param->tx_size, - txfm_param->eob, txfm_param->bd); - break; - default: assert(0 && "Invalid transform size"); break; - } -} diff --git a/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c b/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c deleted file mode 100644 index e29e0baf5..000000000 --- a/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c +++ /dev/null @@ -1,5348 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ -#include <assert.h> -#include <smmintrin.h> /* SSE4.1 */ - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "av1/common/av1_inv_txfm1d_cfg.h" -#include "av1/common/idct.h" -#include "av1/common/x86/av1_inv_txfm_ssse3.h" -#include "av1/common/x86/av1_txfm_sse4.h" -#include "av1/common/x86/highbd_txfm_utility_sse4.h" - -static INLINE __m128i highbd_clamp_epi16(__m128i u, int bd) { - const __m128i zero = _mm_setzero_si128(); - const __m128i one = _mm_set1_epi16(1); - const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one); - __m128i clamped, mask; - - mask = _mm_cmpgt_epi16(u, max); - clamped = _mm_andnot_si128(mask, u); - mask = _mm_and_si128(mask, max); - clamped = _mm_or_si128(mask, clamped); - mask = _mm_cmpgt_epi16(clamped, zero); - clamped = _mm_and_si128(clamped, mask); - - return clamped; -} - -static INLINE __m128i highbd_get_recon_8x8_sse4_1(const __m128i pred, - __m128i res0, __m128i res1, - const int bd) { - __m128i x0 = _mm_cvtepi16_epi32(pred); - __m128i x1 = _mm_cvtepi16_epi32(_mm_srli_si128(pred, 8)); - - x0 = _mm_add_epi32(res0, x0); - x1 = _mm_add_epi32(res1, x1); - x0 = _mm_packus_epi32(x0, x1); - x0 = highbd_clamp_epi16(x0, bd); - return x0; -} - -static INLINE void highbd_write_buffer_8xn_sse4_1(__m128i *in, uint16_t *output, - int stride, int flipud, - int height, const int bd) { - int j = flipud ? (height - 1) : 0; - const int step = flipud ? 
-1 : 1; - for (int i = 0; i < height; ++i, j += step) { - __m128i v = _mm_loadu_si128((__m128i const *)(output + i * stride)); - __m128i u = highbd_get_recon_8x8_sse4_1(v, in[j], in[j + height], bd); - - _mm_storeu_si128((__m128i *)(output + i * stride), u); - } -} - -static INLINE void load_buffer_32bit_input(const int32_t *in, int stride, - __m128i *out, int out_size) { - for (int i = 0; i < out_size; ++i) { - out[i] = _mm_loadu_si128((const __m128i *)(in + i * stride)); - } -} - -static INLINE void load_buffer_4x4(const int32_t *coeff, __m128i *in) { - in[0] = _mm_load_si128((const __m128i *)(coeff + 0)); - in[1] = _mm_load_si128((const __m128i *)(coeff + 4)); - in[2] = _mm_load_si128((const __m128i *)(coeff + 8)); - in[3] = _mm_load_si128((const __m128i *)(coeff + 12)); -} - -static void addsub_sse4_1(const __m128i in0, const __m128i in1, __m128i *out0, - __m128i *out1, const __m128i *clamp_lo, - const __m128i *clamp_hi) { - __m128i a0 = _mm_add_epi32(in0, in1); - __m128i a1 = _mm_sub_epi32(in0, in1); - - a0 = _mm_max_epi32(a0, *clamp_lo); - a0 = _mm_min_epi32(a0, *clamp_hi); - a1 = _mm_max_epi32(a1, *clamp_lo); - a1 = _mm_min_epi32(a1, *clamp_hi); - - *out0 = a0; - *out1 = a1; -} - -static void addsub_no_clamp_sse4_1(const __m128i in0, const __m128i in1, - __m128i *out0, __m128i *out1) { - __m128i a0 = _mm_add_epi32(in0, in1); - __m128i a1 = _mm_sub_epi32(in0, in1); - - *out0 = a0; - *out1 = a1; -} - -static void addsub_shift_sse4_1(const __m128i in0, const __m128i in1, - __m128i *out0, __m128i *out1, - const __m128i *clamp_lo, - const __m128i *clamp_hi, int shift) { - __m128i offset = _mm_set1_epi32((1 << shift) >> 1); - __m128i in0_w_offset = _mm_add_epi32(in0, offset); - __m128i a0 = _mm_add_epi32(in0_w_offset, in1); - __m128i a1 = _mm_sub_epi32(in0_w_offset, in1); - - a0 = _mm_sra_epi32(a0, _mm_cvtsi32_si128(shift)); - a1 = _mm_sra_epi32(a1, _mm_cvtsi32_si128(shift)); - - a0 = _mm_max_epi32(a0, *clamp_lo); - a0 = _mm_min_epi32(a0, *clamp_hi); - a1 = 
_mm_max_epi32(a1, *clamp_lo); - a1 = _mm_min_epi32(a1, *clamp_hi); - - *out0 = a0; - *out1 = a1; -} - -static INLINE void idct32_stage4_sse4_1( - __m128i *bf1, const __m128i *cospim8, const __m128i *cospi56, - const __m128i *cospi8, const __m128i *cospim56, const __m128i *cospim40, - const __m128i *cospi24, const __m128i *cospi40, const __m128i *cospim24, - const __m128i *rounding, int bit) { - __m128i temp1, temp2; - temp1 = half_btf_sse4_1(cospim8, &bf1[17], cospi56, &bf1[30], rounding, bit); - bf1[30] = half_btf_sse4_1(cospi56, &bf1[17], cospi8, &bf1[30], rounding, bit); - bf1[17] = temp1; - - temp2 = half_btf_sse4_1(cospim56, &bf1[18], cospim8, &bf1[29], rounding, bit); - bf1[29] = - half_btf_sse4_1(cospim8, &bf1[18], cospi56, &bf1[29], rounding, bit); - bf1[18] = temp2; - - temp1 = half_btf_sse4_1(cospim40, &bf1[21], cospi24, &bf1[26], rounding, bit); - bf1[26] = - half_btf_sse4_1(cospi24, &bf1[21], cospi40, &bf1[26], rounding, bit); - bf1[21] = temp1; - - temp2 = - half_btf_sse4_1(cospim24, &bf1[22], cospim40, &bf1[25], rounding, bit); - bf1[25] = - half_btf_sse4_1(cospim40, &bf1[22], cospi24, &bf1[25], rounding, bit); - bf1[22] = temp2; -} - -static INLINE void idct32_stage5_sse4_1( - __m128i *bf1, const __m128i *cospim16, const __m128i *cospi48, - const __m128i *cospi16, const __m128i *cospim48, const __m128i *clamp_lo, - const __m128i *clamp_hi, const __m128i *rounding, int bit) { - __m128i temp1, temp2; - temp1 = half_btf_sse4_1(cospim16, &bf1[9], cospi48, &bf1[14], rounding, bit); - bf1[14] = half_btf_sse4_1(cospi48, &bf1[9], cospi16, &bf1[14], rounding, bit); - bf1[9] = temp1; - - temp2 = - half_btf_sse4_1(cospim48, &bf1[10], cospim16, &bf1[13], rounding, bit); - bf1[13] = - half_btf_sse4_1(cospim16, &bf1[10], cospi48, &bf1[13], rounding, bit); - bf1[10] = temp2; - - addsub_sse4_1(bf1[16], bf1[19], bf1 + 16, bf1 + 19, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[17], bf1[18], bf1 + 17, bf1 + 18, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[23], bf1[20], bf1 + 
23, bf1 + 20, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[22], bf1[21], bf1 + 22, bf1 + 21, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[24], bf1[27], bf1 + 24, bf1 + 27, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[25], bf1[26], bf1 + 25, bf1 + 26, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[31], bf1[28], bf1 + 31, bf1 + 28, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[30], bf1[29], bf1 + 30, bf1 + 29, clamp_lo, clamp_hi); -} - -static INLINE void idct32_stage6_sse4_1( - __m128i *bf1, const __m128i *cospim32, const __m128i *cospi32, - const __m128i *cospim16, const __m128i *cospi48, const __m128i *cospi16, - const __m128i *cospim48, const __m128i *clamp_lo, const __m128i *clamp_hi, - const __m128i *rounding, int bit) { - __m128i temp1, temp2; - temp1 = half_btf_sse4_1(cospim32, &bf1[5], cospi32, &bf1[6], rounding, bit); - bf1[6] = half_btf_sse4_1(cospi32, &bf1[5], cospi32, &bf1[6], rounding, bit); - bf1[5] = temp1; - - addsub_sse4_1(bf1[8], bf1[11], bf1 + 8, bf1 + 11, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[9], bf1[10], bf1 + 9, bf1 + 10, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[15], bf1[12], bf1 + 15, bf1 + 12, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[14], bf1[13], bf1 + 14, bf1 + 13, clamp_lo, clamp_hi); - - temp1 = half_btf_sse4_1(cospim16, &bf1[18], cospi48, &bf1[29], rounding, bit); - bf1[29] = - half_btf_sse4_1(cospi48, &bf1[18], cospi16, &bf1[29], rounding, bit); - bf1[18] = temp1; - temp2 = half_btf_sse4_1(cospim16, &bf1[19], cospi48, &bf1[28], rounding, bit); - bf1[28] = - half_btf_sse4_1(cospi48, &bf1[19], cospi16, &bf1[28], rounding, bit); - bf1[19] = temp2; - temp1 = - half_btf_sse4_1(cospim48, &bf1[20], cospim16, &bf1[27], rounding, bit); - bf1[27] = - half_btf_sse4_1(cospim16, &bf1[20], cospi48, &bf1[27], rounding, bit); - bf1[20] = temp1; - temp2 = - half_btf_sse4_1(cospim48, &bf1[21], cospim16, &bf1[26], rounding, bit); - bf1[26] = - half_btf_sse4_1(cospim16, &bf1[21], cospi48, &bf1[26], rounding, bit); - bf1[21] = temp2; -} - -static INLINE void 
idct32_stage7_sse4_1(__m128i *bf1, const __m128i *cospim32, - const __m128i *cospi32, - const __m128i *clamp_lo, - const __m128i *clamp_hi, - const __m128i *rounding, int bit) { - __m128i temp1, temp2; - addsub_sse4_1(bf1[0], bf1[7], bf1 + 0, bf1 + 7, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[1], bf1[6], bf1 + 1, bf1 + 6, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[2], bf1[5], bf1 + 2, bf1 + 5, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[3], bf1[4], bf1 + 3, bf1 + 4, clamp_lo, clamp_hi); - - temp1 = half_btf_sse4_1(cospim32, &bf1[10], cospi32, &bf1[13], rounding, bit); - bf1[13] = - half_btf_sse4_1(cospi32, &bf1[10], cospi32, &bf1[13], rounding, bit); - bf1[10] = temp1; - temp2 = half_btf_sse4_1(cospim32, &bf1[11], cospi32, &bf1[12], rounding, bit); - bf1[12] = - half_btf_sse4_1(cospi32, &bf1[11], cospi32, &bf1[12], rounding, bit); - bf1[11] = temp2; - - addsub_sse4_1(bf1[16], bf1[23], bf1 + 16, bf1 + 23, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[17], bf1[22], bf1 + 17, bf1 + 22, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[18], bf1[21], bf1 + 18, bf1 + 21, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[19], bf1[20], bf1 + 19, bf1 + 20, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[31], bf1[24], bf1 + 31, bf1 + 24, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[30], bf1[25], bf1 + 30, bf1 + 25, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[29], bf1[26], bf1 + 29, bf1 + 26, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[28], bf1[27], bf1 + 28, bf1 + 27, clamp_lo, clamp_hi); -} - -static INLINE void idct32_stage8_sse4_1(__m128i *bf1, const __m128i *cospim32, - const __m128i *cospi32, - const __m128i *clamp_lo, - const __m128i *clamp_hi, - const __m128i *rounding, int bit) { - __m128i temp1, temp2; - addsub_sse4_1(bf1[0], bf1[15], bf1 + 0, bf1 + 15, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[1], bf1[14], bf1 + 1, bf1 + 14, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[2], bf1[13], bf1 + 2, bf1 + 13, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[3], bf1[12], bf1 + 3, bf1 + 12, clamp_lo, clamp_hi); - 
addsub_sse4_1(bf1[4], bf1[11], bf1 + 4, bf1 + 11, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[5], bf1[10], bf1 + 5, bf1 + 10, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[6], bf1[9], bf1 + 6, bf1 + 9, clamp_lo, clamp_hi); - addsub_sse4_1(bf1[7], bf1[8], bf1 + 7, bf1 + 8, clamp_lo, clamp_hi); - - temp1 = half_btf_sse4_1(cospim32, &bf1[20], cospi32, &bf1[27], rounding, bit); - bf1[27] = - half_btf_sse4_1(cospi32, &bf1[20], cospi32, &bf1[27], rounding, bit); - bf1[20] = temp1; - temp2 = half_btf_sse4_1(cospim32, &bf1[21], cospi32, &bf1[26], rounding, bit); - bf1[26] = - half_btf_sse4_1(cospi32, &bf1[21], cospi32, &bf1[26], rounding, bit); - bf1[21] = temp2; - temp1 = half_btf_sse4_1(cospim32, &bf1[22], cospi32, &bf1[25], rounding, bit); - bf1[25] = - half_btf_sse4_1(cospi32, &bf1[22], cospi32, &bf1[25], rounding, bit); - bf1[22] = temp1; - temp2 = half_btf_sse4_1(cospim32, &bf1[23], cospi32, &bf1[24], rounding, bit); - bf1[24] = - half_btf_sse4_1(cospi32, &bf1[23], cospi32, &bf1[24], rounding, bit); - bf1[23] = temp2; -} - -static INLINE void idct32_stage9_sse4_1(__m128i *bf1, __m128i *out, - const int do_cols, const int bd, - const int out_shift, - const int log_range) { - if (do_cols) { - addsub_no_clamp_sse4_1(bf1[0], bf1[31], out + 0, out + 31); - addsub_no_clamp_sse4_1(bf1[1], bf1[30], out + 1, out + 30); - addsub_no_clamp_sse4_1(bf1[2], bf1[29], out + 2, out + 29); - addsub_no_clamp_sse4_1(bf1[3], bf1[28], out + 3, out + 28); - addsub_no_clamp_sse4_1(bf1[4], bf1[27], out + 4, out + 27); - addsub_no_clamp_sse4_1(bf1[5], bf1[26], out + 5, out + 26); - addsub_no_clamp_sse4_1(bf1[6], bf1[25], out + 6, out + 25); - addsub_no_clamp_sse4_1(bf1[7], bf1[24], out + 7, out + 24); - addsub_no_clamp_sse4_1(bf1[8], bf1[23], out + 8, out + 23); - addsub_no_clamp_sse4_1(bf1[9], bf1[22], out + 9, out + 22); - addsub_no_clamp_sse4_1(bf1[10], bf1[21], out + 10, out + 21); - addsub_no_clamp_sse4_1(bf1[11], bf1[20], out + 11, out + 20); - addsub_no_clamp_sse4_1(bf1[12], bf1[19], out + 12, 
out + 19); - addsub_no_clamp_sse4_1(bf1[13], bf1[18], out + 13, out + 18); - addsub_no_clamp_sse4_1(bf1[14], bf1[17], out + 14, out + 17); - addsub_no_clamp_sse4_1(bf1[15], bf1[16], out + 15, out + 16); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - addsub_shift_sse4_1(bf1[0], bf1[31], out + 0, out + 31, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[1], bf1[30], out + 1, out + 30, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[2], bf1[29], out + 2, out + 29, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[3], bf1[28], out + 3, out + 28, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[4], bf1[27], out + 4, out + 27, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[5], bf1[26], out + 5, out + 26, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[6], bf1[25], out + 6, out + 25, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[7], bf1[24], out + 7, out + 24, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[8], bf1[23], out + 8, out + 23, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[9], bf1[22], out + 9, out + 22, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[10], bf1[21], out + 10, out + 21, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[11], bf1[20], out + 11, out + 20, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[12], bf1[19], out + 12, out + 19, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[13], bf1[18], out + 13, out + 18, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[14], bf1[17], out + 14, 
out + 17, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf1[15], bf1[16], out + 15, out + 16, &clamp_lo_out, - &clamp_hi_out, out_shift); - } -} - -static void neg_shift_sse4_1(const __m128i in0, const __m128i in1, - __m128i *out0, __m128i *out1, - const __m128i *clamp_lo, const __m128i *clamp_hi, - int shift) { - __m128i offset = _mm_set1_epi32((1 << shift) >> 1); - __m128i a0 = _mm_add_epi32(offset, in0); - __m128i a1 = _mm_sub_epi32(offset, in1); - - a0 = _mm_sra_epi32(a0, _mm_cvtsi32_si128(shift)); - a1 = _mm_sra_epi32(a1, _mm_cvtsi32_si128(shift)); - - a0 = _mm_max_epi32(a0, *clamp_lo); - a0 = _mm_min_epi32(a0, *clamp_hi); - a1 = _mm_max_epi32(a1, *clamp_lo); - a1 = _mm_min_epi32(a1, *clamp_hi); - - *out0 = a0; - *out1 = a1; -} - -static void idct4x4_sse4_1(__m128i *in, int bit, int do_cols, int bd) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - - __m128i u0, u1, u2, u3; - __m128i v0, v1, v2, v3, x, y; - - v0 = _mm_unpacklo_epi32(in[0], in[1]); - v1 = _mm_unpackhi_epi32(in[0], in[1]); - v2 = _mm_unpacklo_epi32(in[2], in[3]); - v3 = _mm_unpackhi_epi32(in[2], in[3]); - - u0 = _mm_unpacklo_epi64(v0, v2); - u1 = _mm_unpackhi_epi64(v0, v2); - u2 = _mm_unpacklo_epi64(v1, v3); - u3 = _mm_unpackhi_epi64(v1, v3); - - x = _mm_mullo_epi32(u0, cospi32); - y = _mm_mullo_epi32(u2, cospi32); - v0 = _mm_add_epi32(x, y); - v0 = _mm_add_epi32(v0, rnding); - v0 = _mm_srai_epi32(v0, bit); - - v1 = _mm_sub_epi32(x, y); - v1 = _mm_add_epi32(v1, rnding); - v1 = _mm_srai_epi32(v1, bit); - - x = _mm_mullo_epi32(u1, cospi48); - y = _mm_mullo_epi32(u3, cospim16); - v2 = _mm_add_epi32(x, y); - v2 = _mm_add_epi32(v2, rnding); - v2 = _mm_srai_epi32(v2, bit); - - x = _mm_mullo_epi32(u1, cospi16); - y = 
_mm_mullo_epi32(u3, cospi48); - v3 = _mm_add_epi32(x, y); - v3 = _mm_add_epi32(v3, rnding); - v3 = _mm_srai_epi32(v3, bit); - - if (do_cols) { - addsub_no_clamp_sse4_1(v0, v3, in + 0, in + 3); - addsub_no_clamp_sse4_1(v1, v2, in + 1, in + 2); - } else { - const int log_range = AOMMAX(16, bd + 6); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - addsub_sse4_1(v0, v3, in + 0, in + 3, &clamp_lo, &clamp_hi); - addsub_sse4_1(v1, v2, in + 1, in + 2, &clamp_lo, &clamp_hi); - } -} - -static void iadst4x4_sse4_1(__m128i *in, int bit, int do_cols, int bd) { - const int32_t *sinpi = sinpi_arr(bit); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const __m128i sinpi1 = _mm_set1_epi32((int)sinpi[1]); - const __m128i sinpi2 = _mm_set1_epi32((int)sinpi[2]); - const __m128i sinpi3 = _mm_set1_epi32((int)sinpi[3]); - const __m128i sinpi4 = _mm_set1_epi32((int)sinpi[4]); - __m128i t; - __m128i s0, s1, s2, s3, s4, s5, s6, s7; - __m128i x0, x1, x2, x3; - __m128i u0, u1, u2, u3; - __m128i v0, v1, v2, v3; - - v0 = _mm_unpacklo_epi32(in[0], in[1]); - v1 = _mm_unpackhi_epi32(in[0], in[1]); - v2 = _mm_unpacklo_epi32(in[2], in[3]); - v3 = _mm_unpackhi_epi32(in[2], in[3]); - - x0 = _mm_unpacklo_epi64(v0, v2); - x1 = _mm_unpackhi_epi64(v0, v2); - x2 = _mm_unpacklo_epi64(v1, v3); - x3 = _mm_unpackhi_epi64(v1, v3); - - s0 = _mm_mullo_epi32(x0, sinpi1); - s1 = _mm_mullo_epi32(x0, sinpi2); - s2 = _mm_mullo_epi32(x1, sinpi3); - s3 = _mm_mullo_epi32(x2, sinpi4); - s4 = _mm_mullo_epi32(x2, sinpi1); - s5 = _mm_mullo_epi32(x3, sinpi2); - s6 = _mm_mullo_epi32(x3, sinpi4); - t = _mm_sub_epi32(x0, x2); - s7 = _mm_add_epi32(t, x3); - - t = _mm_add_epi32(s0, s3); - s0 = _mm_add_epi32(t, s5); - t = _mm_sub_epi32(s1, s4); - s1 = _mm_sub_epi32(t, s6); - s3 = s2; - s2 = _mm_mullo_epi32(s7, sinpi3); - - u0 = _mm_add_epi32(s0, s3); - u1 = _mm_add_epi32(s1, s3); - u2 = s2; - t = _mm_add_epi32(s0, s1); - u3 = 
_mm_sub_epi32(t, s3); - - u0 = _mm_add_epi32(u0, rnding); - u0 = _mm_srai_epi32(u0, bit); - - u1 = _mm_add_epi32(u1, rnding); - u1 = _mm_srai_epi32(u1, bit); - - u2 = _mm_add_epi32(u2, rnding); - u2 = _mm_srai_epi32(u2, bit); - - u3 = _mm_add_epi32(u3, rnding); - u3 = _mm_srai_epi32(u3, bit); - - if (!do_cols) { - const int log_range = AOMMAX(16, bd + 6); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - - u0 = _mm_max_epi32(u0, clamp_lo); - u0 = _mm_min_epi32(u0, clamp_hi); - u1 = _mm_max_epi32(u1, clamp_lo); - u1 = _mm_min_epi32(u1, clamp_hi); - u2 = _mm_max_epi32(u2, clamp_lo); - u2 = _mm_min_epi32(u2, clamp_hi); - u3 = _mm_max_epi32(u3, clamp_lo); - u3 = _mm_min_epi32(u3, clamp_hi); - } - - in[0] = u0; - in[1] = u1; - in[2] = u2; - in[3] = u3; -} - -static INLINE void round_shift_4x4(__m128i *in, int shift) { - __m128i rnding = _mm_set1_epi32(1 << (shift - 1)); - - in[0] = _mm_add_epi32(in[0], rnding); - in[1] = _mm_add_epi32(in[1], rnding); - in[2] = _mm_add_epi32(in[2], rnding); - in[3] = _mm_add_epi32(in[3], rnding); - - in[0] = _mm_srai_epi32(in[0], shift); - in[1] = _mm_srai_epi32(in[1], shift); - in[2] = _mm_srai_epi32(in[2], shift); - in[3] = _mm_srai_epi32(in[3], shift); -} - -static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride, - int fliplr, int flipud, int shift, int bd) { - const __m128i zero = _mm_setzero_si128(); - __m128i u0, u1, u2, u3; - __m128i v0, v1, v2, v3; - - round_shift_4x4(in, shift); - - v0 = _mm_loadl_epi64((__m128i const *)(output + 0 * stride)); - v1 = _mm_loadl_epi64((__m128i const *)(output + 1 * stride)); - v2 = _mm_loadl_epi64((__m128i const *)(output + 2 * stride)); - v3 = _mm_loadl_epi64((__m128i const *)(output + 3 * stride)); - - v0 = _mm_unpacklo_epi16(v0, zero); - v1 = _mm_unpacklo_epi16(v1, zero); - v2 = _mm_unpacklo_epi16(v2, zero); - v3 = _mm_unpacklo_epi16(v3, zero); - - if (fliplr) { - in[0] = 
_mm_shuffle_epi32(in[0], 0x1B); - in[1] = _mm_shuffle_epi32(in[1], 0x1B); - in[2] = _mm_shuffle_epi32(in[2], 0x1B); - in[3] = _mm_shuffle_epi32(in[3], 0x1B); - } - - if (flipud) { - u0 = _mm_add_epi32(in[3], v0); - u1 = _mm_add_epi32(in[2], v1); - u2 = _mm_add_epi32(in[1], v2); - u3 = _mm_add_epi32(in[0], v3); - } else { - u0 = _mm_add_epi32(in[0], v0); - u1 = _mm_add_epi32(in[1], v1); - u2 = _mm_add_epi32(in[2], v2); - u3 = _mm_add_epi32(in[3], v3); - } - - v0 = _mm_packus_epi32(u0, u1); - v2 = _mm_packus_epi32(u2, u3); - - u0 = highbd_clamp_epi16(v0, bd); - u2 = highbd_clamp_epi16(v2, bd); - - v0 = _mm_unpacklo_epi64(u0, u0); - v1 = _mm_unpackhi_epi64(u0, u0); - v2 = _mm_unpacklo_epi64(u2, u2); - v3 = _mm_unpackhi_epi64(u2, u2); - - _mm_storel_epi64((__m128i *)(output + 0 * stride), v0); - _mm_storel_epi64((__m128i *)(output + 1 * stride), v1); - _mm_storel_epi64((__m128i *)(output + 2 * stride), v2); - _mm_storel_epi64((__m128i *)(output + 3 * stride), v3); -} - -void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - __m128i in[4]; - const int8_t *shift = inv_txfm_shift_ls[TX_4X4]; - const int txw_idx = get_txw_idx(TX_4X4); - const int txh_idx = get_txh_idx(TX_4X4); - - switch (tx_type) { - case DCT_DCT: - load_buffer_4x4(coeff, in); - idct4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd); - idct4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd); - write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd); - break; - case ADST_DCT: - load_buffer_4x4(coeff, in); - idct4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd); - iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd); - write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd); - break; - case DCT_ADST: - load_buffer_4x4(coeff, in); - iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd); - idct4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd); - write_buffer_4x4(in, output, stride, 0, 0, -shift[1], 
bd); - break; - case ADST_ADST: - load_buffer_4x4(coeff, in); - iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd); - iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd); - write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd); - break; - case FLIPADST_DCT: - load_buffer_4x4(coeff, in); - idct4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd); - iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd); - write_buffer_4x4(in, output, stride, 0, 1, -shift[1], bd); - break; - case DCT_FLIPADST: - load_buffer_4x4(coeff, in); - iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd); - idct4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd); - write_buffer_4x4(in, output, stride, 1, 0, -shift[1], bd); - break; - case FLIPADST_FLIPADST: - load_buffer_4x4(coeff, in); - iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd); - iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd); - write_buffer_4x4(in, output, stride, 1, 1, -shift[1], bd); - break; - case ADST_FLIPADST: - load_buffer_4x4(coeff, in); - iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd); - iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd); - write_buffer_4x4(in, output, stride, 1, 0, -shift[1], bd); - break; - case FLIPADST_ADST: - load_buffer_4x4(coeff, in); - iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd); - iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd); - write_buffer_4x4(in, output, stride, 0, 1, -shift[1], bd); - break; - default: assert(0); - } -} - -// 8x8 -static void load_buffer_8x8(const int32_t *coeff, __m128i *in) { - in[0] = _mm_load_si128((const __m128i *)(coeff + 0)); - in[1] = _mm_load_si128((const __m128i *)(coeff + 4)); - in[2] = _mm_load_si128((const __m128i *)(coeff + 8)); - in[3] = _mm_load_si128((const __m128i *)(coeff + 12)); - in[4] = _mm_load_si128((const __m128i *)(coeff + 16)); - in[5] = _mm_load_si128((const __m128i *)(coeff + 20)); - in[6] = _mm_load_si128((const 
__m128i *)(coeff + 24)); - in[7] = _mm_load_si128((const __m128i *)(coeff + 28)); - in[8] = _mm_load_si128((const __m128i *)(coeff + 32)); - in[9] = _mm_load_si128((const __m128i *)(coeff + 36)); - in[10] = _mm_load_si128((const __m128i *)(coeff + 40)); - in[11] = _mm_load_si128((const __m128i *)(coeff + 44)); - in[12] = _mm_load_si128((const __m128i *)(coeff + 48)); - in[13] = _mm_load_si128((const __m128i *)(coeff + 52)); - in[14] = _mm_load_si128((const __m128i *)(coeff + 56)); - in[15] = _mm_load_si128((const __m128i *)(coeff + 60)); -} - -static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i u0, u1, u2, u3, u4, u5, u6, u7; - __m128i v0, v1, v2, v3, v4, v5, v6, v7; - __m128i x, y; - int col; - - // Note: - // Even column: 0, 2, ..., 14 - // Odd column: 1, 3, ..., 15 - // one even column plus one odd column constructs one row (8 coeffs) - // total we have 8 rows (8x8). 
- for (col = 0; col < 2; ++col) { - // stage 0 - // stage 1 - // stage 2 - u0 = in[0 * 2 + col]; - u1 = in[4 * 2 + col]; - u2 = in[2 * 2 + col]; - u3 = in[6 * 2 + col]; - - x = _mm_mullo_epi32(in[1 * 2 + col], cospi56); - y = _mm_mullo_epi32(in[7 * 2 + col], cospim8); - u4 = _mm_add_epi32(x, y); - u4 = _mm_add_epi32(u4, rnding); - u4 = _mm_srai_epi32(u4, bit); - - x = _mm_mullo_epi32(in[1 * 2 + col], cospi8); - y = _mm_mullo_epi32(in[7 * 2 + col], cospi56); - u7 = _mm_add_epi32(x, y); - u7 = _mm_add_epi32(u7, rnding); - u7 = _mm_srai_epi32(u7, bit); - - x = _mm_mullo_epi32(in[5 * 2 + col], cospi24); - y = _mm_mullo_epi32(in[3 * 2 + col], cospim40); - u5 = _mm_add_epi32(x, y); - u5 = _mm_add_epi32(u5, rnding); - u5 = _mm_srai_epi32(u5, bit); - - x = _mm_mullo_epi32(in[5 * 2 + col], cospi40); - y = _mm_mullo_epi32(in[3 * 2 + col], cospi24); - u6 = _mm_add_epi32(x, y); - u6 = _mm_add_epi32(u6, rnding); - u6 = _mm_srai_epi32(u6, bit); - - // stage 3 - x = _mm_mullo_epi32(u0, cospi32); - y = _mm_mullo_epi32(u1, cospi32); - v0 = _mm_add_epi32(x, y); - v0 = _mm_add_epi32(v0, rnding); - v0 = _mm_srai_epi32(v0, bit); - - v1 = _mm_sub_epi32(x, y); - v1 = _mm_add_epi32(v1, rnding); - v1 = _mm_srai_epi32(v1, bit); - - x = _mm_mullo_epi32(u2, cospi48); - y = _mm_mullo_epi32(u3, cospim16); - v2 = _mm_add_epi32(x, y); - v2 = _mm_add_epi32(v2, rnding); - v2 = _mm_srai_epi32(v2, bit); - - x = _mm_mullo_epi32(u2, cospi16); - y = _mm_mullo_epi32(u3, cospi48); - v3 = _mm_add_epi32(x, y); - v3 = _mm_add_epi32(v3, rnding); - v3 = _mm_srai_epi32(v3, bit); - - addsub_sse4_1(u4, u5, &v4, &v5, &clamp_lo, &clamp_hi); - addsub_sse4_1(u7, u6, &v7, &v6, &clamp_lo, &clamp_hi); - - // stage 4 - addsub_sse4_1(v0, v3, &u0, &u3, &clamp_lo, &clamp_hi); - addsub_sse4_1(v1, v2, &u1, &u2, &clamp_lo, &clamp_hi); - u4 = v4; - u7 = v7; - - x = _mm_mullo_epi32(v5, cospi32); - y = _mm_mullo_epi32(v6, cospi32); - u6 = _mm_add_epi32(y, x); - u6 = _mm_add_epi32(u6, rnding); - u6 = _mm_srai_epi32(u6, bit); - - 
u5 = _mm_sub_epi32(y, x); - u5 = _mm_add_epi32(u5, rnding); - u5 = _mm_srai_epi32(u5, bit); - - // stage 5 - if (do_cols) { - addsub_no_clamp_sse4_1(u0, u7, out + 0 * 2 + col, out + 7 * 2 + col); - addsub_no_clamp_sse4_1(u1, u6, out + 1 * 2 + col, out + 6 * 2 + col); - addsub_no_clamp_sse4_1(u2, u5, out + 2 * 2 + col, out + 5 * 2 + col); - addsub_no_clamp_sse4_1(u3, u4, out + 3 * 2 + col, out + 4 * 2 + col); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - addsub_shift_sse4_1(u0, u7, out + 0 * 2 + col, out + 7 * 2 + col, - &clamp_lo_out, &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u1, u6, out + 1 * 2 + col, out + 6 * 2 + col, - &clamp_lo_out, &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u2, u5, out + 2 * 2 + col, out + 5 * 2 + col, - &clamp_lo_out, &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u3, u4, out + 3 * 2 + col, out + 4 * 2 + col, - &clamp_lo_out, &clamp_hi_out, out_shift); - } - } -} - -static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi36 = _mm_set1_epi32(cospi[36]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi52 = _mm_set1_epi32(cospi[52]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i rnding = 
_mm_set1_epi32(1 << (bit - 1)); - const __m128i kZero = _mm_setzero_si128(); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i u[8], v[8], x; - - // Even 8 points: 0, 2, ..., 14 - // stage 0 - // stage 1 - // stage 2 - // (1) - u[0] = _mm_mullo_epi32(in[14], cospi4); - x = _mm_mullo_epi32(in[0], cospi60); - u[0] = _mm_add_epi32(u[0], x); - u[0] = _mm_add_epi32(u[0], rnding); - u[0] = _mm_srai_epi32(u[0], bit); - - u[1] = _mm_mullo_epi32(in[14], cospi60); - x = _mm_mullo_epi32(in[0], cospi4); - u[1] = _mm_sub_epi32(u[1], x); - u[1] = _mm_add_epi32(u[1], rnding); - u[1] = _mm_srai_epi32(u[1], bit); - - // (2) - u[2] = _mm_mullo_epi32(in[10], cospi20); - x = _mm_mullo_epi32(in[4], cospi44); - u[2] = _mm_add_epi32(u[2], x); - u[2] = _mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - u[3] = _mm_mullo_epi32(in[10], cospi44); - x = _mm_mullo_epi32(in[4], cospi20); - u[3] = _mm_sub_epi32(u[3], x); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - // (3) - u[4] = _mm_mullo_epi32(in[6], cospi36); - x = _mm_mullo_epi32(in[8], cospi28); - u[4] = _mm_add_epi32(u[4], x); - u[4] = _mm_add_epi32(u[4], rnding); - u[4] = _mm_srai_epi32(u[4], bit); - - u[5] = _mm_mullo_epi32(in[6], cospi28); - x = _mm_mullo_epi32(in[8], cospi36); - u[5] = _mm_sub_epi32(u[5], x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - // (4) - u[6] = _mm_mullo_epi32(in[2], cospi52); - x = _mm_mullo_epi32(in[12], cospi12); - u[6] = _mm_add_epi32(u[6], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_mullo_epi32(in[2], cospi12); - x = _mm_mullo_epi32(in[12], cospi52); - u[7] = _mm_sub_epi32(u[7], x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // stage 3 - addsub_sse4_1(u[0], u[4], &v[0], &v[4], 
&clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[5], &v[1], &v[5], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[2], u[6], &v[2], &v[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[3], u[7], &v[3], &v[7], &clamp_lo, &clamp_hi); - - // stage 4 - u[0] = v[0]; - u[1] = v[1]; - u[2] = v[2]; - u[3] = v[3]; - - u[4] = _mm_mullo_epi32(v[4], cospi16); - x = _mm_mullo_epi32(v[5], cospi48); - u[4] = _mm_add_epi32(u[4], x); - u[4] = _mm_add_epi32(u[4], rnding); - u[4] = _mm_srai_epi32(u[4], bit); - - u[5] = _mm_mullo_epi32(v[4], cospi48); - x = _mm_mullo_epi32(v[5], cospi16); - u[5] = _mm_sub_epi32(u[5], x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - u[6] = _mm_mullo_epi32(v[6], cospim48); - x = _mm_mullo_epi32(v[7], cospi16); - u[6] = _mm_add_epi32(u[6], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_mullo_epi32(v[6], cospi16); - x = _mm_mullo_epi32(v[7], cospim48); - u[7] = _mm_sub_epi32(u[7], x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // stage 5 - addsub_sse4_1(u[0], u[2], &v[0], &v[2], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[3], &v[1], &v[3], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[4], u[6], &v[4], &v[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[5], u[7], &v[5], &v[7], &clamp_lo, &clamp_hi); - - // stage 6 - u[0] = v[0]; - u[1] = v[1]; - u[4] = v[4]; - u[5] = v[5]; - - v[0] = _mm_mullo_epi32(v[2], cospi32); - x = _mm_mullo_epi32(v[3], cospi32); - u[2] = _mm_add_epi32(v[0], x); - u[2] = _mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - u[3] = _mm_sub_epi32(v[0], x); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - v[0] = _mm_mullo_epi32(v[6], cospi32); - x = _mm_mullo_epi32(v[7], cospi32); - u[6] = _mm_add_epi32(v[0], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_sub_epi32(v[0], x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // 
stage 7 - if (do_cols) { - out[0] = u[0]; - out[2] = _mm_sub_epi32(kZero, u[4]); - out[4] = u[6]; - out[6] = _mm_sub_epi32(kZero, u[2]); - out[8] = u[3]; - out[10] = _mm_sub_epi32(kZero, u[7]); - out[12] = u[5]; - out[14] = _mm_sub_epi32(kZero, u[1]); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1))); - const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1); - - neg_shift_sse4_1(u[0], u[4], out + 0, out + 2, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[6], u[2], out + 4, out + 6, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[3], u[7], out + 8, out + 10, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(u[5], u[1], out + 12, out + 14, &clamp_lo_out, - &clamp_hi_out, out_shift); - } - - // Odd 8 points: 1, 3, ..., 15 - // stage 0 - // stage 1 - // stage 2 - // (1) - u[0] = _mm_mullo_epi32(in[15], cospi4); - x = _mm_mullo_epi32(in[1], cospi60); - u[0] = _mm_add_epi32(u[0], x); - u[0] = _mm_add_epi32(u[0], rnding); - u[0] = _mm_srai_epi32(u[0], bit); - - u[1] = _mm_mullo_epi32(in[15], cospi60); - x = _mm_mullo_epi32(in[1], cospi4); - u[1] = _mm_sub_epi32(u[1], x); - u[1] = _mm_add_epi32(u[1], rnding); - u[1] = _mm_srai_epi32(u[1], bit); - - // (2) - u[2] = _mm_mullo_epi32(in[11], cospi20); - x = _mm_mullo_epi32(in[5], cospi44); - u[2] = _mm_add_epi32(u[2], x); - u[2] = _mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - u[3] = _mm_mullo_epi32(in[11], cospi44); - x = _mm_mullo_epi32(in[5], cospi20); - u[3] = _mm_sub_epi32(u[3], x); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - // (3) - u[4] = _mm_mullo_epi32(in[7], cospi36); - x = _mm_mullo_epi32(in[9], cospi28); - u[4] = _mm_add_epi32(u[4], x); - u[4] = _mm_add_epi32(u[4], rnding); - u[4] = _mm_srai_epi32(u[4], bit); - - u[5] = _mm_mullo_epi32(in[7], cospi28); - x = _mm_mullo_epi32(in[9], cospi36); - u[5] = 
_mm_sub_epi32(u[5], x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - // (4) - u[6] = _mm_mullo_epi32(in[3], cospi52); - x = _mm_mullo_epi32(in[13], cospi12); - u[6] = _mm_add_epi32(u[6], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_mullo_epi32(in[3], cospi12); - x = _mm_mullo_epi32(in[13], cospi52); - u[7] = _mm_sub_epi32(u[7], x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // stage 3 - addsub_sse4_1(u[0], u[4], &v[0], &v[4], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[5], &v[1], &v[5], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[2], u[6], &v[2], &v[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[3], u[7], &v[3], &v[7], &clamp_lo, &clamp_hi); - - // stage 4 - u[0] = v[0]; - u[1] = v[1]; - u[2] = v[2]; - u[3] = v[3]; - - u[4] = _mm_mullo_epi32(v[4], cospi16); - x = _mm_mullo_epi32(v[5], cospi48); - u[4] = _mm_add_epi32(u[4], x); - u[4] = _mm_add_epi32(u[4], rnding); - u[4] = _mm_srai_epi32(u[4], bit); - - u[5] = _mm_mullo_epi32(v[4], cospi48); - x = _mm_mullo_epi32(v[5], cospi16); - u[5] = _mm_sub_epi32(u[5], x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - u[6] = _mm_mullo_epi32(v[6], cospim48); - x = _mm_mullo_epi32(v[7], cospi16); - u[6] = _mm_add_epi32(u[6], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_mullo_epi32(v[6], cospi16); - x = _mm_mullo_epi32(v[7], cospim48); - u[7] = _mm_sub_epi32(u[7], x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // stage 5 - addsub_sse4_1(u[0], u[2], &v[0], &v[2], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[3], &v[1], &v[3], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[4], u[6], &v[4], &v[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[5], u[7], &v[5], &v[7], &clamp_lo, &clamp_hi); - - // stage 6 - u[0] = v[0]; - u[1] = v[1]; - u[4] = v[4]; - u[5] = v[5]; - - v[0] = _mm_mullo_epi32(v[2], cospi32); - x = 
_mm_mullo_epi32(v[3], cospi32); - u[2] = _mm_add_epi32(v[0], x); - u[2] = _mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - u[3] = _mm_sub_epi32(v[0], x); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - v[0] = _mm_mullo_epi32(v[6], cospi32); - x = _mm_mullo_epi32(v[7], cospi32); - u[6] = _mm_add_epi32(v[0], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_sub_epi32(v[0], x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // stage 7 - if (do_cols) { - out[1] = u[0]; - out[3] = _mm_sub_epi32(kZero, u[4]); - out[5] = u[6]; - out[7] = _mm_sub_epi32(kZero, u[2]); - out[9] = u[3]; - out[11] = _mm_sub_epi32(kZero, u[7]); - out[13] = u[5]; - out[15] = _mm_sub_epi32(kZero, u[1]); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1))); - const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1); - - neg_shift_sse4_1(u[0], u[4], out + 1, out + 3, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[6], u[2], out + 5, out + 7, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[3], u[7], out + 9, out + 11, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(u[5], u[1], out + 13, out + 15, &clamp_lo_out, - &clamp_hi_out, out_shift); - } -} - -static void round_shift_8x8(__m128i *in, int shift) { - round_shift_4x4(&in[0], shift); - round_shift_4x4(&in[4], shift); - round_shift_4x4(&in[8], shift); - round_shift_4x4(&in[12], shift); -} - -static __m128i get_recon_8x8(const __m128i pred, __m128i res_lo, __m128i res_hi, - int fliplr, int bd) { - __m128i x0, x1; - const __m128i zero = _mm_setzero_si128(); - - x0 = _mm_unpacklo_epi16(pred, zero); - x1 = _mm_unpackhi_epi16(pred, zero); - - if (fliplr) { - res_lo = _mm_shuffle_epi32(res_lo, 0x1B); - res_hi = _mm_shuffle_epi32(res_hi, 0x1B); - x0 = _mm_add_epi32(res_hi, x0); - x1 = 
_mm_add_epi32(res_lo, x1); - - } else { - x0 = _mm_add_epi32(res_lo, x0); - x1 = _mm_add_epi32(res_hi, x1); - } - - x0 = _mm_packus_epi32(x0, x1); - return highbd_clamp_epi16(x0, bd); -} - -static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride, - int fliplr, int flipud, int shift, int bd) { - __m128i u0, u1, u2, u3, u4, u5, u6, u7; - __m128i v0, v1, v2, v3, v4, v5, v6, v7; - - round_shift_8x8(in, shift); - - v0 = _mm_load_si128((__m128i const *)(output + 0 * stride)); - v1 = _mm_load_si128((__m128i const *)(output + 1 * stride)); - v2 = _mm_load_si128((__m128i const *)(output + 2 * stride)); - v3 = _mm_load_si128((__m128i const *)(output + 3 * stride)); - v4 = _mm_load_si128((__m128i const *)(output + 4 * stride)); - v5 = _mm_load_si128((__m128i const *)(output + 5 * stride)); - v6 = _mm_load_si128((__m128i const *)(output + 6 * stride)); - v7 = _mm_load_si128((__m128i const *)(output + 7 * stride)); - - if (flipud) { - u0 = get_recon_8x8(v0, in[14], in[15], fliplr, bd); - u1 = get_recon_8x8(v1, in[12], in[13], fliplr, bd); - u2 = get_recon_8x8(v2, in[10], in[11], fliplr, bd); - u3 = get_recon_8x8(v3, in[8], in[9], fliplr, bd); - u4 = get_recon_8x8(v4, in[6], in[7], fliplr, bd); - u5 = get_recon_8x8(v5, in[4], in[5], fliplr, bd); - u6 = get_recon_8x8(v6, in[2], in[3], fliplr, bd); - u7 = get_recon_8x8(v7, in[0], in[1], fliplr, bd); - } else { - u0 = get_recon_8x8(v0, in[0], in[1], fliplr, bd); - u1 = get_recon_8x8(v1, in[2], in[3], fliplr, bd); - u2 = get_recon_8x8(v2, in[4], in[5], fliplr, bd); - u3 = get_recon_8x8(v3, in[6], in[7], fliplr, bd); - u4 = get_recon_8x8(v4, in[8], in[9], fliplr, bd); - u5 = get_recon_8x8(v5, in[10], in[11], fliplr, bd); - u6 = get_recon_8x8(v6, in[12], in[13], fliplr, bd); - u7 = get_recon_8x8(v7, in[14], in[15], fliplr, bd); - } - - _mm_store_si128((__m128i *)(output + 0 * stride), u0); - _mm_store_si128((__m128i *)(output + 1 * stride), u1); - _mm_store_si128((__m128i *)(output + 2 * stride), u2); - 
_mm_store_si128((__m128i *)(output + 3 * stride), u3); - _mm_store_si128((__m128i *)(output + 4 * stride), u4); - _mm_store_si128((__m128i *)(output + 5 * stride), u5); - _mm_store_si128((__m128i *)(output + 6 * stride), u6); - _mm_store_si128((__m128i *)(output + 7 * stride), u7); -} - -void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output, - int stride, TX_TYPE tx_type, int bd) { - __m128i in[16], out[16]; - const int8_t *shift = inv_txfm_shift_ls[TX_8X8]; - const int txw_idx = get_txw_idx(TX_8X8); - const int txh_idx = get_txh_idx(TX_8X8); - - switch (tx_type) { - case DCT_DCT: - load_buffer_8x8(coeff, in); - transpose_8x8(in, out); - idct8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, - -shift[0]); - transpose_8x8(in, out); - idct8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd); - break; - case DCT_ADST: - load_buffer_8x8(coeff, in); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, - -shift[0]); - transpose_8x8(in, out); - idct8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd); - break; - case ADST_DCT: - load_buffer_8x8(coeff, in); - transpose_8x8(in, out); - idct8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, - -shift[0]); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd); - break; - case ADST_ADST: - load_buffer_8x8(coeff, in); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, - -shift[0]); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd); - break; - case FLIPADST_DCT: - load_buffer_8x8(coeff, in); - transpose_8x8(in, out); - idct8x8_sse4_1(out, in, 
inv_cos_bit_row[txw_idx][txh_idx], 0, bd, - -shift[0]); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - write_buffer_8x8(in, output, stride, 0, 1, -shift[1], bd); - break; - case DCT_FLIPADST: - load_buffer_8x8(coeff, in); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, - -shift[0]); - transpose_8x8(in, out); - idct8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd); - break; - case ADST_FLIPADST: - load_buffer_8x8(coeff, in); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, - -shift[0]); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd); - break; - case FLIPADST_FLIPADST: - load_buffer_8x8(coeff, in); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, - -shift[0]); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - write_buffer_8x8(in, output, stride, 1, 1, -shift[1], bd); - break; - case FLIPADST_ADST: - load_buffer_8x8(coeff, in); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, - -shift[0]); - transpose_8x8(in, out); - iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - write_buffer_8x8(in, output, stride, 0, 1, -shift[1], bd); - break; - default: assert(0); - } -} - -static void idct8x8_low1_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 
6 : 8)); - __m128i x; - - // stage 0 - // stage 1 - // stage 2 - // stage 3 - x = _mm_mullo_epi32(in[0], cospi32); - x = _mm_add_epi32(x, rnding); - x = _mm_srai_epi32(x, bit); - - // stage 4 - // stage 5 - if (!do_cols) { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - __m128i offset = _mm_set1_epi32((1 << out_shift) >> 1); - x = _mm_add_epi32(x, offset); - x = _mm_sra_epi32(x, _mm_cvtsi32_si128(out_shift)); - x = _mm_max_epi32(x, clamp_lo_out); - x = _mm_min_epi32(x, clamp_hi_out); - } - - out[0] = x; - out[1] = x; - out[2] = x; - out[3] = x; - out[4] = x; - out[5] = x; - out[6] = x; - out[7] = x; -} - -static void idct8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 
6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i u0, u1, u2, u3, u4, u5, u6, u7; - __m128i v0, v1, v2, v3, v4, v5, v6, v7; - __m128i x, y; - - // stage 0 - // stage 1 - // stage 2 - u0 = in[0]; - u1 = in[4]; - u2 = in[2]; - u3 = in[6]; - - x = _mm_mullo_epi32(in[1], cospi56); - y = _mm_mullo_epi32(in[7], cospim8); - u4 = _mm_add_epi32(x, y); - u4 = _mm_add_epi32(u4, rnding); - u4 = _mm_srai_epi32(u4, bit); - - x = _mm_mullo_epi32(in[1], cospi8); - y = _mm_mullo_epi32(in[7], cospi56); - u7 = _mm_add_epi32(x, y); - u7 = _mm_add_epi32(u7, rnding); - u7 = _mm_srai_epi32(u7, bit); - - x = _mm_mullo_epi32(in[5], cospi24); - y = _mm_mullo_epi32(in[3], cospim40); - u5 = _mm_add_epi32(x, y); - u5 = _mm_add_epi32(u5, rnding); - u5 = _mm_srai_epi32(u5, bit); - - x = _mm_mullo_epi32(in[5], cospi40); - y = _mm_mullo_epi32(in[3], cospi24); - u6 = _mm_add_epi32(x, y); - u6 = _mm_add_epi32(u6, rnding); - u6 = _mm_srai_epi32(u6, bit); - - // stage 3 - x = _mm_mullo_epi32(u0, cospi32); - y = _mm_mullo_epi32(u1, cospi32); - v0 = _mm_add_epi32(x, y); - v0 = _mm_add_epi32(v0, rnding); - v0 = _mm_srai_epi32(v0, bit); - - v1 = _mm_sub_epi32(x, y); - v1 = _mm_add_epi32(v1, rnding); - v1 = _mm_srai_epi32(v1, bit); - - x = _mm_mullo_epi32(u2, cospi48); - y = _mm_mullo_epi32(u3, cospim16); - v2 = _mm_add_epi32(x, y); - v2 = _mm_add_epi32(v2, rnding); - v2 = _mm_srai_epi32(v2, bit); - - x = _mm_mullo_epi32(u2, cospi16); - y = _mm_mullo_epi32(u3, cospi48); - v3 = _mm_add_epi32(x, y); - v3 = _mm_add_epi32(v3, rnding); - v3 = _mm_srai_epi32(v3, bit); - - addsub_sse4_1(u4, u5, &v4, &v5, &clamp_lo, &clamp_hi); - addsub_sse4_1(u7, u6, &v7, &v6, &clamp_lo, &clamp_hi); - - // stage 4 - addsub_sse4_1(v0, v3, &u0, &u3, &clamp_lo, &clamp_hi); - addsub_sse4_1(v1, v2, &u1, &u2, &clamp_lo, &clamp_hi); - u4 = v4; - u7 = v7; - - x = _mm_mullo_epi32(v5, cospi32); - y = _mm_mullo_epi32(v6, cospi32); - 
u6 = _mm_add_epi32(y, x); - u6 = _mm_add_epi32(u6, rnding); - u6 = _mm_srai_epi32(u6, bit); - - u5 = _mm_sub_epi32(y, x); - u5 = _mm_add_epi32(u5, rnding); - u5 = _mm_srai_epi32(u5, bit); - - // stage 5 - if (do_cols) { - addsub_no_clamp_sse4_1(u0, u7, out + 0, out + 7); - addsub_no_clamp_sse4_1(u1, u6, out + 1, out + 6); - addsub_no_clamp_sse4_1(u2, u5, out + 2, out + 5); - addsub_no_clamp_sse4_1(u3, u4, out + 3, out + 4); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - addsub_shift_sse4_1(u0, u7, out + 0, out + 7, &clamp_lo_out, &clamp_hi_out, - out_shift); - addsub_shift_sse4_1(u1, u6, out + 1, out + 6, &clamp_lo_out, &clamp_hi_out, - out_shift); - addsub_shift_sse4_1(u2, u5, out + 2, out + 5, &clamp_lo_out, &clamp_hi_out, - out_shift); - addsub_shift_sse4_1(u3, u4, out + 3, out + 4, &clamp_lo_out, &clamp_hi_out, - out_shift); - } -} - -static void iadst8x8_low1_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const __m128i kZero = _mm_setzero_si128(); - __m128i u[8], x; - - // stage 0 - // stage 1 - // stage 2 - - x = _mm_mullo_epi32(in[0], cospi60); - u[0] = _mm_add_epi32(x, rnding); - u[0] = _mm_srai_epi32(u[0], bit); - - x = _mm_mullo_epi32(in[0], cospi4); - u[1] = _mm_sub_epi32(kZero, x); - u[1] = _mm_add_epi32(u[1], rnding); - u[1] = _mm_srai_epi32(u[1], bit); - - // stage 3 - // stage 4 - __m128i temp1, 
temp2; - temp1 = _mm_mullo_epi32(u[0], cospi16); - x = _mm_mullo_epi32(u[1], cospi48); - temp1 = _mm_add_epi32(temp1, x); - temp1 = _mm_add_epi32(temp1, rnding); - temp1 = _mm_srai_epi32(temp1, bit); - u[4] = temp1; - - temp2 = _mm_mullo_epi32(u[0], cospi48); - x = _mm_mullo_epi32(u[1], cospi16); - u[5] = _mm_sub_epi32(temp2, x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - // stage 5 - // stage 6 - temp1 = _mm_mullo_epi32(u[0], cospi32); - x = _mm_mullo_epi32(u[1], cospi32); - u[2] = _mm_add_epi32(temp1, x); - u[2] = _mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - u[3] = _mm_sub_epi32(temp1, x); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - temp1 = _mm_mullo_epi32(u[4], cospi32); - x = _mm_mullo_epi32(u[5], cospi32); - u[6] = _mm_add_epi32(temp1, x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_sub_epi32(temp1, x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // stage 7 - if (do_cols) { - out[0] = u[0]; - out[1] = _mm_sub_epi32(kZero, u[4]); - out[2] = u[6]; - out[3] = _mm_sub_epi32(kZero, u[2]); - out[4] = u[3]; - out[5] = _mm_sub_epi32(kZero, u[7]); - out[6] = u[5]; - out[7] = _mm_sub_epi32(kZero, u[1]); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1))); - const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1); - - neg_shift_sse4_1(u[0], u[4], out + 0, out + 1, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[6], u[2], out + 2, out + 3, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[3], u[7], out + 4, out + 5, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[5], u[1], out + 6, out + 7, &clamp_lo_out, &clamp_hi_out, - out_shift); - } -} - -static void iadst8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols, - int bd, int out_shift) { - 
const int32_t *cospi = cospi_arr(bit); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi36 = _mm_set1_epi32(cospi[36]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi52 = _mm_set1_epi32(cospi[52]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const __m128i kZero = _mm_setzero_si128(); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i u[8], v[8], x; - - // stage 0 - // stage 1 - // stage 2 - - u[0] = _mm_mullo_epi32(in[7], cospi4); - x = _mm_mullo_epi32(in[0], cospi60); - u[0] = _mm_add_epi32(u[0], x); - u[0] = _mm_add_epi32(u[0], rnding); - u[0] = _mm_srai_epi32(u[0], bit); - - u[1] = _mm_mullo_epi32(in[7], cospi60); - x = _mm_mullo_epi32(in[0], cospi4); - u[1] = _mm_sub_epi32(u[1], x); - u[1] = _mm_add_epi32(u[1], rnding); - u[1] = _mm_srai_epi32(u[1], bit); - - // (2) - u[2] = _mm_mullo_epi32(in[5], cospi20); - x = _mm_mullo_epi32(in[2], cospi44); - u[2] = _mm_add_epi32(u[2], x); - u[2] = _mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - u[3] = _mm_mullo_epi32(in[5], cospi44); - x = _mm_mullo_epi32(in[2], cospi20); - u[3] = _mm_sub_epi32(u[3], x); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - // (3) - u[4] = _mm_mullo_epi32(in[3], cospi36); - x = _mm_mullo_epi32(in[4], cospi28); - u[4] = _mm_add_epi32(u[4], x); - u[4] = _mm_add_epi32(u[4], rnding); - u[4] = 
_mm_srai_epi32(u[4], bit); - - u[5] = _mm_mullo_epi32(in[3], cospi28); - x = _mm_mullo_epi32(in[4], cospi36); - u[5] = _mm_sub_epi32(u[5], x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - // (4) - u[6] = _mm_mullo_epi32(in[1], cospi52); - x = _mm_mullo_epi32(in[6], cospi12); - u[6] = _mm_add_epi32(u[6], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_mullo_epi32(in[1], cospi12); - x = _mm_mullo_epi32(in[6], cospi52); - u[7] = _mm_sub_epi32(u[7], x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // stage 3 - addsub_sse4_1(u[0], u[4], &v[0], &v[4], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[5], &v[1], &v[5], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[2], u[6], &v[2], &v[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[3], u[7], &v[3], &v[7], &clamp_lo, &clamp_hi); - - // stage 4 - u[0] = v[0]; - u[1] = v[1]; - u[2] = v[2]; - u[3] = v[3]; - - u[4] = _mm_mullo_epi32(v[4], cospi16); - x = _mm_mullo_epi32(v[5], cospi48); - u[4] = _mm_add_epi32(u[4], x); - u[4] = _mm_add_epi32(u[4], rnding); - u[4] = _mm_srai_epi32(u[4], bit); - - u[5] = _mm_mullo_epi32(v[4], cospi48); - x = _mm_mullo_epi32(v[5], cospi16); - u[5] = _mm_sub_epi32(u[5], x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - u[6] = _mm_mullo_epi32(v[6], cospim48); - x = _mm_mullo_epi32(v[7], cospi16); - u[6] = _mm_add_epi32(u[6], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_mullo_epi32(v[6], cospi16); - x = _mm_mullo_epi32(v[7], cospim48); - u[7] = _mm_sub_epi32(u[7], x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // stage 5 - addsub_sse4_1(u[0], u[2], &v[0], &v[2], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[3], &v[1], &v[3], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[4], u[6], &v[4], &v[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[5], u[7], &v[5], &v[7], &clamp_lo, &clamp_hi); - - // stage 6 
- u[0] = v[0]; - u[1] = v[1]; - u[4] = v[4]; - u[5] = v[5]; - - v[0] = _mm_mullo_epi32(v[2], cospi32); - x = _mm_mullo_epi32(v[3], cospi32); - u[2] = _mm_add_epi32(v[0], x); - u[2] = _mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - u[3] = _mm_sub_epi32(v[0], x); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - v[0] = _mm_mullo_epi32(v[6], cospi32); - x = _mm_mullo_epi32(v[7], cospi32); - u[6] = _mm_add_epi32(v[0], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_sub_epi32(v[0], x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - // stage 7 - if (do_cols) { - out[0] = u[0]; - out[1] = _mm_sub_epi32(kZero, u[4]); - out[2] = u[6]; - out[3] = _mm_sub_epi32(kZero, u[2]); - out[4] = u[3]; - out[5] = _mm_sub_epi32(kZero, u[7]); - out[6] = u[5]; - out[7] = _mm_sub_epi32(kZero, u[1]); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1))); - const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1); - - neg_shift_sse4_1(u[0], u[4], out + 0, out + 1, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[6], u[2], out + 2, out + 3, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[3], u[7], out + 4, out + 5, &clamp_lo_out, &clamp_hi_out, - out_shift); - neg_shift_sse4_1(u[5], u[1], out + 6, out + 7, &clamp_lo_out, &clamp_hi_out, - out_shift); - } -} - -static void idct16x16_low1_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 
6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - - { - // stage 0 - // stage 1 - // stage 2 - // stage 3 - // stage 4 - in[0] = _mm_mullo_epi32(in[0], cospi32); - in[0] = _mm_add_epi32(in[0], rnding); - in[0] = _mm_srai_epi32(in[0], bit); - - // stage 5 - // stage 6 - // stage 7 - if (do_cols) { - in[0] = _mm_max_epi32(in[0], clamp_lo); - in[0] = _mm_min_epi32(in[0], clamp_hi); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - __m128i offset = _mm_set1_epi32((1 << out_shift) >> 1); - in[0] = _mm_add_epi32(in[0], offset); - in[0] = _mm_sra_epi32(in[0], _mm_cvtsi32_si128(out_shift)); - in[0] = _mm_max_epi32(in[0], clamp_lo_out); - in[0] = _mm_min_epi32(in[0], clamp_hi_out); - } - - out[0] = in[0]; - out[1] = in[0]; - out[2] = in[0]; - out[3] = in[0]; - out[4] = in[0]; - out[5] = in[0]; - out[6] = in[0]; - out[7] = in[0]; - out[8] = in[0]; - out[9] = in[0]; - out[10] = in[0]; - out[11] = in[0]; - out[12] = in[0]; - out[13] = in[0]; - out[14] = in[0]; - out[15] = in[0]; - } -} - -static void idct16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i 
cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospim36 = _mm_set1_epi32(-cospi[36]); - const __m128i cospim52 = _mm_set1_epi32(-cospi[52]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i u[16], x, y; - - { - // stage 0 - // stage 1 - u[0] = in[0]; - u[2] = in[4]; - u[4] = in[2]; - u[6] = in[6]; - u[8] = in[1]; - u[10] = in[5]; - u[12] = in[3]; - u[14] = in[7]; - - // stage 2 - u[15] = half_btf_0_sse4_1(&cospi4, &u[8], &rnding, bit); - u[8] = half_btf_0_sse4_1(&cospi60, &u[8], &rnding, bit); - - u[9] = half_btf_0_sse4_1(&cospim36, &u[14], &rnding, bit); - u[14] = half_btf_0_sse4_1(&cospi28, &u[14], &rnding, bit); - - u[13] = half_btf_0_sse4_1(&cospi20, &u[10], &rnding, bit); - u[10] = half_btf_0_sse4_1(&cospi44, &u[10], &rnding, bit); - - u[11] = half_btf_0_sse4_1(&cospim52, &u[12], &rnding, bit); - u[12] = half_btf_0_sse4_1(&cospi12, &u[12], &rnding, bit); - - // stage 3 - u[7] = half_btf_0_sse4_1(&cospi8, &u[4], &rnding, bit); - u[4] = half_btf_0_sse4_1(&cospi56, &u[4], &rnding, bit); - u[5] = half_btf_0_sse4_1(&cospim40, &u[6], &rnding, bit); - u[6] = half_btf_0_sse4_1(&cospi24, &u[6], &rnding, bit); - - addsub_sse4_1(u[8], u[9], &u[8], &u[9], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[11], u[10], &u[11], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[12], u[13], &u[12], &u[13], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[15], u[14], &u[15], &u[14], &clamp_lo, &clamp_hi); - - // stage 4 - x = _mm_mullo_epi32(u[0], cospi32); - u[0] = _mm_add_epi32(x, rnding); - u[0] = 
_mm_srai_epi32(u[0], bit); - u[1] = u[0]; - - u[3] = half_btf_0_sse4_1(&cospi16, &u[2], &rnding, bit); - u[2] = half_btf_0_sse4_1(&cospi48, &u[2], &rnding, bit); - - addsub_sse4_1(u[4], u[5], &u[4], &u[5], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[7], u[6], &u[7], &u[6], &clamp_lo, &clamp_hi); - - x = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit); - u[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit); - u[9] = x; - y = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit); - u[13] = half_btf_sse4_1(&cospim16, &u[10], &cospi48, &u[13], &rnding, bit); - u[10] = y; - - // stage 5 - addsub_sse4_1(u[0], u[3], &u[0], &u[3], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[2], &u[1], &u[2], &clamp_lo, &clamp_hi); - - x = _mm_mullo_epi32(u[5], cospi32); - y = _mm_mullo_epi32(u[6], cospi32); - u[5] = _mm_sub_epi32(y, x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - u[6] = _mm_add_epi32(y, x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - addsub_sse4_1(u[8], u[11], &u[8], &u[11], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[9], u[10], &u[9], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[15], u[12], &u[15], &u[12], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[14], u[13], &u[14], &u[13], &clamp_lo, &clamp_hi); - - // stage 6 - addsub_sse4_1(u[0], u[7], &u[0], &u[7], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[6], &u[1], &u[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[2], u[5], &u[2], &u[5], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[3], u[4], &u[3], &u[4], &clamp_lo, &clamp_hi); - - x = _mm_mullo_epi32(u[10], cospi32); - y = _mm_mullo_epi32(u[13], cospi32); - u[10] = _mm_sub_epi32(y, x); - u[10] = _mm_add_epi32(u[10], rnding); - u[10] = _mm_srai_epi32(u[10], bit); - - u[13] = _mm_add_epi32(x, y); - u[13] = _mm_add_epi32(u[13], rnding); - u[13] = _mm_srai_epi32(u[13], bit); - - x = _mm_mullo_epi32(u[11], cospi32); - y = _mm_mullo_epi32(u[12], cospi32); - u[11] = 
_mm_sub_epi32(y, x); - u[11] = _mm_add_epi32(u[11], rnding); - u[11] = _mm_srai_epi32(u[11], bit); - - u[12] = _mm_add_epi32(x, y); - u[12] = _mm_add_epi32(u[12], rnding); - u[12] = _mm_srai_epi32(u[12], bit); - // stage 7 - if (do_cols) { - addsub_no_clamp_sse4_1(u[0], u[15], out + 0, out + 15); - addsub_no_clamp_sse4_1(u[1], u[14], out + 1, out + 14); - addsub_no_clamp_sse4_1(u[2], u[13], out + 2, out + 13); - addsub_no_clamp_sse4_1(u[3], u[12], out + 3, out + 12); - addsub_no_clamp_sse4_1(u[4], u[11], out + 4, out + 11); - addsub_no_clamp_sse4_1(u[5], u[10], out + 5, out + 10); - addsub_no_clamp_sse4_1(u[6], u[9], out + 6, out + 9); - addsub_no_clamp_sse4_1(u[7], u[8], out + 7, out + 8); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - addsub_shift_sse4_1(u[0], u[15], out + 0, out + 15, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u[1], u[14], out + 1, out + 14, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u[2], u[13], out + 2, out + 13, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u[3], u[12], out + 3, out + 12, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u[4], u[11], out + 4, out + 11, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u[5], u[10], out + 5, out + 10, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u[6], u[9], out + 6, out + 9, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(u[7], u[8], out + 7, out + 8, &clamp_lo_out, - &clamp_hi_out, out_shift); - } - } -} - -static void iadst16x16_low1_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi2 = 
_mm_set1_epi32(cospi[2]); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const __m128i zero = _mm_setzero_si128(); - __m128i v[16], x, y, temp1, temp2; - - // Calculate the column 0, 1, 2, 3 - { - // stage 0 - // stage 1 - // stage 2 - x = _mm_mullo_epi32(in[0], cospi62); - v[0] = _mm_add_epi32(x, rnding); - v[0] = _mm_srai_epi32(v[0], bit); - - x = _mm_mullo_epi32(in[0], cospi2); - v[1] = _mm_sub_epi32(zero, x); - v[1] = _mm_add_epi32(v[1], rnding); - v[1] = _mm_srai_epi32(v[1], bit); - - // stage 3 - v[8] = v[0]; - v[9] = v[1]; - - // stage 4 - temp1 = _mm_mullo_epi32(v[8], cospi8); - x = _mm_mullo_epi32(v[9], cospi56); - temp1 = _mm_add_epi32(temp1, x); - temp1 = _mm_add_epi32(temp1, rnding); - temp1 = _mm_srai_epi32(temp1, bit); - - temp2 = _mm_mullo_epi32(v[8], cospi56); - x = _mm_mullo_epi32(v[9], cospi8); - temp2 = _mm_sub_epi32(temp2, x); - temp2 = _mm_add_epi32(temp2, rnding); - temp2 = _mm_srai_epi32(temp2, bit); - v[8] = temp1; - v[9] = temp2; - - // stage 5 - v[4] = v[0]; - v[5] = v[1]; - v[12] = v[8]; - v[13] = v[9]; - - // stage 6 - temp1 = _mm_mullo_epi32(v[4], cospi16); - x = _mm_mullo_epi32(v[5], cospi48); - temp1 = _mm_add_epi32(temp1, x); - temp1 = _mm_add_epi32(temp1, rnding); - temp1 = _mm_srai_epi32(temp1, bit); - - temp2 = _mm_mullo_epi32(v[4], cospi48); - x = _mm_mullo_epi32(v[5], cospi16); - temp2 = _mm_sub_epi32(temp2, x); - temp2 = _mm_add_epi32(temp2, rnding); - temp2 = _mm_srai_epi32(temp2, bit); - v[4] = temp1; - v[5] = temp2; - - temp1 = _mm_mullo_epi32(v[12], cospi16); - x = _mm_mullo_epi32(v[13], cospi48); - temp1 = _mm_add_epi32(temp1, x); - temp1 = _mm_add_epi32(temp1, rnding); - temp1 = _mm_srai_epi32(temp1, 
bit); - - temp2 = _mm_mullo_epi32(v[12], cospi48); - x = _mm_mullo_epi32(v[13], cospi16); - temp2 = _mm_sub_epi32(temp2, x); - temp2 = _mm_add_epi32(temp2, rnding); - temp2 = _mm_srai_epi32(temp2, bit); - v[12] = temp1; - v[13] = temp2; - - // stage 7 - v[2] = v[0]; - v[3] = v[1]; - v[6] = v[4]; - v[7] = v[5]; - v[10] = v[8]; - v[11] = v[9]; - v[14] = v[12]; - v[15] = v[13]; - - // stage 8 - y = _mm_mullo_epi32(v[2], cospi32); - x = _mm_mullo_epi32(v[3], cospi32); - v[2] = _mm_add_epi32(y, x); - v[2] = _mm_add_epi32(v[2], rnding); - v[2] = _mm_srai_epi32(v[2], bit); - - v[3] = _mm_sub_epi32(y, x); - v[3] = _mm_add_epi32(v[3], rnding); - v[3] = _mm_srai_epi32(v[3], bit); - - y = _mm_mullo_epi32(v[6], cospi32); - x = _mm_mullo_epi32(v[7], cospi32); - v[6] = _mm_add_epi32(y, x); - v[6] = _mm_add_epi32(v[6], rnding); - v[6] = _mm_srai_epi32(v[6], bit); - - v[7] = _mm_sub_epi32(y, x); - v[7] = _mm_add_epi32(v[7], rnding); - v[7] = _mm_srai_epi32(v[7], bit); - - y = _mm_mullo_epi32(v[10], cospi32); - x = _mm_mullo_epi32(v[11], cospi32); - v[10] = _mm_add_epi32(y, x); - v[10] = _mm_add_epi32(v[10], rnding); - v[10] = _mm_srai_epi32(v[10], bit); - - v[11] = _mm_sub_epi32(y, x); - v[11] = _mm_add_epi32(v[11], rnding); - v[11] = _mm_srai_epi32(v[11], bit); - - y = _mm_mullo_epi32(v[14], cospi32); - x = _mm_mullo_epi32(v[15], cospi32); - v[14] = _mm_add_epi32(y, x); - v[14] = _mm_add_epi32(v[14], rnding); - v[14] = _mm_srai_epi32(v[14], bit); - - v[15] = _mm_sub_epi32(y, x); - v[15] = _mm_add_epi32(v[15], rnding); - v[15] = _mm_srai_epi32(v[15], bit); - - // stage 9 - if (do_cols) { - out[0] = v[0]; - out[1] = _mm_sub_epi32(_mm_setzero_si128(), v[8]); - out[2] = v[12]; - out[3] = _mm_sub_epi32(_mm_setzero_si128(), v[4]); - out[4] = v[6]; - out[5] = _mm_sub_epi32(_mm_setzero_si128(), v[14]); - out[6] = v[10]; - out[7] = _mm_sub_epi32(_mm_setzero_si128(), v[2]); - out[8] = v[3]; - out[9] = _mm_sub_epi32(_mm_setzero_si128(), v[11]); - out[10] = v[15]; - out[11] = 
_mm_sub_epi32(_mm_setzero_si128(), v[7]); - out[12] = v[5]; - out[13] = _mm_sub_epi32(_mm_setzero_si128(), v[13]); - out[14] = v[9]; - out[15] = _mm_sub_epi32(_mm_setzero_si128(), v[1]); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1))); - const __m128i clamp_hi_out = - _mm_set1_epi32((1 << (log_range_out - 1)) - 1); - - neg_shift_sse4_1(v[0], v[8], out + 0, out + 1, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[12], v[4], out + 2, out + 3, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[6], v[14], out + 4, out + 5, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[10], v[2], out + 6, out + 7, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[3], v[11], out + 8, out + 9, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[15], v[7], out + 10, out + 11, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[5], v[13], out + 12, out + 13, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[9], v[1], out + 14, out + 15, &clamp_lo_out, - &clamp_hi_out, out_shift); - } - } -} - -static void iadst16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi2 = _mm_set1_epi32(cospi[2]); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - const __m128i cospi10 = _mm_set1_epi32(cospi[10]); - const __m128i cospi54 = _mm_set1_epi32(cospi[54]); - const __m128i cospi18 = _mm_set1_epi32(cospi[18]); - const __m128i cospi46 = _mm_set1_epi32(cospi[46]); - const __m128i cospi26 = _mm_set1_epi32(cospi[26]); - const __m128i cospi38 = _mm_set1_epi32(cospi[38]); - const __m128i cospi34 = _mm_set1_epi32(cospi[34]); - const __m128i cospi30 = _mm_set1_epi32(cospi[30]); - const __m128i cospi42 = _mm_set1_epi32(cospi[42]); - const __m128i cospi22 = _mm_set1_epi32(cospi[22]); - const __m128i cospi50 = 
_mm_set1_epi32(cospi[50]); - const __m128i cospi14 = _mm_set1_epi32(cospi[14]); - const __m128i cospi58 = _mm_set1_epi32(cospi[58]); - const __m128i cospi6 = _mm_set1_epi32(cospi[6]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospim56 = _mm_set1_epi32(-cospi[56]); - const __m128i cospim24 = _mm_set1_epi32(-cospi[24]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i u[16], x, y; - - // Calculate the column 0, 1, 2, 3 - { - // stage 0 - // stage 1 - // stage 2 - __m128i zero = _mm_setzero_si128(); - x = _mm_mullo_epi32(in[0], cospi62); - u[0] = _mm_add_epi32(x, rnding); - u[0] = _mm_srai_epi32(u[0], bit); - - x = _mm_mullo_epi32(in[0], cospi2); - u[1] = _mm_sub_epi32(zero, x); - u[1] = _mm_add_epi32(u[1], rnding); - u[1] = _mm_srai_epi32(u[1], bit); - - x = _mm_mullo_epi32(in[2], cospi54); - u[2] = _mm_add_epi32(x, rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - x = _mm_mullo_epi32(in[2], cospi10); - u[3] = _mm_sub_epi32(zero, x); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - x = _mm_mullo_epi32(in[4], cospi46); - u[4] = _mm_add_epi32(x, rnding); - u[4] = _mm_srai_epi32(u[4], bit); - - x = _mm_mullo_epi32(in[4], cospi18); - u[5] = _mm_sub_epi32(zero, x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - x = _mm_mullo_epi32(in[6], cospi38); - u[6] = _mm_add_epi32(x, rnding); - u[6] = _mm_srai_epi32(u[6], 
bit); - - x = _mm_mullo_epi32(in[6], cospi26); - u[7] = _mm_sub_epi32(zero, x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - u[8] = _mm_mullo_epi32(in[7], cospi34); - u[8] = _mm_add_epi32(u[8], rnding); - u[8] = _mm_srai_epi32(u[8], bit); - - u[9] = _mm_mullo_epi32(in[7], cospi30); - u[9] = _mm_add_epi32(u[9], rnding); - u[9] = _mm_srai_epi32(u[9], bit); - - u[10] = _mm_mullo_epi32(in[5], cospi42); - u[10] = _mm_add_epi32(u[10], rnding); - u[10] = _mm_srai_epi32(u[10], bit); - - u[11] = _mm_mullo_epi32(in[5], cospi22); - u[11] = _mm_add_epi32(u[11], rnding); - u[11] = _mm_srai_epi32(u[11], bit); - - u[12] = _mm_mullo_epi32(in[3], cospi50); - u[12] = _mm_add_epi32(u[12], rnding); - u[12] = _mm_srai_epi32(u[12], bit); - - u[13] = _mm_mullo_epi32(in[3], cospi14); - u[13] = _mm_add_epi32(u[13], rnding); - u[13] = _mm_srai_epi32(u[13], bit); - - u[14] = _mm_mullo_epi32(in[1], cospi58); - u[14] = _mm_add_epi32(u[14], rnding); - u[14] = _mm_srai_epi32(u[14], bit); - - u[15] = _mm_mullo_epi32(in[1], cospi6); - u[15] = _mm_add_epi32(u[15], rnding); - u[15] = _mm_srai_epi32(u[15], bit); - - // stage 3 - addsub_sse4_1(u[0], u[8], &u[0], &u[8], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[9], &u[1], &u[9], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[2], u[10], &u[2], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[3], u[11], &u[3], &u[11], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[4], u[12], &u[4], &u[12], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[5], u[13], &u[5], &u[13], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[6], u[14], &u[6], &u[14], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[7], u[15], &u[7], &u[15], &clamp_lo, &clamp_hi); - - // stage 4 - y = _mm_mullo_epi32(u[8], cospi56); - x = _mm_mullo_epi32(u[9], cospi56); - u[8] = _mm_mullo_epi32(u[8], cospi8); - u[8] = _mm_add_epi32(u[8], x); - u[8] = _mm_add_epi32(u[8], rnding); - u[8] = _mm_srai_epi32(u[8], bit); - - x = _mm_mullo_epi32(u[9], cospi8); - u[9] = _mm_sub_epi32(y, x); - u[9] = 
_mm_add_epi32(u[9], rnding); - u[9] = _mm_srai_epi32(u[9], bit); - - x = _mm_mullo_epi32(u[11], cospi24); - y = _mm_mullo_epi32(u[10], cospi24); - u[10] = _mm_mullo_epi32(u[10], cospi40); - u[10] = _mm_add_epi32(u[10], x); - u[10] = _mm_add_epi32(u[10], rnding); - u[10] = _mm_srai_epi32(u[10], bit); - - x = _mm_mullo_epi32(u[11], cospi40); - u[11] = _mm_sub_epi32(y, x); - u[11] = _mm_add_epi32(u[11], rnding); - u[11] = _mm_srai_epi32(u[11], bit); - - x = _mm_mullo_epi32(u[13], cospi8); - y = _mm_mullo_epi32(u[12], cospi8); - u[12] = _mm_mullo_epi32(u[12], cospim56); - u[12] = _mm_add_epi32(u[12], x); - u[12] = _mm_add_epi32(u[12], rnding); - u[12] = _mm_srai_epi32(u[12], bit); - - x = _mm_mullo_epi32(u[13], cospim56); - u[13] = _mm_sub_epi32(y, x); - u[13] = _mm_add_epi32(u[13], rnding); - u[13] = _mm_srai_epi32(u[13], bit); - - x = _mm_mullo_epi32(u[15], cospi40); - y = _mm_mullo_epi32(u[14], cospi40); - u[14] = _mm_mullo_epi32(u[14], cospim24); - u[14] = _mm_add_epi32(u[14], x); - u[14] = _mm_add_epi32(u[14], rnding); - u[14] = _mm_srai_epi32(u[14], bit); - - x = _mm_mullo_epi32(u[15], cospim24); - u[15] = _mm_sub_epi32(y, x); - u[15] = _mm_add_epi32(u[15], rnding); - u[15] = _mm_srai_epi32(u[15], bit); - - // stage 5 - addsub_sse4_1(u[0], u[4], &u[0], &u[4], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[5], &u[1], &u[5], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[2], u[6], &u[2], &u[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[3], u[7], &u[3], &u[7], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[8], u[12], &u[8], &u[12], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[9], u[13], &u[9], &u[13], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[10], u[14], &u[10], &u[14], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[11], u[15], &u[11], &u[15], &clamp_lo, &clamp_hi); - - // stage 6 - x = _mm_mullo_epi32(u[5], cospi48); - y = _mm_mullo_epi32(u[4], cospi48); - u[4] = _mm_mullo_epi32(u[4], cospi16); - u[4] = _mm_add_epi32(u[4], x); - u[4] = _mm_add_epi32(u[4], rnding); - u[4] = 
_mm_srai_epi32(u[4], bit); - - x = _mm_mullo_epi32(u[5], cospi16); - u[5] = _mm_sub_epi32(y, x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - x = _mm_mullo_epi32(u[7], cospi16); - y = _mm_mullo_epi32(u[6], cospi16); - u[6] = _mm_mullo_epi32(u[6], cospim48); - u[6] = _mm_add_epi32(u[6], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - x = _mm_mullo_epi32(u[7], cospim48); - u[7] = _mm_sub_epi32(y, x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - x = _mm_mullo_epi32(u[13], cospi48); - y = _mm_mullo_epi32(u[12], cospi48); - u[12] = _mm_mullo_epi32(u[12], cospi16); - u[12] = _mm_add_epi32(u[12], x); - u[12] = _mm_add_epi32(u[12], rnding); - u[12] = _mm_srai_epi32(u[12], bit); - - x = _mm_mullo_epi32(u[13], cospi16); - u[13] = _mm_sub_epi32(y, x); - u[13] = _mm_add_epi32(u[13], rnding); - u[13] = _mm_srai_epi32(u[13], bit); - - x = _mm_mullo_epi32(u[15], cospi16); - y = _mm_mullo_epi32(u[14], cospi16); - u[14] = _mm_mullo_epi32(u[14], cospim48); - u[14] = _mm_add_epi32(u[14], x); - u[14] = _mm_add_epi32(u[14], rnding); - u[14] = _mm_srai_epi32(u[14], bit); - - x = _mm_mullo_epi32(u[15], cospim48); - u[15] = _mm_sub_epi32(y, x); - u[15] = _mm_add_epi32(u[15], rnding); - u[15] = _mm_srai_epi32(u[15], bit); - - // stage 7 - addsub_sse4_1(u[0], u[2], &u[0], &u[2], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[3], &u[1], &u[3], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[4], u[6], &u[4], &u[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[5], u[7], &u[5], &u[7], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[8], u[10], &u[8], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[9], u[11], &u[9], &u[11], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[12], u[14], &u[12], &u[14], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[13], u[15], &u[13], &u[15], &clamp_lo, &clamp_hi); - - // stage 8 - y = _mm_mullo_epi32(u[2], cospi32); - x = _mm_mullo_epi32(u[3], cospi32); - u[2] = _mm_add_epi32(y, x); - u[2] = 
_mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - u[3] = _mm_sub_epi32(y, x); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - y = _mm_mullo_epi32(u[6], cospi32); - x = _mm_mullo_epi32(u[7], cospi32); - u[6] = _mm_add_epi32(y, x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = _mm_sub_epi32(y, x); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - y = _mm_mullo_epi32(u[10], cospi32); - x = _mm_mullo_epi32(u[11], cospi32); - u[10] = _mm_add_epi32(y, x); - u[10] = _mm_add_epi32(u[10], rnding); - u[10] = _mm_srai_epi32(u[10], bit); - - u[11] = _mm_sub_epi32(y, x); - u[11] = _mm_add_epi32(u[11], rnding); - u[11] = _mm_srai_epi32(u[11], bit); - - y = _mm_mullo_epi32(u[14], cospi32); - x = _mm_mullo_epi32(u[15], cospi32); - u[14] = _mm_add_epi32(y, x); - u[14] = _mm_add_epi32(u[14], rnding); - u[14] = _mm_srai_epi32(u[14], bit); - - u[15] = _mm_sub_epi32(y, x); - u[15] = _mm_add_epi32(u[15], rnding); - u[15] = _mm_srai_epi32(u[15], bit); - - // stage 9 - if (do_cols) { - out[0] = u[0]; - out[1] = _mm_sub_epi32(_mm_setzero_si128(), u[8]); - out[2] = u[12]; - out[3] = _mm_sub_epi32(_mm_setzero_si128(), u[4]); - out[4] = u[6]; - out[5] = _mm_sub_epi32(_mm_setzero_si128(), u[14]); - out[6] = u[10]; - out[7] = _mm_sub_epi32(_mm_setzero_si128(), u[2]); - out[8] = u[3]; - out[9] = _mm_sub_epi32(_mm_setzero_si128(), u[11]); - out[10] = u[15]; - out[11] = _mm_sub_epi32(_mm_setzero_si128(), u[7]); - out[12] = u[5]; - out[13] = _mm_sub_epi32(_mm_setzero_si128(), u[13]); - out[14] = u[9]; - out[15] = _mm_sub_epi32(_mm_setzero_si128(), u[1]); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1))); - const __m128i clamp_hi_out = - _mm_set1_epi32((1 << (log_range_out - 1)) - 1); - - neg_shift_sse4_1(u[0], u[8], out + 0, out + 1, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(u[12], 
u[4], out + 2, out + 3, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(u[6], u[14], out + 4, out + 5, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(u[10], u[2], out + 6, out + 7, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(u[3], u[11], out + 8, out + 9, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(u[15], u[7], out + 10, out + 11, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(u[5], u[13], out + 12, out + 13, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(u[9], u[1], out + 14, out + 15, &clamp_lo_out, - &clamp_hi_out, out_shift); - } - } -} - -static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospim4 = _mm_set1_epi32(-cospi[4]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospim36 = _mm_set1_epi32(-cospi[36]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospim20 = _mm_set1_epi32(-cospi[20]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospim52 = _mm_set1_epi32(-cospi[52]); - const __m128i cospi52 = _mm_set1_epi32(cospi[52]); - const __m128i cospi36 = _mm_set1_epi32(cospi[36]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i cospim48 = 
_mm_set1_epi32(-cospi[48]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i u[16], v[16], x, y; - - { - // stage 0 - // stage 1 - u[0] = in[0]; - u[1] = in[8]; - u[2] = in[4]; - u[3] = in[12]; - u[4] = in[2]; - u[5] = in[10]; - u[6] = in[6]; - u[7] = in[14]; - u[8] = in[1]; - u[9] = in[9]; - u[10] = in[5]; - u[11] = in[13]; - u[12] = in[3]; - u[13] = in[11]; - u[14] = in[7]; - u[15] = in[15]; - - // stage 2 - v[0] = u[0]; - v[1] = u[1]; - v[2] = u[2]; - v[3] = u[3]; - v[4] = u[4]; - v[5] = u[5]; - v[6] = u[6]; - v[7] = u[7]; - - v[8] = half_btf_sse4_1(&cospi60, &u[8], &cospim4, &u[15], &rnding, bit); - v[9] = half_btf_sse4_1(&cospi28, &u[9], &cospim36, &u[14], &rnding, bit); - v[10] = half_btf_sse4_1(&cospi44, &u[10], &cospim20, &u[13], &rnding, bit); - v[11] = half_btf_sse4_1(&cospi12, &u[11], &cospim52, &u[12], &rnding, bit); - v[12] = half_btf_sse4_1(&cospi52, &u[11], &cospi12, &u[12], &rnding, bit); - v[13] = half_btf_sse4_1(&cospi20, &u[10], &cospi44, &u[13], &rnding, bit); - v[14] = half_btf_sse4_1(&cospi36, &u[9], &cospi28, &u[14], &rnding, bit); - v[15] = half_btf_sse4_1(&cospi4, &u[8], &cospi60, &u[15], &rnding, bit); - - // stage 3 - u[0] = v[0]; - u[1] = v[1]; - u[2] = v[2]; - u[3] = v[3]; - u[4] = half_btf_sse4_1(&cospi56, &v[4], &cospim8, &v[7], &rnding, bit); - u[5] = half_btf_sse4_1(&cospi24, &v[5], &cospim40, &v[6], &rnding, bit); - u[6] = half_btf_sse4_1(&cospi40, &v[5], &cospi24, &v[6], &rnding, bit); - u[7] = half_btf_sse4_1(&cospi8, &v[4], &cospi56, &v[7], &rnding, bit); - addsub_sse4_1(v[8], v[9], &u[8], &u[9], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[11], v[10], &u[11], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[12], v[13], &u[12], &u[13], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[15], v[14], &u[15], &u[14], &clamp_lo, 
&clamp_hi); - - // stage 4 - x = _mm_mullo_epi32(u[0], cospi32); - y = _mm_mullo_epi32(u[1], cospi32); - v[0] = _mm_add_epi32(x, y); - v[0] = _mm_add_epi32(v[0], rnding); - v[0] = _mm_srai_epi32(v[0], bit); - - v[1] = _mm_sub_epi32(x, y); - v[1] = _mm_add_epi32(v[1], rnding); - v[1] = _mm_srai_epi32(v[1], bit); - - v[2] = half_btf_sse4_1(&cospi48, &u[2], &cospim16, &u[3], &rnding, bit); - v[3] = half_btf_sse4_1(&cospi16, &u[2], &cospi48, &u[3], &rnding, bit); - addsub_sse4_1(u[4], u[5], &v[4], &v[5], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[7], u[6], &v[7], &v[6], &clamp_lo, &clamp_hi); - v[8] = u[8]; - v[9] = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit); - v[10] = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit); - v[11] = u[11]; - v[12] = u[12]; - v[13] = half_btf_sse4_1(&cospim16, &u[10], &cospi48, &u[13], &rnding, bit); - v[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit); - v[15] = u[15]; - - // stage 5 - addsub_sse4_1(v[0], v[3], &u[0], &u[3], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[1], v[2], &u[1], &u[2], &clamp_lo, &clamp_hi); - u[4] = v[4]; - - x = _mm_mullo_epi32(v[5], cospi32); - y = _mm_mullo_epi32(v[6], cospi32); - u[5] = _mm_sub_epi32(y, x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - u[6] = _mm_add_epi32(y, x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = v[7]; - addsub_sse4_1(v[8], v[11], &u[8], &u[11], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[9], v[10], &u[9], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[15], v[12], &u[15], &u[12], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[14], v[13], &u[14], &u[13], &clamp_lo, &clamp_hi); - - // stage 6 - addsub_sse4_1(u[0], u[7], &v[0], &v[7], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[1], u[6], &v[1], &v[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[2], u[5], &v[2], &v[5], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[3], u[4], &v[3], &v[4], &clamp_lo, &clamp_hi); - v[8] = u[8]; - 
v[9] = u[9]; - - x = _mm_mullo_epi32(u[10], cospi32); - y = _mm_mullo_epi32(u[13], cospi32); - v[10] = _mm_sub_epi32(y, x); - v[10] = _mm_add_epi32(v[10], rnding); - v[10] = _mm_srai_epi32(v[10], bit); - - v[13] = _mm_add_epi32(x, y); - v[13] = _mm_add_epi32(v[13], rnding); - v[13] = _mm_srai_epi32(v[13], bit); - - x = _mm_mullo_epi32(u[11], cospi32); - y = _mm_mullo_epi32(u[12], cospi32); - v[11] = _mm_sub_epi32(y, x); - v[11] = _mm_add_epi32(v[11], rnding); - v[11] = _mm_srai_epi32(v[11], bit); - - v[12] = _mm_add_epi32(x, y); - v[12] = _mm_add_epi32(v[12], rnding); - v[12] = _mm_srai_epi32(v[12], bit); - - v[14] = u[14]; - v[15] = u[15]; - - // stage 7 - if (do_cols) { - addsub_no_clamp_sse4_1(v[0], v[15], out + 0, out + 15); - addsub_no_clamp_sse4_1(v[1], v[14], out + 1, out + 14); - addsub_no_clamp_sse4_1(v[2], v[13], out + 2, out + 13); - addsub_no_clamp_sse4_1(v[3], v[12], out + 3, out + 12); - addsub_no_clamp_sse4_1(v[4], v[11], out + 4, out + 11); - addsub_no_clamp_sse4_1(v[5], v[10], out + 5, out + 10); - addsub_no_clamp_sse4_1(v[6], v[9], out + 6, out + 9); - addsub_no_clamp_sse4_1(v[7], v[8], out + 7, out + 8); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - addsub_shift_sse4_1(v[0], v[15], out + 0, out + 15, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(v[1], v[14], out + 1, out + 14, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(v[2], v[13], out + 2, out + 13, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(v[3], v[12], out + 3, out + 12, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(v[4], v[11], out + 4, out + 11, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(v[5], v[10], out + 5, out + 10, 
&clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(v[6], v[9], out + 6, out + 9, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(v[7], v[8], out + 7, out + 8, &clamp_lo_out, - &clamp_hi_out, out_shift); - } - } -} - -static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi2 = _mm_set1_epi32(cospi[2]); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - const __m128i cospi10 = _mm_set1_epi32(cospi[10]); - const __m128i cospi54 = _mm_set1_epi32(cospi[54]); - const __m128i cospi18 = _mm_set1_epi32(cospi[18]); - const __m128i cospi46 = _mm_set1_epi32(cospi[46]); - const __m128i cospi26 = _mm_set1_epi32(cospi[26]); - const __m128i cospi38 = _mm_set1_epi32(cospi[38]); - const __m128i cospi34 = _mm_set1_epi32(cospi[34]); - const __m128i cospi30 = _mm_set1_epi32(cospi[30]); - const __m128i cospi42 = _mm_set1_epi32(cospi[42]); - const __m128i cospi22 = _mm_set1_epi32(cospi[22]); - const __m128i cospi50 = _mm_set1_epi32(cospi[50]); - const __m128i cospi14 = _mm_set1_epi32(cospi[14]); - const __m128i cospi58 = _mm_set1_epi32(cospi[58]); - const __m128i cospi6 = _mm_set1_epi32(cospi[6]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospim56 = _mm_set1_epi32(-cospi[56]); - const __m128i cospim24 = _mm_set1_epi32(-cospi[24]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 
6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i u[16], v[16], x, y; - - // Calculate the column 0, 1, 2, 3 - { - // stage 0 - // stage 1 - // stage 2 - v[0] = _mm_mullo_epi32(in[15], cospi2); - x = _mm_mullo_epi32(in[0], cospi62); - v[0] = _mm_add_epi32(v[0], x); - v[0] = _mm_add_epi32(v[0], rnding); - v[0] = _mm_srai_epi32(v[0], bit); - - v[1] = _mm_mullo_epi32(in[15], cospi62); - x = _mm_mullo_epi32(in[0], cospi2); - v[1] = _mm_sub_epi32(v[1], x); - v[1] = _mm_add_epi32(v[1], rnding); - v[1] = _mm_srai_epi32(v[1], bit); - - v[2] = _mm_mullo_epi32(in[13], cospi10); - x = _mm_mullo_epi32(in[2], cospi54); - v[2] = _mm_add_epi32(v[2], x); - v[2] = _mm_add_epi32(v[2], rnding); - v[2] = _mm_srai_epi32(v[2], bit); - - v[3] = _mm_mullo_epi32(in[13], cospi54); - x = _mm_mullo_epi32(in[2], cospi10); - v[3] = _mm_sub_epi32(v[3], x); - v[3] = _mm_add_epi32(v[3], rnding); - v[3] = _mm_srai_epi32(v[3], bit); - - v[4] = _mm_mullo_epi32(in[11], cospi18); - x = _mm_mullo_epi32(in[4], cospi46); - v[4] = _mm_add_epi32(v[4], x); - v[4] = _mm_add_epi32(v[4], rnding); - v[4] = _mm_srai_epi32(v[4], bit); - - v[5] = _mm_mullo_epi32(in[11], cospi46); - x = _mm_mullo_epi32(in[4], cospi18); - v[5] = _mm_sub_epi32(v[5], x); - v[5] = _mm_add_epi32(v[5], rnding); - v[5] = _mm_srai_epi32(v[5], bit); - - v[6] = _mm_mullo_epi32(in[9], cospi26); - x = _mm_mullo_epi32(in[6], cospi38); - v[6] = _mm_add_epi32(v[6], x); - v[6] = _mm_add_epi32(v[6], rnding); - v[6] = _mm_srai_epi32(v[6], bit); - - v[7] = _mm_mullo_epi32(in[9], cospi38); - x = _mm_mullo_epi32(in[6], cospi26); - v[7] = _mm_sub_epi32(v[7], x); - v[7] = _mm_add_epi32(v[7], rnding); - v[7] = _mm_srai_epi32(v[7], bit); - - v[8] = _mm_mullo_epi32(in[7], cospi34); - x = _mm_mullo_epi32(in[8], cospi30); - v[8] = _mm_add_epi32(v[8], x); - v[8] = _mm_add_epi32(v[8], rnding); - v[8] = _mm_srai_epi32(v[8], bit); - - v[9] = 
_mm_mullo_epi32(in[7], cospi30); - x = _mm_mullo_epi32(in[8], cospi34); - v[9] = _mm_sub_epi32(v[9], x); - v[9] = _mm_add_epi32(v[9], rnding); - v[9] = _mm_srai_epi32(v[9], bit); - - v[10] = _mm_mullo_epi32(in[5], cospi42); - x = _mm_mullo_epi32(in[10], cospi22); - v[10] = _mm_add_epi32(v[10], x); - v[10] = _mm_add_epi32(v[10], rnding); - v[10] = _mm_srai_epi32(v[10], bit); - - v[11] = _mm_mullo_epi32(in[5], cospi22); - x = _mm_mullo_epi32(in[10], cospi42); - v[11] = _mm_sub_epi32(v[11], x); - v[11] = _mm_add_epi32(v[11], rnding); - v[11] = _mm_srai_epi32(v[11], bit); - - v[12] = _mm_mullo_epi32(in[3], cospi50); - x = _mm_mullo_epi32(in[12], cospi14); - v[12] = _mm_add_epi32(v[12], x); - v[12] = _mm_add_epi32(v[12], rnding); - v[12] = _mm_srai_epi32(v[12], bit); - - v[13] = _mm_mullo_epi32(in[3], cospi14); - x = _mm_mullo_epi32(in[12], cospi50); - v[13] = _mm_sub_epi32(v[13], x); - v[13] = _mm_add_epi32(v[13], rnding); - v[13] = _mm_srai_epi32(v[13], bit); - - v[14] = _mm_mullo_epi32(in[1], cospi58); - x = _mm_mullo_epi32(in[14], cospi6); - v[14] = _mm_add_epi32(v[14], x); - v[14] = _mm_add_epi32(v[14], rnding); - v[14] = _mm_srai_epi32(v[14], bit); - - v[15] = _mm_mullo_epi32(in[1], cospi6); - x = _mm_mullo_epi32(in[14], cospi58); - v[15] = _mm_sub_epi32(v[15], x); - v[15] = _mm_add_epi32(v[15], rnding); - v[15] = _mm_srai_epi32(v[15], bit); - - // stage 3 - addsub_sse4_1(v[0], v[8], &u[0], &u[8], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[1], v[9], &u[1], &u[9], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[2], v[10], &u[2], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[3], v[11], &u[3], &u[11], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[4], v[12], &u[4], &u[12], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[5], v[13], &u[5], &u[13], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[6], v[14], &u[6], &u[14], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[7], v[15], &u[7], &u[15], &clamp_lo, &clamp_hi); - - // stage 4 - v[0] = u[0]; - v[1] = u[1]; - v[2] = u[2]; - v[3] = u[3]; - v[4] = 
u[4]; - v[5] = u[5]; - v[6] = u[6]; - v[7] = u[7]; - - v[8] = _mm_mullo_epi32(u[8], cospi8); - x = _mm_mullo_epi32(u[9], cospi56); - v[8] = _mm_add_epi32(v[8], x); - v[8] = _mm_add_epi32(v[8], rnding); - v[8] = _mm_srai_epi32(v[8], bit); - - v[9] = _mm_mullo_epi32(u[8], cospi56); - x = _mm_mullo_epi32(u[9], cospi8); - v[9] = _mm_sub_epi32(v[9], x); - v[9] = _mm_add_epi32(v[9], rnding); - v[9] = _mm_srai_epi32(v[9], bit); - - v[10] = _mm_mullo_epi32(u[10], cospi40); - x = _mm_mullo_epi32(u[11], cospi24); - v[10] = _mm_add_epi32(v[10], x); - v[10] = _mm_add_epi32(v[10], rnding); - v[10] = _mm_srai_epi32(v[10], bit); - - v[11] = _mm_mullo_epi32(u[10], cospi24); - x = _mm_mullo_epi32(u[11], cospi40); - v[11] = _mm_sub_epi32(v[11], x); - v[11] = _mm_add_epi32(v[11], rnding); - v[11] = _mm_srai_epi32(v[11], bit); - - v[12] = _mm_mullo_epi32(u[12], cospim56); - x = _mm_mullo_epi32(u[13], cospi8); - v[12] = _mm_add_epi32(v[12], x); - v[12] = _mm_add_epi32(v[12], rnding); - v[12] = _mm_srai_epi32(v[12], bit); - - v[13] = _mm_mullo_epi32(u[12], cospi8); - x = _mm_mullo_epi32(u[13], cospim56); - v[13] = _mm_sub_epi32(v[13], x); - v[13] = _mm_add_epi32(v[13], rnding); - v[13] = _mm_srai_epi32(v[13], bit); - - v[14] = _mm_mullo_epi32(u[14], cospim24); - x = _mm_mullo_epi32(u[15], cospi40); - v[14] = _mm_add_epi32(v[14], x); - v[14] = _mm_add_epi32(v[14], rnding); - v[14] = _mm_srai_epi32(v[14], bit); - - v[15] = _mm_mullo_epi32(u[14], cospi40); - x = _mm_mullo_epi32(u[15], cospim24); - v[15] = _mm_sub_epi32(v[15], x); - v[15] = _mm_add_epi32(v[15], rnding); - v[15] = _mm_srai_epi32(v[15], bit); - - // stage 5 - addsub_sse4_1(v[0], v[4], &u[0], &u[4], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[1], v[5], &u[1], &u[5], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[2], v[6], &u[2], &u[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[3], v[7], &u[3], &u[7], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[8], v[12], &u[8], &u[12], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[9], v[13], &u[9], &u[13], 
&clamp_lo, &clamp_hi); - addsub_sse4_1(v[10], v[14], &u[10], &u[14], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[11], v[15], &u[11], &u[15], &clamp_lo, &clamp_hi); - - // stage 6 - v[0] = u[0]; - v[1] = u[1]; - v[2] = u[2]; - v[3] = u[3]; - - v[4] = _mm_mullo_epi32(u[4], cospi16); - x = _mm_mullo_epi32(u[5], cospi48); - v[4] = _mm_add_epi32(v[4], x); - v[4] = _mm_add_epi32(v[4], rnding); - v[4] = _mm_srai_epi32(v[4], bit); - - v[5] = _mm_mullo_epi32(u[4], cospi48); - x = _mm_mullo_epi32(u[5], cospi16); - v[5] = _mm_sub_epi32(v[5], x); - v[5] = _mm_add_epi32(v[5], rnding); - v[5] = _mm_srai_epi32(v[5], bit); - - v[6] = _mm_mullo_epi32(u[6], cospim48); - x = _mm_mullo_epi32(u[7], cospi16); - v[6] = _mm_add_epi32(v[6], x); - v[6] = _mm_add_epi32(v[6], rnding); - v[6] = _mm_srai_epi32(v[6], bit); - - v[7] = _mm_mullo_epi32(u[6], cospi16); - x = _mm_mullo_epi32(u[7], cospim48); - v[7] = _mm_sub_epi32(v[7], x); - v[7] = _mm_add_epi32(v[7], rnding); - v[7] = _mm_srai_epi32(v[7], bit); - - v[8] = u[8]; - v[9] = u[9]; - v[10] = u[10]; - v[11] = u[11]; - - v[12] = _mm_mullo_epi32(u[12], cospi16); - x = _mm_mullo_epi32(u[13], cospi48); - v[12] = _mm_add_epi32(v[12], x); - v[12] = _mm_add_epi32(v[12], rnding); - v[12] = _mm_srai_epi32(v[12], bit); - - v[13] = _mm_mullo_epi32(u[12], cospi48); - x = _mm_mullo_epi32(u[13], cospi16); - v[13] = _mm_sub_epi32(v[13], x); - v[13] = _mm_add_epi32(v[13], rnding); - v[13] = _mm_srai_epi32(v[13], bit); - - v[14] = _mm_mullo_epi32(u[14], cospim48); - x = _mm_mullo_epi32(u[15], cospi16); - v[14] = _mm_add_epi32(v[14], x); - v[14] = _mm_add_epi32(v[14], rnding); - v[14] = _mm_srai_epi32(v[14], bit); - - v[15] = _mm_mullo_epi32(u[14], cospi16); - x = _mm_mullo_epi32(u[15], cospim48); - v[15] = _mm_sub_epi32(v[15], x); - v[15] = _mm_add_epi32(v[15], rnding); - v[15] = _mm_srai_epi32(v[15], bit); - - // stage 7 - addsub_sse4_1(v[0], v[2], &u[0], &u[2], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[1], v[3], &u[1], &u[3], &clamp_lo, &clamp_hi); - 
addsub_sse4_1(v[4], v[6], &u[4], &u[6], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[5], v[7], &u[5], &u[7], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[8], v[10], &u[8], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[9], v[11], &u[9], &u[11], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[12], v[14], &u[12], &u[14], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[13], v[15], &u[13], &u[15], &clamp_lo, &clamp_hi); - - // stage 8 - v[0] = u[0]; - v[1] = u[1]; - - y = _mm_mullo_epi32(u[2], cospi32); - x = _mm_mullo_epi32(u[3], cospi32); - v[2] = _mm_add_epi32(y, x); - v[2] = _mm_add_epi32(v[2], rnding); - v[2] = _mm_srai_epi32(v[2], bit); - - v[3] = _mm_sub_epi32(y, x); - v[3] = _mm_add_epi32(v[3], rnding); - v[3] = _mm_srai_epi32(v[3], bit); - - v[4] = u[4]; - v[5] = u[5]; - - y = _mm_mullo_epi32(u[6], cospi32); - x = _mm_mullo_epi32(u[7], cospi32); - v[6] = _mm_add_epi32(y, x); - v[6] = _mm_add_epi32(v[6], rnding); - v[6] = _mm_srai_epi32(v[6], bit); - - v[7] = _mm_sub_epi32(y, x); - v[7] = _mm_add_epi32(v[7], rnding); - v[7] = _mm_srai_epi32(v[7], bit); - - v[8] = u[8]; - v[9] = u[9]; - - y = _mm_mullo_epi32(u[10], cospi32); - x = _mm_mullo_epi32(u[11], cospi32); - v[10] = _mm_add_epi32(y, x); - v[10] = _mm_add_epi32(v[10], rnding); - v[10] = _mm_srai_epi32(v[10], bit); - - v[11] = _mm_sub_epi32(y, x); - v[11] = _mm_add_epi32(v[11], rnding); - v[11] = _mm_srai_epi32(v[11], bit); - - v[12] = u[12]; - v[13] = u[13]; - - y = _mm_mullo_epi32(u[14], cospi32); - x = _mm_mullo_epi32(u[15], cospi32); - v[14] = _mm_add_epi32(y, x); - v[14] = _mm_add_epi32(v[14], rnding); - v[14] = _mm_srai_epi32(v[14], bit); - - v[15] = _mm_sub_epi32(y, x); - v[15] = _mm_add_epi32(v[15], rnding); - v[15] = _mm_srai_epi32(v[15], bit); - - // stage 9 - if (do_cols) { - out[0] = v[0]; - out[1] = _mm_sub_epi32(_mm_setzero_si128(), v[8]); - out[2] = v[12]; - out[3] = _mm_sub_epi32(_mm_setzero_si128(), v[4]); - out[4] = v[6]; - out[5] = _mm_sub_epi32(_mm_setzero_si128(), v[14]); - out[6] = v[10]; - out[7] = 
_mm_sub_epi32(_mm_setzero_si128(), v[2]); - out[8] = v[3]; - out[9] = _mm_sub_epi32(_mm_setzero_si128(), v[11]); - out[10] = v[15]; - out[11] = _mm_sub_epi32(_mm_setzero_si128(), v[7]); - out[12] = v[5]; - out[13] = _mm_sub_epi32(_mm_setzero_si128(), v[13]); - out[14] = v[9]; - out[15] = _mm_sub_epi32(_mm_setzero_si128(), v[1]); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1))); - const __m128i clamp_hi_out = - _mm_set1_epi32((1 << (log_range_out - 1)) - 1); - - neg_shift_sse4_1(v[0], v[8], out + 0, out + 1, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[12], v[4], out + 2, out + 3, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[6], v[14], out + 4, out + 5, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[10], v[2], out + 6, out + 7, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[3], v[11], out + 8, out + 9, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[15], v[7], out + 10, out + 11, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[5], v[13], out + 12, out + 13, &clamp_lo_out, - &clamp_hi_out, out_shift); - neg_shift_sse4_1(v[9], v[1], out + 14, out + 15, &clamp_lo_out, - &clamp_hi_out, out_shift); - } - } -} - -static INLINE void idct64_stage8_sse4_1( - __m128i *u, const __m128i *cospim32, const __m128i *cospi32, - const __m128i *cospim16, const __m128i *cospi48, const __m128i *cospi16, - const __m128i *cospim48, const __m128i *clamp_lo, const __m128i *clamp_hi, - const __m128i *rnding, int bit) { - int i; - __m128i temp1, temp2, temp3, temp4; - temp1 = half_btf_sse4_1(cospim32, &u[10], cospi32, &u[13], rnding, bit); - u[13] = half_btf_sse4_1(cospi32, &u[10], cospi32, &u[13], rnding, bit); - u[10] = temp1; - temp2 = half_btf_sse4_1(cospim32, &u[11], cospi32, &u[12], rnding, bit); - u[12] = half_btf_sse4_1(cospi32, &u[11], cospi32, &u[12], rnding, bit); - u[11] = temp2; - - for 
(i = 16; i < 20; ++i) { - addsub_sse4_1(u[i], u[i ^ 7], &u[i], &u[i ^ 7], clamp_lo, clamp_hi); - addsub_sse4_1(u[i ^ 15], u[i ^ 8], &u[i ^ 15], &u[i ^ 8], clamp_lo, - clamp_hi); - } - - temp1 = half_btf_sse4_1(cospim16, &u[36], cospi48, &u[59], rnding, bit); - temp2 = half_btf_sse4_1(cospim16, &u[37], cospi48, &u[58], rnding, bit); - temp3 = half_btf_sse4_1(cospim16, &u[38], cospi48, &u[57], rnding, bit); - temp4 = half_btf_sse4_1(cospim16, &u[39], cospi48, &u[56], rnding, bit); - u[56] = half_btf_sse4_1(cospi48, &u[39], cospi16, &u[56], rnding, bit); - u[57] = half_btf_sse4_1(cospi48, &u[38], cospi16, &u[57], rnding, bit); - u[58] = half_btf_sse4_1(cospi48, &u[37], cospi16, &u[58], rnding, bit); - u[59] = half_btf_sse4_1(cospi48, &u[36], cospi16, &u[59], rnding, bit); - u[36] = temp1; - u[37] = temp2; - u[38] = temp3; - u[39] = temp4; - - temp1 = half_btf_sse4_1(cospim48, &u[40], cospim16, &u[55], rnding, bit); - temp2 = half_btf_sse4_1(cospim48, &u[41], cospim16, &u[54], rnding, bit); - temp3 = half_btf_sse4_1(cospim48, &u[42], cospim16, &u[53], rnding, bit); - temp4 = half_btf_sse4_1(cospim48, &u[43], cospim16, &u[52], rnding, bit); - u[52] = half_btf_sse4_1(cospim16, &u[43], cospi48, &u[52], rnding, bit); - u[53] = half_btf_sse4_1(cospim16, &u[42], cospi48, &u[53], rnding, bit); - u[54] = half_btf_sse4_1(cospim16, &u[41], cospi48, &u[54], rnding, bit); - u[55] = half_btf_sse4_1(cospim16, &u[40], cospi48, &u[55], rnding, bit); - u[40] = temp1; - u[41] = temp2; - u[42] = temp3; - u[43] = temp4; -} - -static INLINE void idct64_stage9_sse4_1(__m128i *u, const __m128i *cospim32, - const __m128i *cospi32, - const __m128i *clamp_lo, - const __m128i *clamp_hi, - const __m128i *rnding, int bit) { - int i; - __m128i temp1, temp2, temp3, temp4; - for (i = 0; i < 8; ++i) { - addsub_sse4_1(u[i], u[15 - i], &u[i], &u[15 - i], clamp_lo, clamp_hi); - } - - temp1 = half_btf_sse4_1(cospim32, &u[20], cospi32, &u[27], rnding, bit); - temp2 = half_btf_sse4_1(cospim32, &u[21], 
cospi32, &u[26], rnding, bit); - temp3 = half_btf_sse4_1(cospim32, &u[22], cospi32, &u[25], rnding, bit); - temp4 = half_btf_sse4_1(cospim32, &u[23], cospi32, &u[24], rnding, bit); - u[24] = half_btf_sse4_1(cospi32, &u[23], cospi32, &u[24], rnding, bit); - u[25] = half_btf_sse4_1(cospi32, &u[22], cospi32, &u[25], rnding, bit); - u[26] = half_btf_sse4_1(cospi32, &u[21], cospi32, &u[26], rnding, bit); - u[27] = half_btf_sse4_1(cospi32, &u[20], cospi32, &u[27], rnding, bit); - u[20] = temp1; - u[21] = temp2; - u[22] = temp3; - u[23] = temp4; - for (i = 32; i < 40; i++) { - addsub_sse4_1(u[i], u[i ^ 15], &u[i], &u[i ^ 15], clamp_lo, clamp_hi); - } - - for (i = 48; i < 56; i++) { - addsub_sse4_1(u[i ^ 15], u[i], &u[i ^ 15], &u[i], clamp_lo, clamp_hi); - } -} - -static INLINE void idct64_stage10_sse4_1(__m128i *u, const __m128i *cospim32, - const __m128i *cospi32, - const __m128i *clamp_lo, - const __m128i *clamp_hi, - const __m128i *rnding, int bit) { - __m128i temp1, temp2, temp3, temp4; - for (int i = 0; i < 16; i++) { - addsub_sse4_1(u[i], u[31 - i], &u[i], &u[31 - i], clamp_lo, clamp_hi); - } - - temp1 = half_btf_sse4_1(cospim32, &u[40], cospi32, &u[55], rnding, bit); - temp2 = half_btf_sse4_1(cospim32, &u[41], cospi32, &u[54], rnding, bit); - temp3 = half_btf_sse4_1(cospim32, &u[42], cospi32, &u[53], rnding, bit); - temp4 = half_btf_sse4_1(cospim32, &u[43], cospi32, &u[52], rnding, bit); - u[52] = half_btf_sse4_1(cospi32, &u[43], cospi32, &u[52], rnding, bit); - u[53] = half_btf_sse4_1(cospi32, &u[42], cospi32, &u[53], rnding, bit); - u[54] = half_btf_sse4_1(cospi32, &u[41], cospi32, &u[54], rnding, bit); - u[55] = half_btf_sse4_1(cospi32, &u[40], cospi32, &u[55], rnding, bit); - u[40] = temp1; - u[41] = temp2; - u[42] = temp3; - u[43] = temp4; - - temp1 = half_btf_sse4_1(cospim32, &u[44], cospi32, &u[51], rnding, bit); - temp2 = half_btf_sse4_1(cospim32, &u[45], cospi32, &u[50], rnding, bit); - temp3 = half_btf_sse4_1(cospim32, &u[46], cospi32, &u[49], rnding, 
bit); - temp4 = half_btf_sse4_1(cospim32, &u[47], cospi32, &u[48], rnding, bit); - u[48] = half_btf_sse4_1(cospi32, &u[47], cospi32, &u[48], rnding, bit); - u[49] = half_btf_sse4_1(cospi32, &u[46], cospi32, &u[49], rnding, bit); - u[50] = half_btf_sse4_1(cospi32, &u[45], cospi32, &u[50], rnding, bit); - u[51] = half_btf_sse4_1(cospi32, &u[44], cospi32, &u[51], rnding, bit); - u[44] = temp1; - u[45] = temp2; - u[46] = temp3; - u[47] = temp4; -} - -static INLINE void idct64_stage11_sse4_1(__m128i *u, __m128i *out, int do_cols, - int bd, int out_shift, - const int log_range) { - if (do_cols) { - for (int i = 0; i < 32; i++) { - addsub_no_clamp_sse4_1(u[i], u[63 - i], &out[(i)], &out[(63 - i)]); - } - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - for (int i = 0; i < 32; i++) { - addsub_shift_sse4_1(u[i], u[63 - i], &out[(i)], &out[(63 - i)], - &clamp_lo_out, &clamp_hi_out, out_shift); - } - } -} - -static void idct64x64_low1_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 
6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - - { - __m128i x; - - // stage 1 - // stage 2 - // stage 3 - // stage 4 - // stage 5 - // stage 6 - x = half_btf_0_sse4_1(&cospi32, &in[0], &rnding, bit); - - // stage 8 - // stage 9 - // stage 10 - // stage 11 - if (do_cols) { - x = _mm_max_epi32(x, clamp_lo); - x = _mm_min_epi32(x, clamp_hi); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - __m128i offset = _mm_set1_epi32((1 << out_shift) >> 1); - x = _mm_add_epi32(x, offset); - x = _mm_sra_epi32(x, _mm_cvtsi32_si128(out_shift)); - - x = _mm_max_epi32(x, clamp_lo_out); - x = _mm_min_epi32(x, clamp_hi_out); - } - - out[0] = x; - out[63] = x; - out[1] = x; - out[62] = x; - out[2] = x; - out[61] = x; - out[3] = x; - out[60] = x; - out[4] = x; - out[59] = x; - out[5] = x; - out[58] = x; - out[6] = x; - out[57] = x; - out[7] = x; - out[56] = x; - out[8] = x; - out[55] = x; - out[9] = x; - out[54] = x; - out[10] = x; - out[53] = x; - out[11] = x; - out[52] = x; - out[12] = x; - out[51] = x; - out[13] = x; - out[50] = x; - out[14] = x; - out[49] = x; - out[15] = x; - out[48] = x; - out[16] = x; - out[47] = x; - out[17] = x; - out[46] = x; - out[18] = x; - out[45] = x; - out[19] = x; - out[44] = x; - out[20] = x; - out[43] = x; - out[21] = x; - out[42] = x; - out[22] = x; - out[41] = x; - out[23] = x; - out[40] = x; - out[24] = x; - out[39] = x; - out[25] = x; - out[38] = x; - out[26] = x; - out[37] = x; - out[27] = x; - out[36] = x; - out[28] = x; - out[35] = x; - out[29] = x; - out[34] = x; - out[30] = x; - out[33] = x; - out[31] = x; - out[32] = x; - } -} - 
-static void idct64x64_low8_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - int i, j; - const int32_t *cospi = cospi_arr(bit); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - - const __m128i cospi1 = _mm_set1_epi32(cospi[1]); - const __m128i cospi2 = _mm_set1_epi32(cospi[2]); - const __m128i cospi3 = _mm_set1_epi32(cospi[3]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospi6 = _mm_set1_epi32(cospi[6]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospim4 = _mm_set1_epi32(-cospi[4]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospim12 = _mm_set1_epi32(-cospi[12]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i cospim20 = _mm_set1_epi32(-cospi[20]); - const __m128i cospim24 = _mm_set1_epi32(-cospi[24]); - const __m128i cospim28 = _mm_set1_epi32(-cospi[28]); - const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); - const __m128i cospim36 = _mm_set1_epi32(-cospi[36]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospim52 = _mm_set1_epi32(-cospi[52]); - const __m128i cospim56 = 
_mm_set1_epi32(-cospi[56]); - const __m128i cospi63 = _mm_set1_epi32(cospi[63]); - const __m128i cospim57 = _mm_set1_epi32(-cospi[57]); - const __m128i cospi7 = _mm_set1_epi32(cospi[7]); - const __m128i cospi5 = _mm_set1_epi32(cospi[5]); - const __m128i cospi59 = _mm_set1_epi32(cospi[59]); - const __m128i cospim61 = _mm_set1_epi32(-cospi[61]); - const __m128i cospim58 = _mm_set1_epi32(-cospi[58]); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - - { - __m128i u[64]; - - // stage 1 - u[0] = in[0]; - u[8] = in[4]; - u[16] = in[2]; - u[24] = in[6]; - u[32] = in[1]; - u[40] = in[5]; - u[48] = in[3]; - u[56] = in[7]; - - // stage 2 - u[63] = half_btf_0_sse4_1(&cospi1, &u[32], &rnding, bit); - u[32] = half_btf_0_sse4_1(&cospi63, &u[32], &rnding, bit); - u[39] = half_btf_0_sse4_1(&cospim57, &u[56], &rnding, bit); - u[56] = half_btf_0_sse4_1(&cospi7, &u[56], &rnding, bit); - u[55] = half_btf_0_sse4_1(&cospi5, &u[40], &rnding, bit); - u[40] = half_btf_0_sse4_1(&cospi59, &u[40], &rnding, bit); - u[47] = half_btf_0_sse4_1(&cospim61, &u[48], &rnding, bit); - u[48] = half_btf_0_sse4_1(&cospi3, &u[48], &rnding, bit); - - // stage 3 - u[31] = half_btf_0_sse4_1(&cospi2, &u[16], &rnding, bit); - u[16] = half_btf_0_sse4_1(&cospi62, &u[16], &rnding, bit); - u[23] = half_btf_0_sse4_1(&cospim58, &u[24], &rnding, bit); - u[24] = half_btf_0_sse4_1(&cospi6, &u[24], &rnding, bit); - u[33] = u[32]; - u[38] = u[39]; - u[41] = u[40]; - u[46] = u[47]; - u[49] = u[48]; - u[54] = u[55]; - u[57] = u[56]; - u[62] = u[63]; - - // stage 4 - __m128i temp1, temp2; - u[15] = half_btf_0_sse4_1(&cospi4, &u[8], &rnding, bit); - u[8] = half_btf_0_sse4_1(&cospi60, &u[8], &rnding, bit); - u[17] = u[16]; - u[22] = u[23]; - u[25] = u[24]; - u[30] = u[31]; - - temp1 = half_btf_sse4_1(&cospim4, &u[33], &cospi60, &u[62], &rnding, bit); - u[62] = half_btf_sse4_1(&cospi60, &u[33], &cospi4, &u[62], &rnding, bit); - u[33] = temp1; - - temp2 = half_btf_sse4_1(&cospim36, &u[38], &cospi28, &u[57], &rnding, bit); - 
u[38] = half_btf_sse4_1(&cospim28, &u[38], &cospim36, &u[57], &rnding, bit); - u[57] = temp2; - - temp1 = half_btf_sse4_1(&cospim20, &u[41], &cospi44, &u[54], &rnding, bit); - u[54] = half_btf_sse4_1(&cospi44, &u[41], &cospi20, &u[54], &rnding, bit); - u[41] = temp1; - - temp2 = half_btf_sse4_1(&cospim12, &u[46], &cospim52, &u[49], &rnding, bit); - u[49] = half_btf_sse4_1(&cospim52, &u[46], &cospi12, &u[49], &rnding, bit); - u[46] = temp2; - - // stage 5 - u[9] = u[8]; - u[14] = u[15]; - - temp1 = half_btf_sse4_1(&cospim8, &u[17], &cospi56, &u[30], &rnding, bit); - u[30] = half_btf_sse4_1(&cospi56, &u[17], &cospi8, &u[30], &rnding, bit); - u[17] = temp1; - - temp2 = half_btf_sse4_1(&cospim24, &u[22], &cospim40, &u[25], &rnding, bit); - u[25] = half_btf_sse4_1(&cospim40, &u[22], &cospi24, &u[25], &rnding, bit); - u[22] = temp2; - - u[35] = u[32]; - u[34] = u[33]; - u[36] = u[39]; - u[37] = u[38]; - u[43] = u[40]; - u[42] = u[41]; - u[44] = u[47]; - u[45] = u[46]; - u[51] = u[48]; - u[50] = u[49]; - u[52] = u[55]; - u[53] = u[54]; - u[59] = u[56]; - u[58] = u[57]; - u[60] = u[63]; - u[61] = u[62]; - - // stage 6 - temp1 = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit); - u[1] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit); - u[0] = temp1; - - temp2 = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit); - u[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit); - u[9] = temp2; - u[19] = u[16]; - u[18] = u[17]; - u[20] = u[23]; - u[21] = u[22]; - u[27] = u[24]; - u[26] = u[25]; - u[28] = u[31]; - u[29] = u[30]; - - temp1 = half_btf_sse4_1(&cospim8, &u[34], &cospi56, &u[61], &rnding, bit); - u[61] = half_btf_sse4_1(&cospi56, &u[34], &cospi8, &u[61], &rnding, bit); - u[34] = temp1; - temp2 = half_btf_sse4_1(&cospim8, &u[35], &cospi56, &u[60], &rnding, bit); - u[60] = half_btf_sse4_1(&cospi56, &u[35], &cospi8, &u[60], &rnding, bit); - u[35] = temp2; - temp1 = half_btf_sse4_1(&cospim56, &u[36], &cospim8, &u[59], &rnding, bit); - u[59] = 
half_btf_sse4_1(&cospim8, &u[36], &cospi56, &u[59], &rnding, bit); - u[36] = temp1; - temp2 = half_btf_sse4_1(&cospim56, &u[37], &cospim8, &u[58], &rnding, bit); - u[58] = half_btf_sse4_1(&cospim8, &u[37], &cospi56, &u[58], &rnding, bit); - u[37] = temp2; - temp1 = half_btf_sse4_1(&cospim40, &u[42], &cospi24, &u[53], &rnding, bit); - u[53] = half_btf_sse4_1(&cospi24, &u[42], &cospi40, &u[53], &rnding, bit); - u[42] = temp1; - temp2 = half_btf_sse4_1(&cospim40, &u[43], &cospi24, &u[52], &rnding, bit); - u[52] = half_btf_sse4_1(&cospi24, &u[43], &cospi40, &u[52], &rnding, bit); - u[43] = temp2; - temp1 = half_btf_sse4_1(&cospim24, &u[44], &cospim40, &u[51], &rnding, bit); - u[51] = half_btf_sse4_1(&cospim40, &u[44], &cospi24, &u[51], &rnding, bit); - u[44] = temp1; - temp2 = half_btf_sse4_1(&cospim24, &u[45], &cospim40, &u[50], &rnding, bit); - u[50] = half_btf_sse4_1(&cospim40, &u[45], &cospi24, &u[50], &rnding, bit); - u[45] = temp2; - - // stage 7 - u[3] = u[0]; - u[2] = u[1]; - u[11] = u[8]; - u[10] = u[9]; - u[12] = u[15]; - u[13] = u[14]; - - temp1 = half_btf_sse4_1(&cospim16, &u[18], &cospi48, &u[29], &rnding, bit); - u[29] = half_btf_sse4_1(&cospi48, &u[18], &cospi16, &u[29], &rnding, bit); - u[18] = temp1; - temp2 = half_btf_sse4_1(&cospim16, &u[19], &cospi48, &u[28], &rnding, bit); - u[28] = half_btf_sse4_1(&cospi48, &u[19], &cospi16, &u[28], &rnding, bit); - u[19] = temp2; - temp1 = half_btf_sse4_1(&cospim48, &u[20], &cospim16, &u[27], &rnding, bit); - u[27] = half_btf_sse4_1(&cospim16, &u[20], &cospi48, &u[27], &rnding, bit); - u[20] = temp1; - temp2 = half_btf_sse4_1(&cospim48, &u[21], &cospim16, &u[26], &rnding, bit); - u[26] = half_btf_sse4_1(&cospim16, &u[21], &cospi48, &u[26], &rnding, bit); - u[21] = temp2; - for (i = 32; i < 64; i += 16) { - for (j = i; j < i + 4; j++) { - addsub_sse4_1(u[j], u[j ^ 7], &u[j], &u[j ^ 7], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[j ^ 15], u[j ^ 8], &u[j ^ 15], &u[j ^ 8], &clamp_lo, - &clamp_hi); - } - } - - // stage 8 
- u[7] = u[0]; - u[6] = u[1]; - u[5] = u[2]; - u[4] = u[3]; - u[9] = u[9]; - - idct64_stage8_sse4_1(u, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16, - &cospim48, &clamp_lo, &clamp_hi, &rnding, bit); - - // stage 9 - idct64_stage9_sse4_1(u, &cospim32, &cospi32, &clamp_lo, &clamp_hi, &rnding, - bit); - - // stage 10 - idct64_stage10_sse4_1(u, &cospim32, &cospi32, &clamp_lo, &clamp_hi, &rnding, - bit); - - // stage 11 - idct64_stage11_sse4_1(u, out, do_cols, bd, out_shift, log_range); - } -} - -static void idct64x64_low16_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - int i, j; - const int32_t *cospi = cospi_arr(bit); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - - const __m128i cospi1 = _mm_set1_epi32(cospi[1]); - const __m128i cospi2 = _mm_set1_epi32(cospi[2]); - const __m128i cospi3 = _mm_set1_epi32(cospi[3]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospi5 = _mm_set1_epi32(cospi[5]); - const __m128i cospi6 = _mm_set1_epi32(cospi[6]); - const __m128i cospi7 = _mm_set1_epi32(cospi[7]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi9 = _mm_set1_epi32(cospi[9]); - const __m128i cospi10 = _mm_set1_epi32(cospi[10]); - const __m128i cospi11 = _mm_set1_epi32(cospi[11]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi13 = _mm_set1_epi32(cospi[13]); - const __m128i cospi14 = _mm_set1_epi32(cospi[14]); - const __m128i cospi15 = _mm_set1_epi32(cospi[15]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi36 = 
_mm_set1_epi32(cospi[36]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi51 = _mm_set1_epi32(cospi[51]); - const __m128i cospi52 = _mm_set1_epi32(cospi[52]); - const __m128i cospi54 = _mm_set1_epi32(cospi[54]); - const __m128i cospi55 = _mm_set1_epi32(cospi[55]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi59 = _mm_set1_epi32(cospi[59]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - const __m128i cospi63 = _mm_set1_epi32(cospi[63]); - - const __m128i cospim4 = _mm_set1_epi32(-cospi[4]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospim12 = _mm_set1_epi32(-cospi[12]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i cospim20 = _mm_set1_epi32(-cospi[20]); - const __m128i cospim24 = _mm_set1_epi32(-cospi[24]); - const __m128i cospim28 = _mm_set1_epi32(-cospi[28]); - const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); - const __m128i cospim36 = _mm_set1_epi32(-cospi[36]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospim44 = _mm_set1_epi32(-cospi[44]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospim49 = _mm_set1_epi32(-cospi[49]); - const __m128i cospim50 = _mm_set1_epi32(-cospi[50]); - const __m128i cospim52 = _mm_set1_epi32(-cospi[52]); - const __m128i cospim53 = _mm_set1_epi32(-cospi[53]); - const __m128i cospim56 = _mm_set1_epi32(-cospi[56]); - const __m128i cospim57 = _mm_set1_epi32(-cospi[57]); - const __m128i cospim58 = _mm_set1_epi32(-cospi[58]); - const __m128i cospim60 = _mm_set1_epi32(-cospi[60]); - const __m128i cospim61 = _mm_set1_epi32(-cospi[61]); - - { - __m128i u[64]; - __m128i tmp1, tmp2, tmp3, tmp4; - // stage 1 - u[0] = in[0]; - u[32] = in[1]; - u[36] = in[9]; - u[40] = in[5]; - u[44] = in[13]; - u[48] = in[3]; - u[52] = 
in[11]; - u[56] = in[7]; - u[60] = in[15]; - u[16] = in[2]; - u[20] = in[10]; - u[24] = in[6]; - u[28] = in[14]; - u[4] = in[8]; - u[8] = in[4]; - u[12] = in[12]; - - // stage 2 - u[63] = half_btf_0_sse4_1(&cospi1, &u[32], &rnding, bit); - u[32] = half_btf_0_sse4_1(&cospi63, &u[32], &rnding, bit); - u[35] = half_btf_0_sse4_1(&cospim49, &u[60], &rnding, bit); - u[60] = half_btf_0_sse4_1(&cospi15, &u[60], &rnding, bit); - u[59] = half_btf_0_sse4_1(&cospi9, &u[36], &rnding, bit); - u[36] = half_btf_0_sse4_1(&cospi55, &u[36], &rnding, bit); - u[39] = half_btf_0_sse4_1(&cospim57, &u[56], &rnding, bit); - u[56] = half_btf_0_sse4_1(&cospi7, &u[56], &rnding, bit); - u[55] = half_btf_0_sse4_1(&cospi5, &u[40], &rnding, bit); - u[40] = half_btf_0_sse4_1(&cospi59, &u[40], &rnding, bit); - u[43] = half_btf_0_sse4_1(&cospim53, &u[52], &rnding, bit); - u[52] = half_btf_0_sse4_1(&cospi11, &u[52], &rnding, bit); - u[47] = half_btf_0_sse4_1(&cospim61, &u[48], &rnding, bit); - u[48] = half_btf_0_sse4_1(&cospi3, &u[48], &rnding, bit); - u[51] = half_btf_0_sse4_1(&cospi13, &u[44], &rnding, bit); - u[44] = half_btf_0_sse4_1(&cospi51, &u[44], &rnding, bit); - - // stage 3 - u[31] = half_btf_0_sse4_1(&cospi2, &u[16], &rnding, bit); - u[16] = half_btf_0_sse4_1(&cospi62, &u[16], &rnding, bit); - u[19] = half_btf_0_sse4_1(&cospim50, &u[28], &rnding, bit); - u[28] = half_btf_0_sse4_1(&cospi14, &u[28], &rnding, bit); - u[27] = half_btf_0_sse4_1(&cospi10, &u[20], &rnding, bit); - u[20] = half_btf_0_sse4_1(&cospi54, &u[20], &rnding, bit); - u[23] = half_btf_0_sse4_1(&cospim58, &u[24], &rnding, bit); - u[24] = half_btf_0_sse4_1(&cospi6, &u[24], &rnding, bit); - u[33] = u[32]; - u[34] = u[35]; - u[37] = u[36]; - u[38] = u[39]; - u[41] = u[40]; - u[42] = u[43]; - u[45] = u[44]; - u[46] = u[47]; - u[49] = u[48]; - u[50] = u[51]; - u[53] = u[52]; - u[54] = u[55]; - u[57] = u[56]; - u[58] = u[59]; - u[61] = u[60]; - u[62] = u[63]; - - // stage 4 - u[15] = half_btf_0_sse4_1(&cospi4, &u[8], &rnding, 
bit); - u[8] = half_btf_0_sse4_1(&cospi60, &u[8], &rnding, bit); - u[11] = half_btf_0_sse4_1(&cospim52, &u[12], &rnding, bit); - u[12] = half_btf_0_sse4_1(&cospi12, &u[12], &rnding, bit); - - u[17] = u[16]; - u[18] = u[19]; - u[21] = u[20]; - u[22] = u[23]; - u[25] = u[24]; - u[26] = u[27]; - u[29] = u[28]; - u[30] = u[31]; - - tmp1 = half_btf_sse4_1(&cospim4, &u[33], &cospi60, &u[62], &rnding, bit); - tmp2 = half_btf_sse4_1(&cospim60, &u[34], &cospim4, &u[61], &rnding, bit); - tmp3 = half_btf_sse4_1(&cospim36, &u[37], &cospi28, &u[58], &rnding, bit); - tmp4 = half_btf_sse4_1(&cospim28, &u[38], &cospim36, &u[57], &rnding, bit); - u[57] = half_btf_sse4_1(&cospim36, &u[38], &cospi28, &u[57], &rnding, bit); - u[58] = half_btf_sse4_1(&cospi28, &u[37], &cospi36, &u[58], &rnding, bit); - u[61] = half_btf_sse4_1(&cospim4, &u[34], &cospi60, &u[61], &rnding, bit); - u[62] = half_btf_sse4_1(&cospi60, &u[33], &cospi4, &u[62], &rnding, bit); - u[33] = tmp1; - u[34] = tmp2; - u[37] = tmp3; - u[38] = tmp4; - - tmp1 = half_btf_sse4_1(&cospim20, &u[41], &cospi44, &u[54], &rnding, bit); - tmp2 = half_btf_sse4_1(&cospim44, &u[42], &cospim20, &u[53], &rnding, bit); - tmp3 = half_btf_sse4_1(&cospim52, &u[45], &cospi12, &u[50], &rnding, bit); - tmp4 = half_btf_sse4_1(&cospim12, &u[46], &cospim52, &u[49], &rnding, bit); - u[49] = half_btf_sse4_1(&cospim52, &u[46], &cospi12, &u[49], &rnding, bit); - u[50] = half_btf_sse4_1(&cospi12, &u[45], &cospi52, &u[50], &rnding, bit); - u[53] = half_btf_sse4_1(&cospim20, &u[42], &cospi44, &u[53], &rnding, bit); - u[54] = half_btf_sse4_1(&cospi44, &u[41], &cospi20, &u[54], &rnding, bit); - u[41] = tmp1; - u[42] = tmp2; - u[45] = tmp3; - u[46] = tmp4; - - // stage 5 - u[7] = half_btf_0_sse4_1(&cospi8, &u[4], &rnding, bit); - u[4] = half_btf_0_sse4_1(&cospi56, &u[4], &rnding, bit); - - u[9] = u[8]; - u[10] = u[11]; - u[13] = u[12]; - u[14] = u[15]; - - tmp1 = half_btf_sse4_1(&cospim8, &u[17], &cospi56, &u[30], &rnding, bit); - tmp2 = 
half_btf_sse4_1(&cospim56, &u[18], &cospim8, &u[29], &rnding, bit); - tmp3 = half_btf_sse4_1(&cospim40, &u[21], &cospi24, &u[26], &rnding, bit); - tmp4 = half_btf_sse4_1(&cospim24, &u[22], &cospim40, &u[25], &rnding, bit); - u[25] = half_btf_sse4_1(&cospim40, &u[22], &cospi24, &u[25], &rnding, bit); - u[26] = half_btf_sse4_1(&cospi24, &u[21], &cospi40, &u[26], &rnding, bit); - u[29] = half_btf_sse4_1(&cospim8, &u[18], &cospi56, &u[29], &rnding, bit); - u[30] = half_btf_sse4_1(&cospi56, &u[17], &cospi8, &u[30], &rnding, bit); - u[17] = tmp1; - u[18] = tmp2; - u[21] = tmp3; - u[22] = tmp4; - - for (i = 32; i < 64; i += 8) { - addsub_sse4_1(u[i + 0], u[i + 3], &u[i + 0], &u[i + 3], &clamp_lo, - &clamp_hi); - addsub_sse4_1(u[i + 1], u[i + 2], &u[i + 1], &u[i + 2], &clamp_lo, - &clamp_hi); - - addsub_sse4_1(u[i + 7], u[i + 4], &u[i + 7], &u[i + 4], &clamp_lo, - &clamp_hi); - addsub_sse4_1(u[i + 6], u[i + 5], &u[i + 6], &u[i + 5], &clamp_lo, - &clamp_hi); - } - - // stage 6 - tmp1 = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit); - u[1] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit); - u[0] = tmp1; - u[5] = u[4]; - u[6] = u[7]; - - tmp1 = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit); - u[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit); - u[9] = tmp1; - tmp2 = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit); - u[13] = half_btf_sse4_1(&cospim16, &u[10], &cospi48, &u[13], &rnding, bit); - u[10] = tmp2; - - for (i = 16; i < 32; i += 8) { - addsub_sse4_1(u[i + 0], u[i + 3], &u[i + 0], &u[i + 3], &clamp_lo, - &clamp_hi); - addsub_sse4_1(u[i + 1], u[i + 2], &u[i + 1], &u[i + 2], &clamp_lo, - &clamp_hi); - - addsub_sse4_1(u[i + 7], u[i + 4], &u[i + 7], &u[i + 4], &clamp_lo, - &clamp_hi); - addsub_sse4_1(u[i + 6], u[i + 5], &u[i + 6], &u[i + 5], &clamp_lo, - &clamp_hi); - } - - tmp1 = half_btf_sse4_1(&cospim8, &u[34], &cospi56, &u[61], &rnding, bit); - tmp2 = half_btf_sse4_1(&cospim8, &u[35], &cospi56, &u[60], 
&rnding, bit); - tmp3 = half_btf_sse4_1(&cospim56, &u[36], &cospim8, &u[59], &rnding, bit); - tmp4 = half_btf_sse4_1(&cospim56, &u[37], &cospim8, &u[58], &rnding, bit); - u[58] = half_btf_sse4_1(&cospim8, &u[37], &cospi56, &u[58], &rnding, bit); - u[59] = half_btf_sse4_1(&cospim8, &u[36], &cospi56, &u[59], &rnding, bit); - u[60] = half_btf_sse4_1(&cospi56, &u[35], &cospi8, &u[60], &rnding, bit); - u[61] = half_btf_sse4_1(&cospi56, &u[34], &cospi8, &u[61], &rnding, bit); - u[34] = tmp1; - u[35] = tmp2; - u[36] = tmp3; - u[37] = tmp4; - - tmp1 = half_btf_sse4_1(&cospim40, &u[42], &cospi24, &u[53], &rnding, bit); - tmp2 = half_btf_sse4_1(&cospim40, &u[43], &cospi24, &u[52], &rnding, bit); - tmp3 = half_btf_sse4_1(&cospim24, &u[44], &cospim40, &u[51], &rnding, bit); - tmp4 = half_btf_sse4_1(&cospim24, &u[45], &cospim40, &u[50], &rnding, bit); - u[50] = half_btf_sse4_1(&cospim40, &u[45], &cospi24, &u[50], &rnding, bit); - u[51] = half_btf_sse4_1(&cospim40, &u[44], &cospi24, &u[51], &rnding, bit); - u[52] = half_btf_sse4_1(&cospi24, &u[43], &cospi40, &u[52], &rnding, bit); - u[53] = half_btf_sse4_1(&cospi24, &u[42], &cospi40, &u[53], &rnding, bit); - u[42] = tmp1; - u[43] = tmp2; - u[44] = tmp3; - u[45] = tmp4; - - // stage 7 - u[3] = u[0]; - u[2] = u[1]; - tmp1 = half_btf_sse4_1(&cospim32, &u[5], &cospi32, &u[6], &rnding, bit); - u[6] = half_btf_sse4_1(&cospi32, &u[5], &cospi32, &u[6], &rnding, bit); - u[5] = tmp1; - addsub_sse4_1(u[8], u[11], &u[8], &u[11], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[9], u[10], &u[9], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[15], u[12], &u[15], &u[12], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[14], u[13], &u[14], &u[13], &clamp_lo, &clamp_hi); - - tmp1 = half_btf_sse4_1(&cospim16, &u[18], &cospi48, &u[29], &rnding, bit); - tmp2 = half_btf_sse4_1(&cospim16, &u[19], &cospi48, &u[28], &rnding, bit); - tmp3 = half_btf_sse4_1(&cospim48, &u[20], &cospim16, &u[27], &rnding, bit); - tmp4 = half_btf_sse4_1(&cospim48, &u[21], &cospim16, 
&u[26], &rnding, bit); - u[26] = half_btf_sse4_1(&cospim16, &u[21], &cospi48, &u[26], &rnding, bit); - u[27] = half_btf_sse4_1(&cospim16, &u[20], &cospi48, &u[27], &rnding, bit); - u[28] = half_btf_sse4_1(&cospi48, &u[19], &cospi16, &u[28], &rnding, bit); - u[29] = half_btf_sse4_1(&cospi48, &u[18], &cospi16, &u[29], &rnding, bit); - u[18] = tmp1; - u[19] = tmp2; - u[20] = tmp3; - u[21] = tmp4; - - for (i = 32; i < 64; i += 16) { - for (j = i; j < i + 4; j++) { - addsub_sse4_1(u[j], u[j ^ 7], &u[j], &u[j ^ 7], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[j ^ 15], u[j ^ 8], &u[j ^ 15], &u[j ^ 8], &clamp_lo, - &clamp_hi); - } - } - - // stage 8 - for (i = 0; i < 4; ++i) { - addsub_sse4_1(u[i], u[7 - i], &u[i], &u[7 - i], &clamp_lo, &clamp_hi); - } - - idct64_stage8_sse4_1(u, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16, - &cospim48, &clamp_lo, &clamp_hi, &rnding, bit); - - // stage 9 - idct64_stage9_sse4_1(u, &cospim32, &cospi32, &clamp_lo, &clamp_hi, &rnding, - bit); - - // stage 10 - idct64_stage10_sse4_1(u, &cospim32, &cospi32, &clamp_lo, &clamp_hi, &rnding, - bit); - - // stage 11 - idct64_stage11_sse4_1(u, out, do_cols, bd, out_shift, log_range); - } -} - -static void idct64x64_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols, - int bd, int out_shift) { - int i, j; - const int32_t *cospi = cospi_arr(bit); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 
6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - - const __m128i cospi1 = _mm_set1_epi32(cospi[1]); - const __m128i cospi2 = _mm_set1_epi32(cospi[2]); - const __m128i cospi3 = _mm_set1_epi32(cospi[3]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospi5 = _mm_set1_epi32(cospi[5]); - const __m128i cospi6 = _mm_set1_epi32(cospi[6]); - const __m128i cospi7 = _mm_set1_epi32(cospi[7]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi9 = _mm_set1_epi32(cospi[9]); - const __m128i cospi10 = _mm_set1_epi32(cospi[10]); - const __m128i cospi11 = _mm_set1_epi32(cospi[11]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi13 = _mm_set1_epi32(cospi[13]); - const __m128i cospi14 = _mm_set1_epi32(cospi[14]); - const __m128i cospi15 = _mm_set1_epi32(cospi[15]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospi17 = _mm_set1_epi32(cospi[17]); - const __m128i cospi18 = _mm_set1_epi32(cospi[18]); - const __m128i cospi19 = _mm_set1_epi32(cospi[19]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospi21 = _mm_set1_epi32(cospi[21]); - const __m128i cospi22 = _mm_set1_epi32(cospi[22]); - const __m128i cospi23 = _mm_set1_epi32(cospi[23]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospi25 = _mm_set1_epi32(cospi[25]); - const __m128i cospi26 = _mm_set1_epi32(cospi[26]); - const __m128i cospi27 = _mm_set1_epi32(cospi[27]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi29 = _mm_set1_epi32(cospi[29]); - const __m128i cospi30 = _mm_set1_epi32(cospi[30]); - const __m128i cospi31 = _mm_set1_epi32(cospi[31]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi35 = _mm_set1_epi32(cospi[35]); - const __m128i cospi36 = _mm_set1_epi32(cospi[36]); - const __m128i cospi38 = _mm_set1_epi32(cospi[38]); - const 
__m128i cospi39 = _mm_set1_epi32(cospi[39]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi43 = _mm_set1_epi32(cospi[43]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi46 = _mm_set1_epi32(cospi[46]); - const __m128i cospi47 = _mm_set1_epi32(cospi[47]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi51 = _mm_set1_epi32(cospi[51]); - const __m128i cospi52 = _mm_set1_epi32(cospi[52]); - const __m128i cospi54 = _mm_set1_epi32(cospi[54]); - const __m128i cospi55 = _mm_set1_epi32(cospi[55]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi59 = _mm_set1_epi32(cospi[59]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - const __m128i cospi63 = _mm_set1_epi32(cospi[63]); - - const __m128i cospim4 = _mm_set1_epi32(-cospi[4]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospim12 = _mm_set1_epi32(-cospi[12]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i cospim20 = _mm_set1_epi32(-cospi[20]); - const __m128i cospim24 = _mm_set1_epi32(-cospi[24]); - const __m128i cospim28 = _mm_set1_epi32(-cospi[28]); - const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); - const __m128i cospim33 = _mm_set1_epi32(-cospi[33]); - const __m128i cospim34 = _mm_set1_epi32(-cospi[34]); - const __m128i cospim36 = _mm_set1_epi32(-cospi[36]); - const __m128i cospim37 = _mm_set1_epi32(-cospi[37]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospim41 = _mm_set1_epi32(-cospi[41]); - const __m128i cospim42 = _mm_set1_epi32(-cospi[42]); - const __m128i cospim44 = _mm_set1_epi32(-cospi[44]); - const __m128i cospim45 = _mm_set1_epi32(-cospi[45]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospim49 = _mm_set1_epi32(-cospi[49]); - const __m128i cospim50 = _mm_set1_epi32(-cospi[50]); - const __m128i cospim52 = _mm_set1_epi32(-cospi[52]); - 
const __m128i cospim53 = _mm_set1_epi32(-cospi[53]); - const __m128i cospim56 = _mm_set1_epi32(-cospi[56]); - const __m128i cospim57 = _mm_set1_epi32(-cospi[57]); - const __m128i cospim58 = _mm_set1_epi32(-cospi[58]); - const __m128i cospim60 = _mm_set1_epi32(-cospi[60]); - const __m128i cospim61 = _mm_set1_epi32(-cospi[61]); - - { - __m128i u[64], v[64]; - - // stage 1 - u[32] = in[1]; - u[34] = in[17]; - u[36] = in[9]; - u[38] = in[25]; - u[40] = in[5]; - u[42] = in[21]; - u[44] = in[13]; - u[46] = in[29]; - u[48] = in[3]; - u[50] = in[19]; - u[52] = in[11]; - u[54] = in[27]; - u[56] = in[7]; - u[58] = in[23]; - u[60] = in[15]; - u[62] = in[31]; - - v[16] = in[2]; - v[18] = in[18]; - v[20] = in[10]; - v[22] = in[26]; - v[24] = in[6]; - v[26] = in[22]; - v[28] = in[14]; - v[30] = in[30]; - - u[8] = in[4]; - u[10] = in[20]; - u[12] = in[12]; - u[14] = in[28]; - - v[4] = in[8]; - v[6] = in[24]; - - u[0] = in[0]; - u[2] = in[16]; - - // stage 2 - v[32] = half_btf_0_sse4_1(&cospi63, &u[32], &rnding, bit); - v[33] = half_btf_0_sse4_1(&cospim33, &u[62], &rnding, bit); - v[34] = half_btf_0_sse4_1(&cospi47, &u[34], &rnding, bit); - v[35] = half_btf_0_sse4_1(&cospim49, &u[60], &rnding, bit); - v[36] = half_btf_0_sse4_1(&cospi55, &u[36], &rnding, bit); - v[37] = half_btf_0_sse4_1(&cospim41, &u[58], &rnding, bit); - v[38] = half_btf_0_sse4_1(&cospi39, &u[38], &rnding, bit); - v[39] = half_btf_0_sse4_1(&cospim57, &u[56], &rnding, bit); - v[40] = half_btf_0_sse4_1(&cospi59, &u[40], &rnding, bit); - v[41] = half_btf_0_sse4_1(&cospim37, &u[54], &rnding, bit); - v[42] = half_btf_0_sse4_1(&cospi43, &u[42], &rnding, bit); - v[43] = half_btf_0_sse4_1(&cospim53, &u[52], &rnding, bit); - v[44] = half_btf_0_sse4_1(&cospi51, &u[44], &rnding, bit); - v[45] = half_btf_0_sse4_1(&cospim45, &u[50], &rnding, bit); - v[46] = half_btf_0_sse4_1(&cospi35, &u[46], &rnding, bit); - v[47] = half_btf_0_sse4_1(&cospim61, &u[48], &rnding, bit); - v[48] = half_btf_0_sse4_1(&cospi3, &u[48], &rnding, 
bit); - v[49] = half_btf_0_sse4_1(&cospi29, &u[46], &rnding, bit); - v[50] = half_btf_0_sse4_1(&cospi19, &u[50], &rnding, bit); - v[51] = half_btf_0_sse4_1(&cospi13, &u[44], &rnding, bit); - v[52] = half_btf_0_sse4_1(&cospi11, &u[52], &rnding, bit); - v[53] = half_btf_0_sse4_1(&cospi21, &u[42], &rnding, bit); - v[54] = half_btf_0_sse4_1(&cospi27, &u[54], &rnding, bit); - v[55] = half_btf_0_sse4_1(&cospi5, &u[40], &rnding, bit); - v[56] = half_btf_0_sse4_1(&cospi7, &u[56], &rnding, bit); - v[57] = half_btf_0_sse4_1(&cospi25, &u[38], &rnding, bit); - v[58] = half_btf_0_sse4_1(&cospi23, &u[58], &rnding, bit); - v[59] = half_btf_0_sse4_1(&cospi9, &u[36], &rnding, bit); - v[60] = half_btf_0_sse4_1(&cospi15, &u[60], &rnding, bit); - v[61] = half_btf_0_sse4_1(&cospi17, &u[34], &rnding, bit); - v[62] = half_btf_0_sse4_1(&cospi31, &u[62], &rnding, bit); - v[63] = half_btf_0_sse4_1(&cospi1, &u[32], &rnding, bit); - - // stage 3 - u[16] = half_btf_0_sse4_1(&cospi62, &v[16], &rnding, bit); - u[17] = half_btf_0_sse4_1(&cospim34, &v[30], &rnding, bit); - u[18] = half_btf_0_sse4_1(&cospi46, &v[18], &rnding, bit); - u[19] = half_btf_0_sse4_1(&cospim50, &v[28], &rnding, bit); - u[20] = half_btf_0_sse4_1(&cospi54, &v[20], &rnding, bit); - u[21] = half_btf_0_sse4_1(&cospim42, &v[26], &rnding, bit); - u[22] = half_btf_0_sse4_1(&cospi38, &v[22], &rnding, bit); - u[23] = half_btf_0_sse4_1(&cospim58, &v[24], &rnding, bit); - u[24] = half_btf_0_sse4_1(&cospi6, &v[24], &rnding, bit); - u[25] = half_btf_0_sse4_1(&cospi26, &v[22], &rnding, bit); - u[26] = half_btf_0_sse4_1(&cospi22, &v[26], &rnding, bit); - u[27] = half_btf_0_sse4_1(&cospi10, &v[20], &rnding, bit); - u[28] = half_btf_0_sse4_1(&cospi14, &v[28], &rnding, bit); - u[29] = half_btf_0_sse4_1(&cospi18, &v[18], &rnding, bit); - u[30] = half_btf_0_sse4_1(&cospi30, &v[30], &rnding, bit); - u[31] = half_btf_0_sse4_1(&cospi2, &v[16], &rnding, bit); - - for (i = 32; i < 64; i += 4) { - addsub_sse4_1(v[i + 0], v[i + 1], &u[i + 0], &u[i + 
1], &clamp_lo, - &clamp_hi); - addsub_sse4_1(v[i + 3], v[i + 2], &u[i + 3], &u[i + 2], &clamp_lo, - &clamp_hi); - } - - // stage 4 - v[8] = half_btf_0_sse4_1(&cospi60, &u[8], &rnding, bit); - v[9] = half_btf_0_sse4_1(&cospim36, &u[14], &rnding, bit); - v[10] = half_btf_0_sse4_1(&cospi44, &u[10], &rnding, bit); - v[11] = half_btf_0_sse4_1(&cospim52, &u[12], &rnding, bit); - v[12] = half_btf_0_sse4_1(&cospi12, &u[12], &rnding, bit); - v[13] = half_btf_0_sse4_1(&cospi20, &u[10], &rnding, bit); - v[14] = half_btf_0_sse4_1(&cospi28, &u[14], &rnding, bit); - v[15] = half_btf_0_sse4_1(&cospi4, &u[8], &rnding, bit); - - for (i = 16; i < 32; i += 4) { - addsub_sse4_1(u[i + 0], u[i + 1], &v[i + 0], &v[i + 1], &clamp_lo, - &clamp_hi); - addsub_sse4_1(u[i + 3], u[i + 2], &v[i + 3], &v[i + 2], &clamp_lo, - &clamp_hi); - } - - for (i = 32; i < 64; i += 4) { - v[i + 0] = u[i + 0]; - v[i + 3] = u[i + 3]; - } - - v[33] = half_btf_sse4_1(&cospim4, &u[33], &cospi60, &u[62], &rnding, bit); - v[34] = half_btf_sse4_1(&cospim60, &u[34], &cospim4, &u[61], &rnding, bit); - v[37] = half_btf_sse4_1(&cospim36, &u[37], &cospi28, &u[58], &rnding, bit); - v[38] = half_btf_sse4_1(&cospim28, &u[38], &cospim36, &u[57], &rnding, bit); - v[41] = half_btf_sse4_1(&cospim20, &u[41], &cospi44, &u[54], &rnding, bit); - v[42] = half_btf_sse4_1(&cospim44, &u[42], &cospim20, &u[53], &rnding, bit); - v[45] = half_btf_sse4_1(&cospim52, &u[45], &cospi12, &u[50], &rnding, bit); - v[46] = half_btf_sse4_1(&cospim12, &u[46], &cospim52, &u[49], &rnding, bit); - v[49] = half_btf_sse4_1(&cospim52, &u[46], &cospi12, &u[49], &rnding, bit); - v[50] = half_btf_sse4_1(&cospi12, &u[45], &cospi52, &u[50], &rnding, bit); - v[53] = half_btf_sse4_1(&cospim20, &u[42], &cospi44, &u[53], &rnding, bit); - v[54] = half_btf_sse4_1(&cospi44, &u[41], &cospi20, &u[54], &rnding, bit); - v[57] = half_btf_sse4_1(&cospim36, &u[38], &cospi28, &u[57], &rnding, bit); - v[58] = half_btf_sse4_1(&cospi28, &u[37], &cospi36, &u[58], &rnding, bit); 
- v[61] = half_btf_sse4_1(&cospim4, &u[34], &cospi60, &u[61], &rnding, bit); - v[62] = half_btf_sse4_1(&cospi60, &u[33], &cospi4, &u[62], &rnding, bit); - - // stage 5 - u[4] = half_btf_0_sse4_1(&cospi56, &v[4], &rnding, bit); - u[5] = half_btf_0_sse4_1(&cospim40, &v[6], &rnding, bit); - u[6] = half_btf_0_sse4_1(&cospi24, &v[6], &rnding, bit); - u[7] = half_btf_0_sse4_1(&cospi8, &v[4], &rnding, bit); - - for (i = 8; i < 16; i += 4) { - addsub_sse4_1(v[i + 0], v[i + 1], &u[i + 0], &u[i + 1], &clamp_lo, - &clamp_hi); - addsub_sse4_1(v[i + 3], v[i + 2], &u[i + 3], &u[i + 2], &clamp_lo, - &clamp_hi); - } - - for (i = 16; i < 32; i += 4) { - u[i + 0] = v[i + 0]; - u[i + 3] = v[i + 3]; - } - - u[17] = half_btf_sse4_1(&cospim8, &v[17], &cospi56, &v[30], &rnding, bit); - u[18] = half_btf_sse4_1(&cospim56, &v[18], &cospim8, &v[29], &rnding, bit); - u[21] = half_btf_sse4_1(&cospim40, &v[21], &cospi24, &v[26], &rnding, bit); - u[22] = half_btf_sse4_1(&cospim24, &v[22], &cospim40, &v[25], &rnding, bit); - u[25] = half_btf_sse4_1(&cospim40, &v[22], &cospi24, &v[25], &rnding, bit); - u[26] = half_btf_sse4_1(&cospi24, &v[21], &cospi40, &v[26], &rnding, bit); - u[29] = half_btf_sse4_1(&cospim8, &v[18], &cospi56, &v[29], &rnding, bit); - u[30] = half_btf_sse4_1(&cospi56, &v[17], &cospi8, &v[30], &rnding, bit); - - for (i = 32; i < 64; i += 8) { - addsub_sse4_1(v[i + 0], v[i + 3], &u[i + 0], &u[i + 3], &clamp_lo, - &clamp_hi); - addsub_sse4_1(v[i + 1], v[i + 2], &u[i + 1], &u[i + 2], &clamp_lo, - &clamp_hi); - - addsub_sse4_1(v[i + 7], v[i + 4], &u[i + 7], &u[i + 4], &clamp_lo, - &clamp_hi); - addsub_sse4_1(v[i + 6], v[i + 5], &u[i + 6], &u[i + 5], &clamp_lo, - &clamp_hi); - } - - // stage 6 - v[0] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit); - v[1] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit); - v[2] = half_btf_0_sse4_1(&cospi48, &u[2], &rnding, bit); - v[3] = half_btf_0_sse4_1(&cospi16, &u[2], &rnding, bit); - - addsub_sse4_1(u[4], u[5], &v[4], &v[5], &clamp_lo, 
&clamp_hi); - addsub_sse4_1(u[7], u[6], &v[7], &v[6], &clamp_lo, &clamp_hi); - - for (i = 8; i < 16; i += 4) { - v[i + 0] = u[i + 0]; - v[i + 3] = u[i + 3]; - } - - v[9] = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit); - v[10] = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit); - v[13] = half_btf_sse4_1(&cospim16, &u[10], &cospi48, &u[13], &rnding, bit); - v[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit); - - for (i = 16; i < 32; i += 8) { - addsub_sse4_1(u[i + 0], u[i + 3], &v[i + 0], &v[i + 3], &clamp_lo, - &clamp_hi); - addsub_sse4_1(u[i + 1], u[i + 2], &v[i + 1], &v[i + 2], &clamp_lo, - &clamp_hi); - - addsub_sse4_1(u[i + 7], u[i + 4], &v[i + 7], &v[i + 4], &clamp_lo, - &clamp_hi); - addsub_sse4_1(u[i + 6], u[i + 5], &v[i + 6], &v[i + 5], &clamp_lo, - &clamp_hi); - } - - for (i = 32; i < 64; i += 8) { - v[i + 0] = u[i + 0]; - v[i + 1] = u[i + 1]; - v[i + 6] = u[i + 6]; - v[i + 7] = u[i + 7]; - } - - v[34] = half_btf_sse4_1(&cospim8, &u[34], &cospi56, &u[61], &rnding, bit); - v[35] = half_btf_sse4_1(&cospim8, &u[35], &cospi56, &u[60], &rnding, bit); - v[36] = half_btf_sse4_1(&cospim56, &u[36], &cospim8, &u[59], &rnding, bit); - v[37] = half_btf_sse4_1(&cospim56, &u[37], &cospim8, &u[58], &rnding, bit); - v[42] = half_btf_sse4_1(&cospim40, &u[42], &cospi24, &u[53], &rnding, bit); - v[43] = half_btf_sse4_1(&cospim40, &u[43], &cospi24, &u[52], &rnding, bit); - v[44] = half_btf_sse4_1(&cospim24, &u[44], &cospim40, &u[51], &rnding, bit); - v[45] = half_btf_sse4_1(&cospim24, &u[45], &cospim40, &u[50], &rnding, bit); - v[50] = half_btf_sse4_1(&cospim40, &u[45], &cospi24, &u[50], &rnding, bit); - v[51] = half_btf_sse4_1(&cospim40, &u[44], &cospi24, &u[51], &rnding, bit); - v[52] = half_btf_sse4_1(&cospi24, &u[43], &cospi40, &u[52], &rnding, bit); - v[53] = half_btf_sse4_1(&cospi24, &u[42], &cospi40, &u[53], &rnding, bit); - v[58] = half_btf_sse4_1(&cospim8, &u[37], &cospi56, &u[58], &rnding, bit); - v[59] = 
half_btf_sse4_1(&cospim8, &u[36], &cospi56, &u[59], &rnding, bit); - v[60] = half_btf_sse4_1(&cospi56, &u[35], &cospi8, &u[60], &rnding, bit); - v[61] = half_btf_sse4_1(&cospi56, &u[34], &cospi8, &u[61], &rnding, bit); - - // stage 7 - addsub_sse4_1(v[0], v[3], &u[0], &u[3], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[1], v[2], &u[1], &u[2], &clamp_lo, &clamp_hi); - - u[4] = v[4]; - u[7] = v[7]; - u[5] = half_btf_sse4_1(&cospim32, &v[5], &cospi32, &v[6], &rnding, bit); - u[6] = half_btf_sse4_1(&cospi32, &v[5], &cospi32, &v[6], &rnding, bit); - - addsub_sse4_1(v[8], v[11], &u[8], &u[11], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[9], v[10], &u[9], &u[10], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[15], v[12], &u[15], &u[12], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[14], v[13], &u[14], &u[13], &clamp_lo, &clamp_hi); - - for (i = 16; i < 32; i += 8) { - u[i + 0] = v[i + 0]; - u[i + 1] = v[i + 1]; - u[i + 6] = v[i + 6]; - u[i + 7] = v[i + 7]; - } - - u[18] = half_btf_sse4_1(&cospim16, &v[18], &cospi48, &v[29], &rnding, bit); - u[19] = half_btf_sse4_1(&cospim16, &v[19], &cospi48, &v[28], &rnding, bit); - u[20] = half_btf_sse4_1(&cospim48, &v[20], &cospim16, &v[27], &rnding, bit); - u[21] = half_btf_sse4_1(&cospim48, &v[21], &cospim16, &v[26], &rnding, bit); - u[26] = half_btf_sse4_1(&cospim16, &v[21], &cospi48, &v[26], &rnding, bit); - u[27] = half_btf_sse4_1(&cospim16, &v[20], &cospi48, &v[27], &rnding, bit); - u[28] = half_btf_sse4_1(&cospi48, &v[19], &cospi16, &v[28], &rnding, bit); - u[29] = half_btf_sse4_1(&cospi48, &v[18], &cospi16, &v[29], &rnding, bit); - - for (i = 32; i < 64; i += 16) { - for (j = i; j < i + 4; j++) { - addsub_sse4_1(v[j], v[j ^ 7], &u[j], &u[j ^ 7], &clamp_lo, &clamp_hi); - addsub_sse4_1(v[j ^ 15], v[j ^ 8], &u[j ^ 15], &u[j ^ 8], &clamp_lo, - &clamp_hi); - } - } - - // stage 8 - for (i = 0; i < 4; ++i) { - addsub_sse4_1(u[i], u[7 - i], &v[i], &v[7 - i], &clamp_lo, &clamp_hi); - } - - v[8] = u[8]; - v[9] = u[9]; - v[14] = u[14]; - v[15] = u[15]; - - 
v[10] = half_btf_sse4_1(&cospim32, &u[10], &cospi32, &u[13], &rnding, bit); - v[11] = half_btf_sse4_1(&cospim32, &u[11], &cospi32, &u[12], &rnding, bit); - v[12] = half_btf_sse4_1(&cospi32, &u[11], &cospi32, &u[12], &rnding, bit); - v[13] = half_btf_sse4_1(&cospi32, &u[10], &cospi32, &u[13], &rnding, bit); - - for (i = 16; i < 20; ++i) { - addsub_sse4_1(u[i], u[i ^ 7], &v[i], &v[i ^ 7], &clamp_lo, &clamp_hi); - addsub_sse4_1(u[i ^ 15], u[i ^ 8], &v[i ^ 15], &v[i ^ 8], &clamp_lo, - &clamp_hi); - } - - for (i = 32; i < 36; ++i) { - v[i] = u[i]; - v[i + 12] = u[i + 12]; - v[i + 16] = u[i + 16]; - v[i + 28] = u[i + 28]; - } - - v[36] = half_btf_sse4_1(&cospim16, &u[36], &cospi48, &u[59], &rnding, bit); - v[37] = half_btf_sse4_1(&cospim16, &u[37], &cospi48, &u[58], &rnding, bit); - v[38] = half_btf_sse4_1(&cospim16, &u[38], &cospi48, &u[57], &rnding, bit); - v[39] = half_btf_sse4_1(&cospim16, &u[39], &cospi48, &u[56], &rnding, bit); - v[40] = half_btf_sse4_1(&cospim48, &u[40], &cospim16, &u[55], &rnding, bit); - v[41] = half_btf_sse4_1(&cospim48, &u[41], &cospim16, &u[54], &rnding, bit); - v[42] = half_btf_sse4_1(&cospim48, &u[42], &cospim16, &u[53], &rnding, bit); - v[43] = half_btf_sse4_1(&cospim48, &u[43], &cospim16, &u[52], &rnding, bit); - v[52] = half_btf_sse4_1(&cospim16, &u[43], &cospi48, &u[52], &rnding, bit); - v[53] = half_btf_sse4_1(&cospim16, &u[42], &cospi48, &u[53], &rnding, bit); - v[54] = half_btf_sse4_1(&cospim16, &u[41], &cospi48, &u[54], &rnding, bit); - v[55] = half_btf_sse4_1(&cospim16, &u[40], &cospi48, &u[55], &rnding, bit); - v[56] = half_btf_sse4_1(&cospi48, &u[39], &cospi16, &u[56], &rnding, bit); - v[57] = half_btf_sse4_1(&cospi48, &u[38], &cospi16, &u[57], &rnding, bit); - v[58] = half_btf_sse4_1(&cospi48, &u[37], &cospi16, &u[58], &rnding, bit); - v[59] = half_btf_sse4_1(&cospi48, &u[36], &cospi16, &u[59], &rnding, bit); - - // stage 9 - for (i = 0; i < 8; ++i) { - addsub_sse4_1(v[i], v[15 - i], &u[i], &u[15 - i], &clamp_lo, &clamp_hi); - } 
- - for (i = 16; i < 20; ++i) { - u[i] = v[i]; - u[i + 12] = v[i + 12]; - } - - u[20] = half_btf_sse4_1(&cospim32, &v[20], &cospi32, &v[27], &rnding, bit); - u[21] = half_btf_sse4_1(&cospim32, &v[21], &cospi32, &v[26], &rnding, bit); - u[22] = half_btf_sse4_1(&cospim32, &v[22], &cospi32, &v[25], &rnding, bit); - u[23] = half_btf_sse4_1(&cospim32, &v[23], &cospi32, &v[24], &rnding, bit); - u[24] = half_btf_sse4_1(&cospi32, &v[23], &cospi32, &v[24], &rnding, bit); - u[25] = half_btf_sse4_1(&cospi32, &v[22], &cospi32, &v[25], &rnding, bit); - u[26] = half_btf_sse4_1(&cospi32, &v[21], &cospi32, &v[26], &rnding, bit); - u[27] = half_btf_sse4_1(&cospi32, &v[20], &cospi32, &v[27], &rnding, bit); - - for (i = 32; i < 40; i++) { - addsub_sse4_1(v[i], v[i ^ 15], &u[i], &u[i ^ 15], &clamp_lo, &clamp_hi); - } - - for (i = 48; i < 56; i++) { - addsub_sse4_1(v[i ^ 15], v[i], &u[i ^ 15], &u[i], &clamp_lo, &clamp_hi); - } - - // stage 10 - for (i = 0; i < 16; i++) { - addsub_sse4_1(u[i], u[31 - i], &v[i], &v[31 - i], &clamp_lo, &clamp_hi); - } - - for (i = 32; i < 40; i++) v[i] = u[i]; - - v[40] = half_btf_sse4_1(&cospim32, &u[40], &cospi32, &u[55], &rnding, bit); - v[41] = half_btf_sse4_1(&cospim32, &u[41], &cospi32, &u[54], &rnding, bit); - v[42] = half_btf_sse4_1(&cospim32, &u[42], &cospi32, &u[53], &rnding, bit); - v[43] = half_btf_sse4_1(&cospim32, &u[43], &cospi32, &u[52], &rnding, bit); - v[44] = half_btf_sse4_1(&cospim32, &u[44], &cospi32, &u[51], &rnding, bit); - v[45] = half_btf_sse4_1(&cospim32, &u[45], &cospi32, &u[50], &rnding, bit); - v[46] = half_btf_sse4_1(&cospim32, &u[46], &cospi32, &u[49], &rnding, bit); - v[47] = half_btf_sse4_1(&cospim32, &u[47], &cospi32, &u[48], &rnding, bit); - v[48] = half_btf_sse4_1(&cospi32, &u[47], &cospi32, &u[48], &rnding, bit); - v[49] = half_btf_sse4_1(&cospi32, &u[46], &cospi32, &u[49], &rnding, bit); - v[50] = half_btf_sse4_1(&cospi32, &u[45], &cospi32, &u[50], &rnding, bit); - v[51] = half_btf_sse4_1(&cospi32, &u[44], &cospi32, 
&u[51], &rnding, bit); - v[52] = half_btf_sse4_1(&cospi32, &u[43], &cospi32, &u[52], &rnding, bit); - v[53] = half_btf_sse4_1(&cospi32, &u[42], &cospi32, &u[53], &rnding, bit); - v[54] = half_btf_sse4_1(&cospi32, &u[41], &cospi32, &u[54], &rnding, bit); - v[55] = half_btf_sse4_1(&cospi32, &u[40], &cospi32, &u[55], &rnding, bit); - - for (i = 56; i < 64; i++) v[i] = u[i]; - - // stage 11 - if (do_cols) { - for (i = 0; i < 32; i++) { - addsub_no_clamp_sse4_1(v[i], v[63 - i], &out[(i)], &out[(63 - i)]); - } - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - for (i = 0; i < 32; i++) { - addsub_shift_sse4_1(v[i], v[63 - i], &out[(i)], &out[(63 - i)], - &clamp_lo_out, &clamp_hi_out, out_shift); - } - } - } -} - -static void idct32x32_low1_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i rounding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 
6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i bf1; - - // stage 0 - // stage 1 - bf1 = in[0]; - - // stage 2 - // stage 3 - // stage 4 - // stage 5 - bf1 = half_btf_0_sse4_1(&cospi32, &bf1, &rounding, bit); - - // stage 6 - // stage 7 - // stage 8 - // stage 9 - if (do_cols) { - bf1 = _mm_max_epi32(bf1, clamp_lo); - bf1 = _mm_min_epi32(bf1, clamp_hi); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - __m128i offset = _mm_set1_epi32((1 << out_shift) >> 1); - bf1 = _mm_add_epi32(bf1, offset); - bf1 = _mm_sra_epi32(bf1, _mm_cvtsi32_si128(out_shift)); - bf1 = _mm_max_epi32(bf1, clamp_lo_out); - bf1 = _mm_min_epi32(bf1, clamp_hi_out); - } - out[0] = bf1; - out[1] = bf1; - out[2] = bf1; - out[3] = bf1; - out[4] = bf1; - out[5] = bf1; - out[6] = bf1; - out[7] = bf1; - out[8] = bf1; - out[9] = bf1; - out[10] = bf1; - out[11] = bf1; - out[12] = bf1; - out[13] = bf1; - out[14] = bf1; - out[15] = bf1; - out[16] = bf1; - out[17] = bf1; - out[18] = bf1; - out[19] = bf1; - out[20] = bf1; - out[21] = bf1; - out[22] = bf1; - out[23] = bf1; - out[24] = bf1; - out[25] = bf1; - out[26] = bf1; - out[27] = bf1; - out[28] = bf1; - out[29] = bf1; - out[30] = bf1; - out[31] = bf1; -} - -static void idct32x32_low8_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - const __m128i cospi14 = _mm_set1_epi32(cospi[14]); - const __m128i cospi54 = _mm_set1_epi32(cospi[54]); - const __m128i cospi6 = _mm_set1_epi32(cospi[6]); - const __m128i cospi10 = _mm_set1_epi32(cospi[10]); - const __m128i cospi2 = 
_mm_set1_epi32(cospi[2]); - const __m128i cospim58 = _mm_set1_epi32(-cospi[58]); - const __m128i cospim50 = _mm_set1_epi32(-cospi[50]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospim52 = _mm_set1_epi32(-cospi[52]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospim56 = _mm_set1_epi32(-cospi[56]); - const __m128i cospim24 = _mm_set1_epi32(-cospi[24]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i rounding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 
6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i bf1[32]; - - // stage 0 - // stage 1 - bf1[0] = in[0]; - bf1[4] = in[4]; - bf1[8] = in[2]; - bf1[12] = in[6]; - bf1[16] = in[1]; - bf1[20] = in[5]; - bf1[24] = in[3]; - bf1[28] = in[7]; - - // stage 2 - bf1[31] = half_btf_0_sse4_1(&cospi2, &bf1[16], &rounding, bit); - bf1[16] = half_btf_0_sse4_1(&cospi62, &bf1[16], &rounding, bit); - bf1[19] = half_btf_0_sse4_1(&cospim50, &bf1[28], &rounding, bit); - bf1[28] = half_btf_0_sse4_1(&cospi14, &bf1[28], &rounding, bit); - bf1[27] = half_btf_0_sse4_1(&cospi10, &bf1[20], &rounding, bit); - bf1[20] = half_btf_0_sse4_1(&cospi54, &bf1[20], &rounding, bit); - bf1[23] = half_btf_0_sse4_1(&cospim58, &bf1[24], &rounding, bit); - bf1[24] = half_btf_0_sse4_1(&cospi6, &bf1[24], &rounding, bit); - - // stage 3 - bf1[15] = half_btf_0_sse4_1(&cospi4, &bf1[8], &rounding, bit); - bf1[8] = half_btf_0_sse4_1(&cospi60, &bf1[8], &rounding, bit); - - bf1[11] = half_btf_0_sse4_1(&cospim52, &bf1[12], &rounding, bit); - bf1[12] = half_btf_0_sse4_1(&cospi12, &bf1[12], &rounding, bit); - bf1[17] = bf1[16]; - bf1[18] = bf1[19]; - bf1[21] = bf1[20]; - bf1[22] = bf1[23]; - bf1[25] = bf1[24]; - bf1[26] = bf1[27]; - bf1[29] = bf1[28]; - bf1[30] = bf1[31]; - - // stage 4 : - bf1[7] = half_btf_0_sse4_1(&cospi8, &bf1[4], &rounding, bit); - bf1[4] = half_btf_0_sse4_1(&cospi56, &bf1[4], &rounding, bit); - - bf1[9] = bf1[8]; - bf1[10] = bf1[11]; - bf1[13] = bf1[12]; - bf1[14] = bf1[15]; - - idct32_stage4_sse4_1(bf1, &cospim8, &cospi56, &cospi8, &cospim56, &cospim40, - &cospi24, &cospi40, &cospim24, &rounding, bit); - - // stage 5 - bf1[0] = half_btf_0_sse4_1(&cospi32, &bf1[0], &rounding, bit); - bf1[1] = bf1[0]; - bf1[5] = bf1[4]; - bf1[6] = bf1[7]; - - idct32_stage5_sse4_1(bf1, &cospim16, &cospi48, &cospi16, &cospim48, &clamp_lo, - &clamp_hi, &rounding, bit); - - // stage 6 - bf1[3] = bf1[0]; - bf1[2] 
= bf1[1]; - - idct32_stage6_sse4_1(bf1, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16, - &cospim48, &clamp_lo, &clamp_hi, &rounding, bit); - - // stage 7 - idct32_stage7_sse4_1(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi, - &rounding, bit); - - // stage 8 - idct32_stage8_sse4_1(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi, - &rounding, bit); - - // stage 9 - idct32_stage9_sse4_1(bf1, out, do_cols, bd, out_shift, log_range); -} - -static void idct32x32_low16_sse4_1(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - const __m128i cospi30 = _mm_set1_epi32(cospi[30]); - const __m128i cospi46 = _mm_set1_epi32(cospi[46]); - const __m128i cospi14 = _mm_set1_epi32(cospi[14]); - const __m128i cospi54 = _mm_set1_epi32(cospi[54]); - const __m128i cospi22 = _mm_set1_epi32(cospi[22]); - const __m128i cospi38 = _mm_set1_epi32(cospi[38]); - const __m128i cospi6 = _mm_set1_epi32(cospi[6]); - const __m128i cospi26 = _mm_set1_epi32(cospi[26]); - const __m128i cospi10 = _mm_set1_epi32(cospi[10]); - const __m128i cospi18 = _mm_set1_epi32(cospi[18]); - const __m128i cospi2 = _mm_set1_epi32(cospi[2]); - const __m128i cospim58 = _mm_set1_epi32(-cospi[58]); - const __m128i cospim42 = _mm_set1_epi32(-cospi[42]); - const __m128i cospim50 = _mm_set1_epi32(-cospi[50]); - const __m128i cospim34 = _mm_set1_epi32(-cospi[34]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospim52 = _mm_set1_epi32(-cospi[52]); - const __m128i cospim36 = _mm_set1_epi32(-cospi[36]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i 
cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospim56 = _mm_set1_epi32(-cospi[56]); - const __m128i cospim24 = _mm_set1_epi32(-cospi[24]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i rounding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i bf1[32]; - - // stage 0 - // stage 1 - - bf1[0] = in[0]; - bf1[2] = in[8]; - bf1[4] = in[4]; - bf1[6] = in[12]; - bf1[8] = in[2]; - bf1[10] = in[10]; - bf1[12] = in[6]; - bf1[14] = in[14]; - bf1[16] = in[1]; - bf1[18] = in[9]; - bf1[20] = in[5]; - bf1[22] = in[13]; - bf1[24] = in[3]; - bf1[26] = in[11]; - bf1[28] = in[7]; - bf1[30] = in[15]; - - // stage 2 - bf1[31] = half_btf_0_sse4_1(&cospi2, &bf1[16], &rounding, bit); - bf1[16] = half_btf_0_sse4_1(&cospi62, &bf1[16], &rounding, bit); - bf1[17] = half_btf_0_sse4_1(&cospim34, &bf1[30], &rounding, bit); - bf1[30] = half_btf_0_sse4_1(&cospi30, &bf1[30], &rounding, bit); - bf1[29] = half_btf_0_sse4_1(&cospi18, &bf1[18], &rounding, bit); - bf1[18] = half_btf_0_sse4_1(&cospi46, &bf1[18], &rounding, bit); - bf1[19] = half_btf_0_sse4_1(&cospim50, &bf1[28], &rounding, bit); - bf1[28] = half_btf_0_sse4_1(&cospi14, &bf1[28], &rounding, bit); - bf1[27] = half_btf_0_sse4_1(&cospi10, &bf1[20], &rounding, bit); - bf1[20] = half_btf_0_sse4_1(&cospi54, &bf1[20], &rounding, bit); - bf1[21] = half_btf_0_sse4_1(&cospim42, &bf1[26], &rounding, bit); - 
bf1[26] = half_btf_0_sse4_1(&cospi22, &bf1[26], &rounding, bit); - bf1[25] = half_btf_0_sse4_1(&cospi26, &bf1[22], &rounding, bit); - bf1[22] = half_btf_0_sse4_1(&cospi38, &bf1[22], &rounding, bit); - bf1[23] = half_btf_0_sse4_1(&cospim58, &bf1[24], &rounding, bit); - bf1[24] = half_btf_0_sse4_1(&cospi6, &bf1[24], &rounding, bit); - - // stage 3 - bf1[15] = half_btf_0_sse4_1(&cospi4, &bf1[8], &rounding, bit); - bf1[8] = half_btf_0_sse4_1(&cospi60, &bf1[8], &rounding, bit); - bf1[9] = half_btf_0_sse4_1(&cospim36, &bf1[14], &rounding, bit); - bf1[14] = half_btf_0_sse4_1(&cospi28, &bf1[14], &rounding, bit); - bf1[13] = half_btf_0_sse4_1(&cospi20, &bf1[10], &rounding, bit); - bf1[10] = half_btf_0_sse4_1(&cospi44, &bf1[10], &rounding, bit); - bf1[11] = half_btf_0_sse4_1(&cospim52, &bf1[12], &rounding, bit); - bf1[12] = half_btf_0_sse4_1(&cospi12, &bf1[12], &rounding, bit); - - addsub_sse4_1(bf1[16], bf1[17], bf1 + 16, bf1 + 17, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[19], bf1[18], bf1 + 19, bf1 + 18, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[20], bf1[21], bf1 + 20, bf1 + 21, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[23], bf1[22], bf1 + 23, bf1 + 22, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[24], bf1[25], bf1 + 24, bf1 + 25, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[27], bf1[26], bf1 + 27, bf1 + 26, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[28], bf1[29], bf1 + 28, bf1 + 29, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[31], bf1[30], bf1 + 31, bf1 + 30, &clamp_lo, &clamp_hi); - // stage 4 - bf1[7] = half_btf_0_sse4_1(&cospi8, &bf1[4], &rounding, bit); - bf1[4] = half_btf_0_sse4_1(&cospi56, &bf1[4], &rounding, bit); - bf1[5] = half_btf_0_sse4_1(&cospim40, &bf1[6], &rounding, bit); - bf1[6] = half_btf_0_sse4_1(&cospi24, &bf1[6], &rounding, bit); - - addsub_sse4_1(bf1[8], bf1[9], bf1 + 8, bf1 + 9, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[11], bf1[10], bf1 + 11, bf1 + 10, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[12], bf1[13], bf1 + 12, bf1 + 13, &clamp_lo, 
&clamp_hi); - addsub_sse4_1(bf1[15], bf1[14], bf1 + 15, bf1 + 14, &clamp_lo, &clamp_hi); - - idct32_stage4_sse4_1(bf1, &cospim8, &cospi56, &cospi8, &cospim56, &cospim40, - &cospi24, &cospi40, &cospim24, &rounding, bit); - - // stage 5 - bf1[0] = half_btf_0_sse4_1(&cospi32, &bf1[0], &rounding, bit); - bf1[1] = bf1[0]; - bf1[3] = half_btf_0_sse4_1(&cospi16, &bf1[2], &rounding, bit); - bf1[2] = half_btf_0_sse4_1(&cospi48, &bf1[2], &rounding, bit); - - addsub_sse4_1(bf1[4], bf1[5], bf1 + 4, bf1 + 5, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[7], bf1[6], bf1 + 7, bf1 + 6, &clamp_lo, &clamp_hi); - - idct32_stage5_sse4_1(bf1, &cospim16, &cospi48, &cospi16, &cospim48, &clamp_lo, - &clamp_hi, &rounding, bit); - - // stage 6 - addsub_sse4_1(bf1[0], bf1[3], bf1 + 0, bf1 + 3, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[1], bf1[2], bf1 + 1, bf1 + 2, &clamp_lo, &clamp_hi); - - idct32_stage6_sse4_1(bf1, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16, - &cospim48, &clamp_lo, &clamp_hi, &rounding, bit); - - // stage 7 - idct32_stage7_sse4_1(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi, - &rounding, bit); - - // stage 8 - idct32_stage8_sse4_1(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi, - &rounding, bit); - - // stage 9 - idct32_stage9_sse4_1(bf1, out, do_cols, bd, out_shift, log_range); -} - -static void idct32x32_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols, - int bd, int out_shift) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - const __m128i cospi30 = _mm_set1_epi32(cospi[30]); - const __m128i cospi46 = _mm_set1_epi32(cospi[46]); - const __m128i cospi14 = _mm_set1_epi32(cospi[14]); - const __m128i cospi54 = _mm_set1_epi32(cospi[54]); - const __m128i cospi22 = _mm_set1_epi32(cospi[22]); - const __m128i cospi38 = _mm_set1_epi32(cospi[38]); - const __m128i cospi6 = _mm_set1_epi32(cospi[6]); - const __m128i cospi58 = _mm_set1_epi32(cospi[58]); - const __m128i cospi26 = _mm_set1_epi32(cospi[26]); - const __m128i 
cospi42 = _mm_set1_epi32(cospi[42]); - const __m128i cospi10 = _mm_set1_epi32(cospi[10]); - const __m128i cospi50 = _mm_set1_epi32(cospi[50]); - const __m128i cospi18 = _mm_set1_epi32(cospi[18]); - const __m128i cospi34 = _mm_set1_epi32(cospi[34]); - const __m128i cospi2 = _mm_set1_epi32(cospi[2]); - const __m128i cospim58 = _mm_set1_epi32(-cospi[58]); - const __m128i cospim26 = _mm_set1_epi32(-cospi[26]); - const __m128i cospim42 = _mm_set1_epi32(-cospi[42]); - const __m128i cospim10 = _mm_set1_epi32(-cospi[10]); - const __m128i cospim50 = _mm_set1_epi32(-cospi[50]); - const __m128i cospim18 = _mm_set1_epi32(-cospi[18]); - const __m128i cospim34 = _mm_set1_epi32(-cospi[34]); - const __m128i cospim2 = _mm_set1_epi32(-cospi[2]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi52 = _mm_set1_epi32(cospi[52]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospi36 = _mm_set1_epi32(cospi[36]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospim52 = _mm_set1_epi32(-cospi[52]); - const __m128i cospim20 = _mm_set1_epi32(-cospi[20]); - const __m128i cospim36 = _mm_set1_epi32(-cospi[36]); - const __m128i cospim4 = _mm_set1_epi32(-cospi[4]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospim56 = _mm_set1_epi32(-cospi[56]); - const __m128i cospim24 = _mm_set1_epi32(-cospi[24]); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospim48 = 
_mm_set1_epi32(-cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i rounding = _mm_set1_epi32(1 << (bit - 1)); - const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8)); - const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); - const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1); - __m128i bf1[32], bf0[32]; - - // stage 0 - // stage 1 - bf1[0] = in[0]; - bf1[1] = in[16]; - bf1[2] = in[8]; - bf1[3] = in[24]; - bf1[4] = in[4]; - bf1[5] = in[20]; - bf1[6] = in[12]; - bf1[7] = in[28]; - bf1[8] = in[2]; - bf1[9] = in[18]; - bf1[10] = in[10]; - bf1[11] = in[26]; - bf1[12] = in[6]; - bf1[13] = in[22]; - bf1[14] = in[14]; - bf1[15] = in[30]; - bf1[16] = in[1]; - bf1[17] = in[17]; - bf1[18] = in[9]; - bf1[19] = in[25]; - bf1[20] = in[5]; - bf1[21] = in[21]; - bf1[22] = in[13]; - bf1[23] = in[29]; - bf1[24] = in[3]; - bf1[25] = in[19]; - bf1[26] = in[11]; - bf1[27] = in[27]; - bf1[28] = in[7]; - bf1[29] = in[23]; - bf1[30] = in[15]; - bf1[31] = in[31]; - - // stage 2 - bf0[0] = bf1[0]; - bf0[1] = bf1[1]; - bf0[2] = bf1[2]; - bf0[3] = bf1[3]; - bf0[4] = bf1[4]; - bf0[5] = bf1[5]; - bf0[6] = bf1[6]; - bf0[7] = bf1[7]; - bf0[8] = bf1[8]; - bf0[9] = bf1[9]; - bf0[10] = bf1[10]; - bf0[11] = bf1[11]; - bf0[12] = bf1[12]; - bf0[13] = bf1[13]; - bf0[14] = bf1[14]; - bf0[15] = bf1[15]; - bf0[16] = - half_btf_sse4_1(&cospi62, &bf1[16], &cospim2, &bf1[31], &rounding, bit); - bf0[17] = - half_btf_sse4_1(&cospi30, &bf1[17], &cospim34, &bf1[30], &rounding, bit); - bf0[18] = - half_btf_sse4_1(&cospi46, &bf1[18], &cospim18, &bf1[29], &rounding, bit); - bf0[19] = - half_btf_sse4_1(&cospi14, &bf1[19], &cospim50, &bf1[28], &rounding, bit); - bf0[20] = - half_btf_sse4_1(&cospi54, &bf1[20], &cospim10, &bf1[27], &rounding, bit); - bf0[21] = - half_btf_sse4_1(&cospi22, &bf1[21], &cospim42, &bf1[26], &rounding, bit); - bf0[22] = - half_btf_sse4_1(&cospi38, &bf1[22], &cospim26, &bf1[25], 
&rounding, bit); - bf0[23] = - half_btf_sse4_1(&cospi6, &bf1[23], &cospim58, &bf1[24], &rounding, bit); - bf0[24] = - half_btf_sse4_1(&cospi58, &bf1[23], &cospi6, &bf1[24], &rounding, bit); - bf0[25] = - half_btf_sse4_1(&cospi26, &bf1[22], &cospi38, &bf1[25], &rounding, bit); - bf0[26] = - half_btf_sse4_1(&cospi42, &bf1[21], &cospi22, &bf1[26], &rounding, bit); - bf0[27] = - half_btf_sse4_1(&cospi10, &bf1[20], &cospi54, &bf1[27], &rounding, bit); - bf0[28] = - half_btf_sse4_1(&cospi50, &bf1[19], &cospi14, &bf1[28], &rounding, bit); - bf0[29] = - half_btf_sse4_1(&cospi18, &bf1[18], &cospi46, &bf1[29], &rounding, bit); - bf0[30] = - half_btf_sse4_1(&cospi34, &bf1[17], &cospi30, &bf1[30], &rounding, bit); - bf0[31] = - half_btf_sse4_1(&cospi2, &bf1[16], &cospi62, &bf1[31], &rounding, bit); - - // stage 3 - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = - half_btf_sse4_1(&cospi60, &bf0[8], &cospim4, &bf0[15], &rounding, bit); - bf1[9] = - half_btf_sse4_1(&cospi28, &bf0[9], &cospim36, &bf0[14], &rounding, bit); - bf1[10] = - half_btf_sse4_1(&cospi44, &bf0[10], &cospim20, &bf0[13], &rounding, bit); - bf1[11] = - half_btf_sse4_1(&cospi12, &bf0[11], &cospim52, &bf0[12], &rounding, bit); - bf1[12] = - half_btf_sse4_1(&cospi52, &bf0[11], &cospi12, &bf0[12], &rounding, bit); - bf1[13] = - half_btf_sse4_1(&cospi20, &bf0[10], &cospi44, &bf0[13], &rounding, bit); - bf1[14] = - half_btf_sse4_1(&cospi36, &bf0[9], &cospi28, &bf0[14], &rounding, bit); - bf1[15] = - half_btf_sse4_1(&cospi4, &bf0[8], &cospi60, &bf0[15], &rounding, bit); - - addsub_sse4_1(bf0[16], bf0[17], bf1 + 16, bf1 + 17, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[19], bf0[18], bf1 + 19, bf1 + 18, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[20], bf0[21], bf1 + 20, bf1 + 21, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[23], bf0[22], bf1 + 23, bf1 + 22, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[24], 
bf0[25], bf1 + 24, bf1 + 25, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[27], bf0[26], bf1 + 27, bf1 + 26, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[28], bf0[29], bf1 + 28, bf1 + 29, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[31], bf0[30], bf1 + 31, bf1 + 30, &clamp_lo, &clamp_hi); - - // stage 4 - bf0[0] = bf1[0]; - bf0[1] = bf1[1]; - bf0[2] = bf1[2]; - bf0[3] = bf1[3]; - bf0[4] = - half_btf_sse4_1(&cospi56, &bf1[4], &cospim8, &bf1[7], &rounding, bit); - bf0[5] = - half_btf_sse4_1(&cospi24, &bf1[5], &cospim40, &bf1[6], &rounding, bit); - bf0[6] = - half_btf_sse4_1(&cospi40, &bf1[5], &cospi24, &bf1[6], &rounding, bit); - bf0[7] = half_btf_sse4_1(&cospi8, &bf1[4], &cospi56, &bf1[7], &rounding, bit); - - addsub_sse4_1(bf1[8], bf1[9], bf0 + 8, bf0 + 9, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[11], bf1[10], bf0 + 11, bf0 + 10, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[12], bf1[13], bf0 + 12, bf0 + 13, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[15], bf1[14], bf0 + 15, bf0 + 14, &clamp_lo, &clamp_hi); - - bf0[16] = bf1[16]; - bf0[17] = - half_btf_sse4_1(&cospim8, &bf1[17], &cospi56, &bf1[30], &rounding, bit); - bf0[18] = - half_btf_sse4_1(&cospim56, &bf1[18], &cospim8, &bf1[29], &rounding, bit); - bf0[19] = bf1[19]; - bf0[20] = bf1[20]; - bf0[21] = - half_btf_sse4_1(&cospim40, &bf1[21], &cospi24, &bf1[26], &rounding, bit); - bf0[22] = - half_btf_sse4_1(&cospim24, &bf1[22], &cospim40, &bf1[25], &rounding, bit); - bf0[23] = bf1[23]; - bf0[24] = bf1[24]; - bf0[25] = - half_btf_sse4_1(&cospim40, &bf1[22], &cospi24, &bf1[25], &rounding, bit); - bf0[26] = - half_btf_sse4_1(&cospi24, &bf1[21], &cospi40, &bf1[26], &rounding, bit); - bf0[27] = bf1[27]; - bf0[28] = bf1[28]; - bf0[29] = - half_btf_sse4_1(&cospim8, &bf1[18], &cospi56, &bf1[29], &rounding, bit); - bf0[30] = - half_btf_sse4_1(&cospi56, &bf1[17], &cospi8, &bf1[30], &rounding, bit); - bf0[31] = bf1[31]; - - // stage 5 - bf1[0] = - half_btf_sse4_1(&cospi32, &bf0[0], &cospi32, &bf0[1], &rounding, bit); - bf1[1] = - 
half_btf_sse4_1(&cospi32, &bf0[0], &cospim32, &bf0[1], &rounding, bit); - bf1[2] = - half_btf_sse4_1(&cospi48, &bf0[2], &cospim16, &bf0[3], &rounding, bit); - bf1[3] = - half_btf_sse4_1(&cospi16, &bf0[2], &cospi48, &bf0[3], &rounding, bit); - addsub_sse4_1(bf0[4], bf0[5], bf1 + 4, bf1 + 5, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[7], bf0[6], bf1 + 7, bf1 + 6, &clamp_lo, &clamp_hi); - bf1[8] = bf0[8]; - bf1[9] = - half_btf_sse4_1(&cospim16, &bf0[9], &cospi48, &bf0[14], &rounding, bit); - bf1[10] = - half_btf_sse4_1(&cospim48, &bf0[10], &cospim16, &bf0[13], &rounding, bit); - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = - half_btf_sse4_1(&cospim16, &bf0[10], &cospi48, &bf0[13], &rounding, bit); - bf1[14] = - half_btf_sse4_1(&cospi48, &bf0[9], &cospi16, &bf0[14], &rounding, bit); - bf1[15] = bf0[15]; - addsub_sse4_1(bf0[16], bf0[19], bf1 + 16, bf1 + 19, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[17], bf0[18], bf1 + 17, bf1 + 18, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[23], bf0[20], bf1 + 23, bf1 + 20, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[22], bf0[21], bf1 + 22, bf1 + 21, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[24], bf0[27], bf1 + 24, bf1 + 27, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[25], bf0[26], bf1 + 25, bf1 + 26, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[31], bf0[28], bf1 + 31, bf1 + 28, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[30], bf0[29], bf1 + 30, bf1 + 29, &clamp_lo, &clamp_hi); - - // stage 6 - addsub_sse4_1(bf1[0], bf1[3], bf0 + 0, bf0 + 3, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[1], bf1[2], bf0 + 1, bf0 + 2, &clamp_lo, &clamp_hi); - bf0[4] = bf1[4]; - bf0[5] = - half_btf_sse4_1(&cospim32, &bf1[5], &cospi32, &bf1[6], &rounding, bit); - bf0[6] = - half_btf_sse4_1(&cospi32, &bf1[5], &cospi32, &bf1[6], &rounding, bit); - bf0[7] = bf1[7]; - addsub_sse4_1(bf1[8], bf1[11], bf0 + 8, bf0 + 11, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[9], bf1[10], bf0 + 9, bf0 + 10, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[15], bf1[12], bf0 + 15, bf0 
+ 12, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[14], bf1[13], bf0 + 14, bf0 + 13, &clamp_lo, &clamp_hi); - bf0[16] = bf1[16]; - bf0[17] = bf1[17]; - bf0[18] = - half_btf_sse4_1(&cospim16, &bf1[18], &cospi48, &bf1[29], &rounding, bit); - bf0[19] = - half_btf_sse4_1(&cospim16, &bf1[19], &cospi48, &bf1[28], &rounding, bit); - bf0[20] = - half_btf_sse4_1(&cospim48, &bf1[20], &cospim16, &bf1[27], &rounding, bit); - bf0[21] = - half_btf_sse4_1(&cospim48, &bf1[21], &cospim16, &bf1[26], &rounding, bit); - bf0[22] = bf1[22]; - bf0[23] = bf1[23]; - bf0[24] = bf1[24]; - bf0[25] = bf1[25]; - bf0[26] = - half_btf_sse4_1(&cospim16, &bf1[21], &cospi48, &bf1[26], &rounding, bit); - bf0[27] = - half_btf_sse4_1(&cospim16, &bf1[20], &cospi48, &bf1[27], &rounding, bit); - bf0[28] = - half_btf_sse4_1(&cospi48, &bf1[19], &cospi16, &bf1[28], &rounding, bit); - bf0[29] = - half_btf_sse4_1(&cospi48, &bf1[18], &cospi16, &bf1[29], &rounding, bit); - bf0[30] = bf1[30]; - bf0[31] = bf1[31]; - - // stage 7 - addsub_sse4_1(bf0[0], bf0[7], bf1 + 0, bf1 + 7, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[1], bf0[6], bf1 + 1, bf1 + 6, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[2], bf0[5], bf1 + 2, bf1 + 5, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[3], bf0[4], bf1 + 3, bf1 + 4, &clamp_lo, &clamp_hi); - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = - half_btf_sse4_1(&cospim32, &bf0[10], &cospi32, &bf0[13], &rounding, bit); - bf1[11] = - half_btf_sse4_1(&cospim32, &bf0[11], &cospi32, &bf0[12], &rounding, bit); - bf1[12] = - half_btf_sse4_1(&cospi32, &bf0[11], &cospi32, &bf0[12], &rounding, bit); - bf1[13] = - half_btf_sse4_1(&cospi32, &bf0[10], &cospi32, &bf0[13], &rounding, bit); - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - addsub_sse4_1(bf0[16], bf0[23], bf1 + 16, bf1 + 23, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[17], bf0[22], bf1 + 17, bf1 + 22, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[18], bf0[21], bf1 + 18, bf1 + 21, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[19], bf0[20], bf1 + 19, bf1 + 20, 
&clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[31], bf0[24], bf1 + 31, bf1 + 24, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[30], bf0[25], bf1 + 30, bf1 + 25, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[29], bf0[26], bf1 + 29, bf1 + 26, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf0[28], bf0[27], bf1 + 28, bf1 + 27, &clamp_lo, &clamp_hi); - - // stage 8 - addsub_sse4_1(bf1[0], bf1[15], bf0 + 0, bf0 + 15, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[1], bf1[14], bf0 + 1, bf0 + 14, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[2], bf1[13], bf0 + 2, bf0 + 13, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[3], bf1[12], bf0 + 3, bf0 + 12, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[4], bf1[11], bf0 + 4, bf0 + 11, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[5], bf1[10], bf0 + 5, bf0 + 10, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[6], bf1[9], bf0 + 6, bf0 + 9, &clamp_lo, &clamp_hi); - addsub_sse4_1(bf1[7], bf1[8], bf0 + 7, bf0 + 8, &clamp_lo, &clamp_hi); - bf0[16] = bf1[16]; - bf0[17] = bf1[17]; - bf0[18] = bf1[18]; - bf0[19] = bf1[19]; - bf0[20] = - half_btf_sse4_1(&cospim32, &bf1[20], &cospi32, &bf1[27], &rounding, bit); - bf0[21] = - half_btf_sse4_1(&cospim32, &bf1[21], &cospi32, &bf1[26], &rounding, bit); - bf0[22] = - half_btf_sse4_1(&cospim32, &bf1[22], &cospi32, &bf1[25], &rounding, bit); - bf0[23] = - half_btf_sse4_1(&cospim32, &bf1[23], &cospi32, &bf1[24], &rounding, bit); - bf0[24] = - half_btf_sse4_1(&cospi32, &bf1[23], &cospi32, &bf1[24], &rounding, bit); - bf0[25] = - half_btf_sse4_1(&cospi32, &bf1[22], &cospi32, &bf1[25], &rounding, bit); - bf0[26] = - half_btf_sse4_1(&cospi32, &bf1[21], &cospi32, &bf1[26], &rounding, bit); - bf0[27] = - half_btf_sse4_1(&cospi32, &bf1[20], &cospi32, &bf1[27], &rounding, bit); - bf0[28] = bf1[28]; - bf0[29] = bf1[29]; - bf0[30] = bf1[30]; - bf0[31] = bf1[31]; - - // stage 9 - if (do_cols) { - addsub_no_clamp_sse4_1(bf0[0], bf0[31], out + 0, out + 31); - addsub_no_clamp_sse4_1(bf0[1], bf0[30], out + 1, out + 30); - addsub_no_clamp_sse4_1(bf0[2], 
bf0[29], out + 2, out + 29); - addsub_no_clamp_sse4_1(bf0[3], bf0[28], out + 3, out + 28); - addsub_no_clamp_sse4_1(bf0[4], bf0[27], out + 4, out + 27); - addsub_no_clamp_sse4_1(bf0[5], bf0[26], out + 5, out + 26); - addsub_no_clamp_sse4_1(bf0[6], bf0[25], out + 6, out + 25); - addsub_no_clamp_sse4_1(bf0[7], bf0[24], out + 7, out + 24); - addsub_no_clamp_sse4_1(bf0[8], bf0[23], out + 8, out + 23); - addsub_no_clamp_sse4_1(bf0[9], bf0[22], out + 9, out + 22); - addsub_no_clamp_sse4_1(bf0[10], bf0[21], out + 10, out + 21); - addsub_no_clamp_sse4_1(bf0[11], bf0[20], out + 11, out + 20); - addsub_no_clamp_sse4_1(bf0[12], bf0[19], out + 12, out + 19); - addsub_no_clamp_sse4_1(bf0[13], bf0[18], out + 13, out + 18); - addsub_no_clamp_sse4_1(bf0[14], bf0[17], out + 14, out + 17); - addsub_no_clamp_sse4_1(bf0[15], bf0[16], out + 15, out + 16); - } else { - const int log_range_out = AOMMAX(16, bd + 6); - const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX( - -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift)))); - const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( - (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift)))); - - addsub_shift_sse4_1(bf0[0], bf0[31], out + 0, out + 31, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[1], bf0[30], out + 1, out + 30, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[2], bf0[29], out + 2, out + 29, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[3], bf0[28], out + 3, out + 28, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[4], bf0[27], out + 4, out + 27, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[5], bf0[26], out + 5, out + 26, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[6], bf0[25], out + 6, out + 25, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[7], bf0[24], out + 7, out + 24, &clamp_lo_out, - &clamp_hi_out, out_shift); - 
addsub_shift_sse4_1(bf0[8], bf0[23], out + 8, out + 23, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[9], bf0[22], out + 9, out + 22, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[10], bf0[21], out + 10, out + 21, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[11], bf0[20], out + 11, out + 20, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[12], bf0[19], out + 12, out + 19, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[13], bf0[18], out + 13, out + 18, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[14], bf0[17], out + 14, out + 17, &clamp_lo_out, - &clamp_hi_out, out_shift); - addsub_shift_sse4_1(bf0[15], bf0[16], out + 15, out + 16, &clamp_lo_out, - &clamp_hi_out, out_shift); - } -} - -void av1_highbd_inv_txfm_add_8x8_sse4_1(const tran_low_t *input, uint8_t *dest, - int stride, - const TxfmParam *txfm_param) { - int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - switch (tx_type) { - // Assembly version doesn't support some transform types, so use C version - // for those. - case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, - bd); - break; - default: - av1_inv_txfm2d_add_8x8_sse4_1(src, CONVERT_TO_SHORTPTR(dest), stride, - tx_type, bd); - break; - } -} - -void av1_highbd_inv_txfm_add_16x8_sse4_1(const tran_low_t *input, uint8_t *dest, - int stride, - const TxfmParam *txfm_param) { - int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - switch (tx_type) { - // Assembly version doesn't support some transform types, so use C version - // for those. 
- case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); - break; - default: - av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type, - txfm_param->tx_size, - txfm_param->eob, bd); - break; - } -} - -void av1_highbd_inv_txfm_add_8x16_sse4_1(const tran_low_t *input, uint8_t *dest, - int stride, - const TxfmParam *txfm_param) { - int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - switch (tx_type) { - // Assembly version doesn't support some transform types, so use C version - // for those. - case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, - txfm_param->tx_type, txfm_param->bd); - break; - default: - av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type, - txfm_param->tx_size, - txfm_param->eob, bd); - break; - } -} - -void av1_highbd_inv_txfm_add_16x16_sse4_1(const tran_low_t *input, - uint8_t *dest, int stride, - const TxfmParam *txfm_param) { - int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - switch (tx_type) { - // Assembly version doesn't support some transform types, so use C version - // for those. 
- case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, - tx_type, bd); - break; - default: - av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type, - txfm_param->tx_size, - txfm_param->eob, bd); - break; - } -} - -void av1_highbd_inv_txfm_add_32x32_sse4_1(const tran_low_t *input, - uint8_t *dest, int stride, - const TxfmParam *txfm_param) { - int bd = txfm_param->bd; - const TX_TYPE tx_type = txfm_param->tx_type; - const int32_t *src = cast_to_int32(input); - switch (tx_type) { - case DCT_DCT: - av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type, - txfm_param->tx_size, - txfm_param->eob, bd); - break; - // Assembly version doesn't support IDTX, so use C version for it. - case IDTX: - av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, - tx_type, bd); - break; - default: assert(0); - } -} - -void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t *input, uint8_t *dest, - int stride, - const TxfmParam *txfm_param) { - assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); - int eob = txfm_param->eob; - int bd = txfm_param->bd; - int lossless = txfm_param->lossless; - const int32_t *src = cast_to_int32(input); - const TX_TYPE tx_type = txfm_param->tx_type; - if (lossless) { - assert(tx_type == DCT_DCT); - av1_highbd_iwht4x4_add(input, dest, stride, eob, bd); - return; - } - switch (tx_type) { - // Assembly version doesn't support some transform types, so use C version - // for those. 
- case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, - bd); - break; - default: - av1_inv_txfm2d_add_4x4_sse4_1(src, CONVERT_TO_SHORTPTR(dest), stride, - tx_type, bd); - break; - } -} - -static const transform_1d_sse4_1 - highbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = { - { - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL }, - }, - { { idct8x8_low1_sse4_1, idct8x8_new_sse4_1, NULL, NULL }, - { iadst8x8_low1_sse4_1, iadst8x8_new_sse4_1, NULL, NULL }, - { NULL, NULL, NULL, NULL } }, - { - { idct16x16_low1_sse4_1, idct16x16_low8_sse4_1, idct16x16_sse4_1, - NULL }, - { iadst16x16_low1_sse4_1, iadst16x16_low8_sse4_1, iadst16x16_sse4_1, - NULL }, - { NULL, NULL, NULL, NULL }, - }, - { { idct32x32_low1_sse4_1, idct32x32_low8_sse4_1, idct32x32_low16_sse4_1, - idct32x32_sse4_1 }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } }, - { { idct64x64_low1_sse4_1, idct64x64_low8_sse4_1, idct64x64_low16_sse4_1, - idct64x64_sse4_1 }, - { NULL, NULL, NULL, NULL }, - { NULL, NULL, NULL, NULL } } - }; - -static void highbd_inv_txfm2d_add_no_identity_sse41(const int32_t *input, - uint16_t *output, - int stride, TX_TYPE tx_type, - TX_SIZE tx_size, int eob, - const int bd) { - __m128i buf1[64 * 16]; - int eobx, eoby; - get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob); - const int8_t *shift = inv_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int txfm_size_col = tx_size_wide[tx_size]; - const int txfm_size_row = tx_size_high[tx_size]; - const int buf_size_w_div8 = txfm_size_col >> 2; - const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3; - const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3; - const int input_stride = AOMMIN(32, txfm_size_col); - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); 
- - const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx]; - const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby]; - const transform_1d_sse4_1 row_txfm = - highbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x]; - const transform_1d_sse4_1 col_txfm = - highbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y]; - - assert(col_txfm != NULL); - assert(row_txfm != NULL); - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - // 1st stage: column transform - for (int i = 0; i < buf_size_nonzero_h_div8 << 1; i++) { - __m128i buf0[64]; - const int32_t *input_row = input + i * input_stride * 4; - for (int j = 0; j < buf_size_nonzero_w_div8 << 1; ++j) { - __m128i *buf0_cur = buf0 + j * 4; - load_buffer_32bit_input(input_row + j * 4, input_stride, buf0_cur, 4); - - TRANSPOSE_4X4(buf0_cur[0], buf0_cur[1], buf0_cur[2], buf0_cur[3], - buf0_cur[0], buf0_cur[1], buf0_cur[2], buf0_cur[3]); - } - if (rect_type == 1 || rect_type == -1) { - av1_round_shift_rect_array_32_sse4_1( - buf0, buf0, buf_size_nonzero_w_div8 << 3, 0, NewInvSqrt2); - } - row_txfm(buf0, buf0, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, -shift[0]); - - __m128i *_buf1 = buf1 + i * 4; - if (lr_flip) { - for (int j = 0; j < buf_size_w_div8; ++j) { - TRANSPOSE_4X4(buf0[4 * j + 3], buf0[4 * j + 2], buf0[4 * j + 1], - buf0[4 * j], - _buf1[txfm_size_row * (buf_size_w_div8 - 1 - j) + 0], - _buf1[txfm_size_row * (buf_size_w_div8 - 1 - j) + 1], - _buf1[txfm_size_row * (buf_size_w_div8 - 1 - j) + 2], - _buf1[txfm_size_row * (buf_size_w_div8 - 1 - j) + 3]); - } - } else { - for (int j = 0; j < buf_size_w_div8; ++j) { - TRANSPOSE_4X4( - buf0[j * 4 + 0], buf0[j * 4 + 1], buf0[j * 4 + 2], buf0[j * 4 + 3], - _buf1[j * txfm_size_row + 0], _buf1[j * txfm_size_row + 1], - _buf1[j * txfm_size_row + 2], _buf1[j * txfm_size_row + 3]); - } - } - } - // 2nd stage: column transform - for (int i = 0; i < buf_size_w_div8; i++) { - col_txfm(buf1 + i * txfm_size_row, buf1 + i * 
txfm_size_row, - inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0); - - av1_round_shift_array_32_sse4_1(buf1 + i * txfm_size_row, - buf1 + i * txfm_size_row, txfm_size_row, - -shift[1]); - } - - // write to buffer - { - for (int i = 0; i < (txfm_size_col >> 3); i++) { - highbd_write_buffer_8xn_sse4_1(buf1 + i * txfm_size_row * 2, - output + 8 * i, stride, ud_flip, - txfm_size_row, bd); - } - } -} - -void av1_highbd_inv_txfm2d_add_universe_sse4_1(const int32_t *input, - uint8_t *output, int stride, - TX_TYPE tx_type, TX_SIZE tx_size, - int eob, const int bd) { - switch (tx_type) { - case DCT_DCT: - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - case FLIPADST_DCT: - case DCT_FLIPADST: - case FLIPADST_FLIPADST: - case ADST_FLIPADST: - case FLIPADST_ADST: - highbd_inv_txfm2d_add_no_identity_sse41( - input, CONVERT_TO_SHORTPTR(output), stride, tx_type, tx_size, eob, - bd); - break; - default: assert(0); break; - } -} - -void av1_highbd_inv_txfm_add_sse4_1(const tran_low_t *input, uint8_t *dest, - int stride, const TxfmParam *txfm_param) { - assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); - const TX_SIZE tx_size = txfm_param->tx_size; - switch (tx_size) { - case TX_32X32: - av1_highbd_inv_txfm_add_32x32_sse4_1(input, dest, stride, txfm_param); - break; - case TX_16X16: - av1_highbd_inv_txfm_add_16x16_sse4_1(input, dest, stride, txfm_param); - break; - case TX_8X8: - av1_highbd_inv_txfm_add_8x8_sse4_1(input, dest, stride, txfm_param); - break; - case TX_4X8: - av1_highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param); - break; - case TX_8X4: - av1_highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param); - break; - case TX_8X16: - av1_highbd_inv_txfm_add_8x16_sse4_1(input, dest, stride, txfm_param); - break; - case TX_16X8: - av1_highbd_inv_txfm_add_16x8_sse4_1(input, dest, stride, txfm_param); - break; - case TX_16X32: - av1_highbd_inv_txfm_add_16x32(input, dest, stride, txfm_param); - break; - case TX_32X16: - 
av1_highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param); - break; - case TX_32X64: - av1_highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param); - break; - case TX_64X32: - av1_highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param); - break; - case TX_4X4: - av1_highbd_inv_txfm_add_4x4_sse4_1(input, dest, stride, txfm_param); - break; - case TX_16X4: - av1_highbd_inv_txfm_add_16x4(input, dest, stride, txfm_param); - break; - case TX_4X16: - av1_highbd_inv_txfm_add_4x16(input, dest, stride, txfm_param); - break; - case TX_8X32: - av1_highbd_inv_txfm_add_8x32(input, dest, stride, txfm_param); - break; - case TX_32X8: - av1_highbd_inv_txfm_add_32x8(input, dest, stride, txfm_param); - break; - case TX_64X64: - case TX_16X64: - case TX_64X16: - av1_highbd_inv_txfm2d_add_universe_sse4_1( - input, dest, stride, txfm_param->tx_type, txfm_param->tx_size, - txfm_param->eob, txfm_param->bd); - break; - default: assert(0 && "Invalid transform size"); break; - } -} diff --git a/third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c b/third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c deleted file mode 100644 index e298cf653..000000000 --- a/third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c +++ /dev/null @@ -1,846 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <immintrin.h> -#include <assert.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/x86/convolve_avx2.h" -#include "aom_dsp/x86/convolve_common_intrin.h" -#include "aom_dsp/x86/convolve_sse4_1.h" -#include "aom_dsp/x86/synonyms.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "av1/common/convolve.h" - -void av1_highbd_jnt_convolve_2d_copy_avx2( - const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - - const int bits = - FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; - const __m128i left_shift = _mm_cvtsi32_si128(bits); - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m256i wt0 = _mm256_set1_epi32(w0); - const __m256i wt1 = _mm256_set1_epi32(w1); - const __m256i zero = _mm256_setzero_si256(); - int i, j; - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m256i offset_const = _mm256_set1_epi32(offset); - const __m256i offset_const_16b = _mm256_set1_epi16(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1); - const __m256i clip_pixel_to_bd = - _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 
4095 : 255)); - - assert(bits <= 4); - - if (!(w % 16)) { - for (i = 0; i < h; i += 1) { - for (j = 0; j < w; j += 16) { - const __m256i src_16bit = - _mm256_loadu_si256((__m256i *)(&src[i * src_stride + j])); - - const __m256i res = _mm256_sll_epi16(src_16bit, left_shift); - - if (do_average) { - const __m256i data_0 = - _mm256_loadu_si256((__m256i *)(&dst[i * dst_stride + j])); - - const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_0, zero); - const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_0, zero); - - const __m256i res_32b_lo = _mm256_unpacklo_epi16(res, zero); - const __m256i res_unsigned_lo = - _mm256_add_epi32(res_32b_lo, offset_const); - - const __m256i comp_avg_res_lo = highbd_comp_avg( - &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i res_32b_hi = _mm256_unpackhi_epi16(res, zero); - const __m256i res_unsigned_hi = - _mm256_add_epi32(res_32b_hi, offset_const); - - const __m256i comp_avg_res_hi = highbd_comp_avg( - &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i round_result_lo = highbd_convolve_rounding( - &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift); - const __m256i round_result_hi = highbd_convolve_rounding( - &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_16b = - _mm256_packus_epi32(round_result_lo, round_result_hi); - const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd); - - _mm256_store_si256((__m256i *)(&dst0[i * dst_stride0 + j]), res_clip); - } else { - const __m256i res_unsigned_16b = - _mm256_adds_epu16(res, offset_const_16b); - - _mm256_store_si256((__m256i *)(&dst[i * dst_stride + j]), - res_unsigned_16b); - } - } - } - } else if (!(w % 4)) { - for (i = 0; i < h; i += 2) { - for (j = 0; j < w; j += 8) { - const __m128i src_row_0 = - _mm_loadu_si128((__m128i *)(&src[i * src_stride + j])); - const __m128i src_row_1 = - _mm_loadu_si128((__m128i *)(&src[i * src_stride + j + 
src_stride])); - // since not all compilers yet support _mm256_set_m128i() - const __m256i src_10 = _mm256_insertf128_si256( - _mm256_castsi128_si256(src_row_0), src_row_1, 1); - - const __m256i res = _mm256_sll_epi16(src_10, left_shift); - - if (w - j < 8) { - if (do_average) { - const __m256i data_0 = _mm256_castsi128_si256( - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]))); - const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64( - (__m128i *)(&dst[i * dst_stride + j + dst_stride]))); - const __m256i data_01 = - _mm256_permute2x128_si256(data_0, data_1, 0x20); - - const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero); - - const __m256i res_32b = _mm256_unpacklo_epi16(res, zero); - const __m256i res_unsigned_lo = - _mm256_add_epi32(res_32b, offset_const); - - const __m256i comp_avg_res = highbd_comp_avg( - &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i round_result = highbd_convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_16b = - _mm256_packus_epi32(round_result, round_result); - const __m256i res_clip = - _mm256_min_epi16(res_16b, clip_pixel_to_bd); - - const __m128i res_0 = _mm256_castsi256_si128(res_clip); - const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_storel_epi64( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); - } else { - const __m256i res_unsigned_16b = - _mm256_adds_epu16(res, offset_const_16b); - - const __m128i res_0 = _mm256_castsi256_si128(res_unsigned_16b); - const __m128i res_1 = _mm256_extracti128_si256(res_unsigned_16b, 1); - - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } else { - if (do_average) { - const __m256i data_0 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))); - const 
__m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128( - (__m128i *)(&dst[i * dst_stride + j + dst_stride]))); - const __m256i data_01 = - _mm256_permute2x128_si256(data_0, data_1, 0x20); - - const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero); - const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero); - - const __m256i res_32b_lo = _mm256_unpacklo_epi16(res, zero); - const __m256i res_unsigned_lo = - _mm256_add_epi32(res_32b_lo, offset_const); - - const __m256i comp_avg_res_lo = highbd_comp_avg( - &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i res_32b_hi = _mm256_unpackhi_epi16(res, zero); - const __m256i res_unsigned_hi = - _mm256_add_epi32(res_32b_hi, offset_const); - - const __m256i comp_avg_res_hi = highbd_comp_avg( - &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i round_result_lo = - highbd_convolve_rounding(&comp_avg_res_lo, &offset_const, - &rounding_const, rounding_shift); - const __m256i round_result_hi = - highbd_convolve_rounding(&comp_avg_res_hi, &offset_const, - &rounding_const, rounding_shift); - - const __m256i res_16b = - _mm256_packus_epi32(round_result_lo, round_result_hi); - const __m256i res_clip = - _mm256_min_epi16(res_16b, clip_pixel_to_bd); - - const __m128i res_0 = _mm256_castsi256_si128(res_clip); - const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); - } else { - const __m256i res_unsigned_16b = - _mm256_adds_epu16(res, offset_const_16b); - const __m128i res_0 = _mm256_castsi256_si128(res_unsigned_16b); - const __m128i res_1 = _mm256_extracti128_si256(res_unsigned_16b, 1); - - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } - } - } - } -} - -void av1_highbd_jnt_convolve_2d_avx2( - 
const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - int im_h = h + filter_params_y->taps - 1; - int im_stride = 8; - int i, j; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; - - // Check that, even with 12-bit input, the intermediate values will fit - // into an unsigned 16-bit intermediate array. - assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16); - - __m256i s[8], coeffs_y[4], coeffs_x[4]; - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m256i wt0 = _mm256_set1_epi32(w0); - const __m256i wt1 = _mm256_set1_epi32(w1); - const __m256i zero = _mm256_setzero_si256(); - - const __m256i round_const_x = _mm256_set1_epi32( - ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1))); - const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0); - - const __m256i round_const_y = _mm256_set1_epi32( - ((1 << conv_params->round_1) >> 1) - - (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1))); - const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1); - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m256i offset_const = _mm256_set1_epi32(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m256i 
rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1); - - const __m256i clip_pixel_to_bd = - _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255)); - - prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x); - prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y); - - for (j = 0; j < w; j += 8) { - /* Horizontal filter */ - { - for (i = 0; i < im_h; i += 2) { - const __m256i row0 = - _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]); - __m256i row1 = _mm256_set1_epi16(0); - if (i + 1 < im_h) - row1 = - _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]); - - const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); - const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); - - // even pixels - s[0] = _mm256_alignr_epi8(r1, r0, 0); - s[1] = _mm256_alignr_epi8(r1, r0, 4); - s[2] = _mm256_alignr_epi8(r1, r0, 8); - s[3] = _mm256_alignr_epi8(r1, r0, 12); - - __m256i res_even = convolve(s, coeffs_x); - res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x), - round_shift_x); - - // odd pixels - s[0] = _mm256_alignr_epi8(r1, r0, 2); - s[1] = _mm256_alignr_epi8(r1, r0, 6); - s[2] = _mm256_alignr_epi8(r1, r0, 10); - s[3] = _mm256_alignr_epi8(r1, r0, 14); - - __m256i res_odd = convolve(s, coeffs_x); - res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x), - round_shift_x); - - __m256i res_even1 = _mm256_packs_epi32(res_even, res_even); - __m256i res_odd1 = _mm256_packs_epi32(res_odd, res_odd); - __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1); - - _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); - } - } - - /* Vertical filter */ - { - __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); - __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); - __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); - __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); - __m256i s4 = _mm256_loadu_si256((__m256i 
*)(im_block + 4 * im_stride)); - __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); - - s[0] = _mm256_unpacklo_epi16(s0, s1); - s[1] = _mm256_unpacklo_epi16(s2, s3); - s[2] = _mm256_unpacklo_epi16(s4, s5); - - s[4] = _mm256_unpackhi_epi16(s0, s1); - s[5] = _mm256_unpackhi_epi16(s2, s3); - s[6] = _mm256_unpackhi_epi16(s4, s5); - - for (i = 0; i < h; i += 2) { - const int16_t *data = &im_block[i * im_stride]; - - const __m256i s6 = - _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); - const __m256i s7 = - _mm256_loadu_si256((__m256i *)(data + 7 * im_stride)); - - s[3] = _mm256_unpacklo_epi16(s6, s7); - s[7] = _mm256_unpackhi_epi16(s6, s7); - - const __m256i res_a = convolve(s, coeffs_y); - - const __m256i res_a_round = _mm256_sra_epi32( - _mm256_add_epi32(res_a, round_const_y), round_shift_y); - - const __m256i res_unsigned_lo = - _mm256_add_epi32(res_a_round, offset_const); - - if (w - j < 8) { - if (do_average) { - const __m256i data_0 = _mm256_castsi128_si256( - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]))); - const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64( - (__m128i *)(&dst[i * dst_stride + j + dst_stride]))); - const __m256i data_01 = - _mm256_permute2x128_si256(data_0, data_1, 0x20); - - const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero); - - const __m256i comp_avg_res = highbd_comp_avg( - &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i round_result = highbd_convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_16b = - _mm256_packus_epi32(round_result, round_result); - const __m256i res_clip = - _mm256_min_epi16(res_16b, clip_pixel_to_bd); - - const __m128i res_0 = _mm256_castsi256_si128(res_clip); - const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_storel_epi64( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); - 
} else { - __m256i res_16b = - _mm256_packus_epi32(res_unsigned_lo, res_unsigned_lo); - const __m128i res_0 = _mm256_castsi256_si128(res_16b); - const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1); - - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } else { - const __m256i res_b = convolve(s + 4, coeffs_y); - const __m256i res_b_round = _mm256_sra_epi32( - _mm256_add_epi32(res_b, round_const_y), round_shift_y); - - __m256i res_unsigned_hi = _mm256_add_epi32(res_b_round, offset_const); - - if (do_average) { - const __m256i data_0 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))); - const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128( - (__m128i *)(&dst[i * dst_stride + j + dst_stride]))); - const __m256i data_01 = - _mm256_permute2x128_si256(data_0, data_1, 0x20); - - const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero); - const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero); - - const __m256i comp_avg_res_lo = highbd_comp_avg( - &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - const __m256i comp_avg_res_hi = highbd_comp_avg( - &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i round_result_lo = - highbd_convolve_rounding(&comp_avg_res_lo, &offset_const, - &rounding_const, rounding_shift); - const __m256i round_result_hi = - highbd_convolve_rounding(&comp_avg_res_hi, &offset_const, - &rounding_const, rounding_shift); - - const __m256i res_16b = - _mm256_packus_epi32(round_result_lo, round_result_hi); - const __m256i res_clip = - _mm256_min_epi16(res_16b, clip_pixel_to_bd); - - const __m128i res_0 = _mm256_castsi256_si128(res_clip); - const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), 
res_1); - } else { - __m256i res_16b = - _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi); - const __m128i res_0 = _mm256_castsi256_si128(res_16b); - const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1); - - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } - - s[0] = s[1]; - s[1] = s[2]; - s[2] = s[3]; - - s[4] = s[5]; - s[5] = s[6]; - s[6] = s[7]; - } - } - } -} - -void av1_highbd_jnt_convolve_x_avx2( - const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint16_t *const src_ptr = src - fo_horiz; - const int bits = FILTER_BITS - conv_params->round_1; - (void)filter_params_y; - (void)subpel_y_q4; - - int i, j; - __m256i s[4], coeffs_x[4]; - - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m256i wt0 = _mm256_set1_epi32(w0); - const __m256i wt1 = _mm256_set1_epi32(w1); - const __m256i zero = _mm256_setzero_si256(); - - const __m256i round_const_x = - _mm256_set1_epi32(((1 << conv_params->round_0) >> 1)); - const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0); - const __m128i round_shift_bits = _mm_cvtsi32_si128(bits); - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m256i offset_const = _mm256_set1_epi32(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m256i 
rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1); - const __m256i clip_pixel_to_bd = - _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255)); - - assert(bits >= 0); - prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x); - - for (j = 0; j < w; j += 8) { - /* Horizontal filter */ - for (i = 0; i < h; i += 2) { - const __m256i row0 = - _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]); - __m256i row1 = - _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]); - - const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); - const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); - - // even pixels - s[0] = _mm256_alignr_epi8(r1, r0, 0); - s[1] = _mm256_alignr_epi8(r1, r0, 4); - s[2] = _mm256_alignr_epi8(r1, r0, 8); - s[3] = _mm256_alignr_epi8(r1, r0, 12); - - __m256i res_even = convolve(s, coeffs_x); - res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x), - round_shift_x); - - // odd pixels - s[0] = _mm256_alignr_epi8(r1, r0, 2); - s[1] = _mm256_alignr_epi8(r1, r0, 6); - s[2] = _mm256_alignr_epi8(r1, r0, 10); - s[3] = _mm256_alignr_epi8(r1, r0, 14); - - __m256i res_odd = convolve(s, coeffs_x); - res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x), - round_shift_x); - - res_even = _mm256_sll_epi32(res_even, round_shift_bits); - res_odd = _mm256_sll_epi32(res_odd, round_shift_bits); - - __m256i res1 = _mm256_unpacklo_epi32(res_even, res_odd); - - __m256i res_unsigned_lo = _mm256_add_epi32(res1, offset_const); - - if (w - j < 8) { - if (do_average) { - const __m256i data_0 = _mm256_castsi128_si256( - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]))); - const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64( - (__m128i *)(&dst[i * dst_stride + j + dst_stride]))); - const __m256i data_01 = - _mm256_permute2x128_si256(data_0, data_1, 0x20); - - const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero); - - const __m256i comp_avg_res = highbd_comp_avg( - &data_ref_0, 
&res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i round_result = highbd_convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_16b = - _mm256_packus_epi32(round_result, round_result); - const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd); - - const __m128i res_0 = _mm256_castsi256_si128(res_clip); - const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_storel_epi64( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); - } else { - __m256i res_16b = - _mm256_packus_epi32(res_unsigned_lo, res_unsigned_lo); - const __m128i res_0 = _mm256_castsi256_si128(res_16b); - const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1); - - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } else { - __m256i res2 = _mm256_unpackhi_epi32(res_even, res_odd); - __m256i res_unsigned_hi = _mm256_add_epi32(res2, offset_const); - - if (do_average) { - const __m256i data_0 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))); - const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128( - (__m128i *)(&dst[i * dst_stride + j + dst_stride]))); - const __m256i data_01 = - _mm256_permute2x128_si256(data_0, data_1, 0x20); - - const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero); - const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero); - - const __m256i comp_avg_res_lo = highbd_comp_avg( - &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - const __m256i comp_avg_res_hi = highbd_comp_avg( - &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i round_result_lo = highbd_convolve_rounding( - &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift); - const __m256i round_result_hi = highbd_convolve_rounding( - 
&comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_16b = - _mm256_packus_epi32(round_result_lo, round_result_hi); - const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd); - - const __m128i res_0 = _mm256_castsi256_si128(res_clip); - const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), - res_1); - } else { - __m256i res_16b = - _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi); - const __m128i res_0 = _mm256_castsi256_si128(res_16b); - const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1); - - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } - } - } -} - -void av1_highbd_jnt_convolve_y_avx2( - const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int fo_vert = filter_params_y->taps / 2 - 1; - const uint16_t *const src_ptr = src - fo_vert * src_stride; - const int bits = FILTER_BITS - conv_params->round_0; - (void)filter_params_x; - (void)subpel_x_q4; - - assert(bits >= 0); - int i, j; - __m256i s[8], coeffs_y[4]; - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m256i wt0 = _mm256_set1_epi32(w0); - const __m256i wt1 = _mm256_set1_epi32(w1); - const __m256i round_const_y = - _mm256_set1_epi32(((1 << conv_params->round_1) >> 1)); - const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1); - const 
__m128i round_shift_bits = _mm_cvtsi32_si128(bits); - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m256i offset_const = _mm256_set1_epi32(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1); - const __m256i clip_pixel_to_bd = - _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255)); - const __m256i zero = _mm256_setzero_si256(); - - prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y); - - for (j = 0; j < w; j += 8) { - const uint16_t *data = &src_ptr[j]; - /* Vertical filter */ - { - __m256i src6; - __m256i s01 = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 0 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 1 * src_stride))), - 0x20); - __m256i s12 = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 1 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 2 * src_stride))), - 0x20); - __m256i s23 = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 2 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 3 * src_stride))), - 0x20); - __m256i s34 = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 3 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 4 * src_stride))), - 0x20); - __m256i s45 = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 4 * src_stride))), - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 5 * src_stride))), - 0x20); - src6 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 6 * src_stride))); - __m256i s56 = _mm256_permute2x128_si256( - _mm256_castsi128_si256( 
- _mm_loadu_si128((__m128i *)(data + 5 * src_stride))), - src6, 0x20); - - s[0] = _mm256_unpacklo_epi16(s01, s12); - s[1] = _mm256_unpacklo_epi16(s23, s34); - s[2] = _mm256_unpacklo_epi16(s45, s56); - - s[4] = _mm256_unpackhi_epi16(s01, s12); - s[5] = _mm256_unpackhi_epi16(s23, s34); - s[6] = _mm256_unpackhi_epi16(s45, s56); - - for (i = 0; i < h; i += 2) { - data = &src_ptr[i * src_stride + j]; - - const __m256i s67 = _mm256_permute2x128_si256( - src6, - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 7 * src_stride))), - 0x20); - - src6 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 8 * src_stride))); - - const __m256i s78 = _mm256_permute2x128_si256( - _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + 7 * src_stride))), - src6, 0x20); - - s[3] = _mm256_unpacklo_epi16(s67, s78); - s[7] = _mm256_unpackhi_epi16(s67, s78); - - const __m256i res_a = convolve(s, coeffs_y); - - __m256i res_a_round = _mm256_sll_epi32(res_a, round_shift_bits); - res_a_round = _mm256_sra_epi32( - _mm256_add_epi32(res_a_round, round_const_y), round_shift_y); - - __m256i res_unsigned_lo = _mm256_add_epi32(res_a_round, offset_const); - - if (w - j < 8) { - if (do_average) { - const __m256i data_0 = _mm256_castsi128_si256( - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]))); - const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64( - (__m128i *)(&dst[i * dst_stride + j + dst_stride]))); - const __m256i data_01 = - _mm256_permute2x128_si256(data_0, data_1, 0x20); - - const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero); - - const __m256i comp_avg_res = highbd_comp_avg( - &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i round_result = highbd_convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_16b = - _mm256_packus_epi32(round_result, round_result); - const __m256i res_clip = - _mm256_min_epi16(res_16b, clip_pixel_to_bd); - - const __m128i res_0 = 
_mm256_castsi256_si128(res_clip); - const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_storel_epi64( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); - } else { - __m256i res_16b = - _mm256_packus_epi32(res_unsigned_lo, res_unsigned_lo); - const __m128i res_0 = _mm256_castsi256_si128(res_16b); - const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1); - - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } else { - const __m256i res_b = convolve(s + 4, coeffs_y); - __m256i res_b_round = _mm256_sll_epi32(res_b, round_shift_bits); - res_b_round = _mm256_sra_epi32( - _mm256_add_epi32(res_b_round, round_const_y), round_shift_y); - - __m256i res_unsigned_hi = _mm256_add_epi32(res_b_round, offset_const); - - if (do_average) { - const __m256i data_0 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))); - const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128( - (__m128i *)(&dst[i * dst_stride + j + dst_stride]))); - const __m256i data_01 = - _mm256_permute2x128_si256(data_0, data_1, 0x20); - - const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero); - const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero); - - const __m256i comp_avg_res_lo = highbd_comp_avg( - &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - const __m256i comp_avg_res_hi = highbd_comp_avg( - &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg); - - const __m256i round_result_lo = - highbd_convolve_rounding(&comp_avg_res_lo, &offset_const, - &rounding_const, rounding_shift); - const __m256i round_result_hi = - highbd_convolve_rounding(&comp_avg_res_hi, &offset_const, - &rounding_const, rounding_shift); - - const __m256i res_16b = - _mm256_packus_epi32(round_result_lo, round_result_hi); - const __m256i res_clip = - 
_mm256_min_epi16(res_16b, clip_pixel_to_bd); - - const __m128i res_0 = _mm256_castsi256_si128(res_clip); - const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1); - - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1); - } else { - __m256i res_16b = - _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi); - const __m128i res_0 = _mm256_castsi256_si128(res_16b); - const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1); - - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } - s[0] = s[1]; - s[1] = s[2]; - s[2] = s[3]; - - s[4] = s[5]; - s[5] = s[6]; - s[6] = s[7]; - } - } - } -} diff --git a/third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c b/third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c deleted file mode 100644 index 1a29985b5..000000000 --- a/third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <smmintrin.h> -#include <assert.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/x86/convolve_sse2.h" -#include "aom_dsp/x86/convolve_sse4_1.h" - -void av1_highbd_jnt_convolve_y_sse4_1( - const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int fo_vert = filter_params_y->taps / 2 - 1; - const uint16_t *const src_ptr = src - fo_vert * src_stride; - const int bits = FILTER_BITS - conv_params->round_0; - (void)filter_params_x; - (void)subpel_x_q4; - - assert(bits >= 0); - int i, j; - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi32(w0); - const __m128i wt1 = _mm_set1_epi32(w1); - const __m128i round_const_y = - _mm_set1_epi32(((1 << conv_params->round_1) >> 1)); - const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1); - const __m128i round_shift_bits = _mm_cvtsi32_si128(bits); - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m128i offset_const = _mm_set1_epi32(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1); - const __m128i clip_pixel_to_bd = - _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 
4095 : 255)); - const __m128i zero = _mm_setzero_si128(); - __m128i s[16], coeffs_y[4]; - - prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y); - - for (j = 0; j < w; j += 8) { - const uint16_t *data = &src_ptr[j]; - /* Vertical filter */ - { - __m128i s0 = _mm_loadu_si128((__m128i *)(data + 0 * src_stride)); - __m128i s1 = _mm_loadu_si128((__m128i *)(data + 1 * src_stride)); - __m128i s2 = _mm_loadu_si128((__m128i *)(data + 2 * src_stride)); - __m128i s3 = _mm_loadu_si128((__m128i *)(data + 3 * src_stride)); - __m128i s4 = _mm_loadu_si128((__m128i *)(data + 4 * src_stride)); - __m128i s5 = _mm_loadu_si128((__m128i *)(data + 5 * src_stride)); - __m128i s6 = _mm_loadu_si128((__m128i *)(data + 6 * src_stride)); - - s[0] = _mm_unpacklo_epi16(s0, s1); - s[1] = _mm_unpacklo_epi16(s2, s3); - s[2] = _mm_unpacklo_epi16(s4, s5); - - s[4] = _mm_unpackhi_epi16(s0, s1); - s[5] = _mm_unpackhi_epi16(s2, s3); - s[6] = _mm_unpackhi_epi16(s4, s5); - - s[0 + 8] = _mm_unpacklo_epi16(s1, s2); - s[1 + 8] = _mm_unpacklo_epi16(s3, s4); - s[2 + 8] = _mm_unpacklo_epi16(s5, s6); - - s[4 + 8] = _mm_unpackhi_epi16(s1, s2); - s[5 + 8] = _mm_unpackhi_epi16(s3, s4); - s[6 + 8] = _mm_unpackhi_epi16(s5, s6); - - for (i = 0; i < h; i += 2) { - data = &src_ptr[i * src_stride + j]; - - __m128i s7 = _mm_loadu_si128((__m128i *)(data + 7 * src_stride)); - __m128i s8 = _mm_loadu_si128((__m128i *)(data + 8 * src_stride)); - - s[3] = _mm_unpacklo_epi16(s6, s7); - s[7] = _mm_unpackhi_epi16(s6, s7); - - s[3 + 8] = _mm_unpacklo_epi16(s7, s8); - s[7 + 8] = _mm_unpackhi_epi16(s7, s8); - - const __m128i res_a0 = convolve(s, coeffs_y); - __m128i res_a_round0 = _mm_sll_epi32(res_a0, round_shift_bits); - res_a_round0 = _mm_sra_epi32(_mm_add_epi32(res_a_round0, round_const_y), - round_shift_y); - - const __m128i res_a1 = convolve(s + 8, coeffs_y); - __m128i res_a_round1 = _mm_sll_epi32(res_a1, round_shift_bits); - res_a_round1 = _mm_sra_epi32(_mm_add_epi32(res_a_round1, round_const_y), - round_shift_y); - - 
__m128i res_unsigned_lo_0 = _mm_add_epi32(res_a_round0, offset_const); - __m128i res_unsigned_lo_1 = _mm_add_epi32(res_a_round1, offset_const); - - if (w - j < 8) { - if (do_average) { - const __m128i data_0 = - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])); - const __m128i data_1 = _mm_loadl_epi64( - (__m128i *)(&dst[i * dst_stride + j + dst_stride])); - - const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero); - const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero); - - const __m128i comp_avg_res_0 = highbd_comp_avg_sse4_1( - &data_ref_0, &res_unsigned_lo_0, &wt0, &wt1, use_jnt_comp_avg); - const __m128i comp_avg_res_1 = highbd_comp_avg_sse4_1( - &data_ref_1, &res_unsigned_lo_1, &wt0, &wt1, use_jnt_comp_avg); - - const __m128i round_result_0 = - highbd_convolve_rounding_sse2(&comp_avg_res_0, &offset_const, - &rounding_const, rounding_shift); - const __m128i round_result_1 = - highbd_convolve_rounding_sse2(&comp_avg_res_1, &offset_const, - &rounding_const, rounding_shift); - - const __m128i res_16b_0 = - _mm_packus_epi32(round_result_0, round_result_0); - const __m128i res_clip_0 = - _mm_min_epi16(res_16b_0, clip_pixel_to_bd); - const __m128i res_16b_1 = - _mm_packus_epi32(round_result_1, round_result_1); - const __m128i res_clip_1 = - _mm_min_epi16(res_16b_1, clip_pixel_to_bd); - - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), - res_clip_0); - _mm_storel_epi64( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), - res_clip_1); - - } else { - __m128i res_16b_0 = - _mm_packus_epi32(res_unsigned_lo_0, res_unsigned_lo_0); - - __m128i res_16b_1 = - _mm_packus_epi32(res_unsigned_lo_1, res_unsigned_lo_1); - - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_16b_0); - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride], - res_16b_1); - } - } else { - const __m128i res_b0 = convolve(s + 4, coeffs_y); - __m128i res_b_round0 = _mm_sll_epi32(res_b0, round_shift_bits); - res_b_round0 = _mm_sra_epi32( - 
_mm_add_epi32(res_b_round0, round_const_y), round_shift_y); - - const __m128i res_b1 = convolve(s + 4 + 8, coeffs_y); - __m128i res_b_round1 = _mm_sll_epi32(res_b1, round_shift_bits); - res_b_round1 = _mm_sra_epi32( - _mm_add_epi32(res_b_round1, round_const_y), round_shift_y); - - __m128i res_unsigned_hi_0 = _mm_add_epi32(res_b_round0, offset_const); - __m128i res_unsigned_hi_1 = _mm_add_epi32(res_b_round1, offset_const); - - if (do_average) { - const __m128i data_0 = - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])); - const __m128i data_1 = _mm_loadu_si128( - (__m128i *)(&dst[i * dst_stride + j + dst_stride])); - const __m128i data_ref_0_lo_0 = _mm_unpacklo_epi16(data_0, zero); - const __m128i data_ref_0_lo_1 = _mm_unpacklo_epi16(data_1, zero); - - const __m128i data_ref_0_hi_0 = _mm_unpackhi_epi16(data_0, zero); - const __m128i data_ref_0_hi_1 = _mm_unpackhi_epi16(data_1, zero); - - const __m128i comp_avg_res_lo_0 = - highbd_comp_avg_sse4_1(&data_ref_0_lo_0, &res_unsigned_lo_0, - &wt0, &wt1, use_jnt_comp_avg); - const __m128i comp_avg_res_lo_1 = - highbd_comp_avg_sse4_1(&data_ref_0_lo_1, &res_unsigned_lo_1, - &wt0, &wt1, use_jnt_comp_avg); - const __m128i comp_avg_res_hi_0 = - highbd_comp_avg_sse4_1(&data_ref_0_hi_0, &res_unsigned_hi_0, - &wt0, &wt1, use_jnt_comp_avg); - const __m128i comp_avg_res_hi_1 = - highbd_comp_avg_sse4_1(&data_ref_0_hi_1, &res_unsigned_hi_1, - &wt0, &wt1, use_jnt_comp_avg); - - const __m128i round_result_lo_0 = - highbd_convolve_rounding_sse2(&comp_avg_res_lo_0, &offset_const, - &rounding_const, rounding_shift); - const __m128i round_result_lo_1 = - highbd_convolve_rounding_sse2(&comp_avg_res_lo_1, &offset_const, - &rounding_const, rounding_shift); - const __m128i round_result_hi_0 = - highbd_convolve_rounding_sse2(&comp_avg_res_hi_0, &offset_const, - &rounding_const, rounding_shift); - const __m128i round_result_hi_1 = - highbd_convolve_rounding_sse2(&comp_avg_res_hi_1, &offset_const, - &rounding_const, rounding_shift); - - const 
__m128i res_16b_0 = - _mm_packus_epi32(round_result_lo_0, round_result_hi_0); - const __m128i res_clip_0 = - _mm_min_epi16(res_16b_0, clip_pixel_to_bd); - - const __m128i res_16b_1 = - _mm_packus_epi32(round_result_lo_1, round_result_hi_1); - const __m128i res_clip_1 = - _mm_min_epi16(res_16b_1, clip_pixel_to_bd); - - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), - res_clip_0); - _mm_store_si128( - (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), - res_clip_1); - } else { - __m128i res_16bit0 = - _mm_packus_epi32(res_unsigned_lo_0, res_unsigned_hi_0); - __m128i res_16bit1 = - _mm_packus_epi32(res_unsigned_lo_1, res_unsigned_hi_1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16bit0); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_16bit1); - } - } - s[0] = s[1]; - s[1] = s[2]; - s[2] = s[3]; - - s[4] = s[5]; - s[5] = s[6]; - s[6] = s[7]; - - s[0 + 8] = s[1 + 8]; - s[1 + 8] = s[2 + 8]; - s[2 + 8] = s[3 + 8]; - - s[4 + 8] = s[5 + 8]; - s[5 + 8] = s[6 + 8]; - s[6 + 8] = s[7 + 8]; - - s6 = s8; - } - } - } -} - -void av1_highbd_jnt_convolve_x_sse4_1( - const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w, - int h, const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, const int subpel_x_q4, - const int subpel_y_q4, ConvolveParams *conv_params, int bd) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint16_t *const src_ptr = src - fo_horiz; - const int bits = FILTER_BITS - conv_params->round_1; - (void)filter_params_y; - (void)subpel_y_q4; - - int i, j; - __m128i s[4], coeffs_x[4]; - - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi32(w0); - const __m128i wt1 = 
_mm_set1_epi32(w1); - const __m128i zero = _mm_setzero_si128(); - - const __m128i round_const_x = - _mm_set1_epi32(((1 << conv_params->round_0) >> 1)); - const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0); - const __m128i round_shift_bits = _mm_cvtsi32_si128(bits); - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m128i offset_const = _mm_set1_epi32(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1); - const __m128i clip_pixel_to_bd = - _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255)); - - assert(bits >= 0); - prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x); - - for (j = 0; j < w; j += 8) { - /* Horizontal filter */ - for (i = 0; i < h; i += 1) { - const __m128i row00 = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); - const __m128i row01 = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + (j + 8)]); - - // even pixels - s[0] = _mm_alignr_epi8(row01, row00, 0); - s[1] = _mm_alignr_epi8(row01, row00, 4); - s[2] = _mm_alignr_epi8(row01, row00, 8); - s[3] = _mm_alignr_epi8(row01, row00, 12); - - __m128i res_even = convolve(s, coeffs_x); - res_even = - _mm_sra_epi32(_mm_add_epi32(res_even, round_const_x), round_shift_x); - - // odd pixels - s[0] = _mm_alignr_epi8(row01, row00, 2); - s[1] = _mm_alignr_epi8(row01, row00, 6); - s[2] = _mm_alignr_epi8(row01, row00, 10); - s[3] = _mm_alignr_epi8(row01, row00, 14); - - __m128i res_odd = convolve(s, coeffs_x); - res_odd = - _mm_sra_epi32(_mm_add_epi32(res_odd, round_const_x), round_shift_x); - - res_even = _mm_sll_epi32(res_even, round_shift_bits); - res_odd = _mm_sll_epi32(res_odd, round_shift_bits); - - __m128i res1 = _mm_unpacklo_epi32(res_even, res_odd); - __m128i res_unsigned_lo = _mm_add_epi32(res1, offset_const); - if (w - j < 
8) { - if (do_average) { - const __m128i data_0 = - _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])); - const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero); - - const __m128i comp_avg_res = highbd_comp_avg_sse4_1( - &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - const __m128i round_result = highbd_convolve_rounding_sse2( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_16b = _mm_packus_epi32(round_result, round_result); - const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd); - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); - } else { - __m128i res_16b = _mm_packus_epi32(res_unsigned_lo, res_unsigned_lo); - _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_16b); - } - } else { - __m128i res2 = _mm_unpackhi_epi32(res_even, res_odd); - __m128i res_unsigned_hi = _mm_add_epi32(res2, offset_const); - if (do_average) { - const __m128i data_0 = - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])); - const __m128i data_ref_0_lo = _mm_unpacklo_epi16(data_0, zero); - const __m128i data_ref_0_hi = _mm_unpackhi_epi16(data_0, zero); - - const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1( - &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg); - const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1( - &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg); - - const __m128i round_result_lo = highbd_convolve_rounding_sse2( - &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift); - const __m128i round_result_hi = highbd_convolve_rounding_sse2( - &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_16b = - _mm_packus_epi32(round_result_lo, round_result_hi); - const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd); - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip); - } else { - __m128i res_16b = _mm_packus_epi32(res_unsigned_lo, res_unsigned_hi); - 
_mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16b); - } - } - } - } -} diff --git a/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h b/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h deleted file mode 100644 index 6f24e5948..000000000 --- a/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_COMMON_X86_HIGHBD_TXFM_UTILITY_SSE4_H_ -#define AOM_AV1_COMMON_X86_HIGHBD_TXFM_UTILITY_SSE4_H_ - -#include <smmintrin.h> /* SSE4.1 */ - -#define TRANSPOSE_4X4(x0, x1, x2, x3, y0, y1, y2, y3) \ - do { \ - __m128i u0, u1, u2, u3; \ - u0 = _mm_unpacklo_epi32(x0, x1); \ - u1 = _mm_unpackhi_epi32(x0, x1); \ - u2 = _mm_unpacklo_epi32(x2, x3); \ - u3 = _mm_unpackhi_epi32(x2, x3); \ - y0 = _mm_unpacklo_epi64(u0, u2); \ - y1 = _mm_unpackhi_epi64(u0, u2); \ - y2 = _mm_unpacklo_epi64(u1, u3); \ - y3 = _mm_unpackhi_epi64(u1, u3); \ - } while (0) - -static INLINE void transpose_8x8(const __m128i *in, __m128i *out) { - TRANSPOSE_4X4(in[0], in[2], in[4], in[6], out[0], out[2], out[4], out[6]); - TRANSPOSE_4X4(in[1], in[3], in[5], in[7], out[8], out[10], out[12], out[14]); - TRANSPOSE_4X4(in[8], in[10], in[12], in[14], out[1], out[3], out[5], out[7]); - TRANSPOSE_4X4(in[9], in[11], in[13], in[15], out[9], out[11], out[13], - out[15]); -} - -static INLINE void transpose_16x16(const __m128i *in, __m128i *out) { - // Upper left 8x8 - TRANSPOSE_4X4(in[0], in[4], in[8], in[12], 
out[0], out[4], out[8], out[12]); - TRANSPOSE_4X4(in[1], in[5], in[9], in[13], out[16], out[20], out[24], - out[28]); - TRANSPOSE_4X4(in[16], in[20], in[24], in[28], out[1], out[5], out[9], - out[13]); - TRANSPOSE_4X4(in[17], in[21], in[25], in[29], out[17], out[21], out[25], - out[29]); - - // Upper right 8x8 - TRANSPOSE_4X4(in[2], in[6], in[10], in[14], out[32], out[36], out[40], - out[44]); - TRANSPOSE_4X4(in[3], in[7], in[11], in[15], out[48], out[52], out[56], - out[60]); - TRANSPOSE_4X4(in[18], in[22], in[26], in[30], out[33], out[37], out[41], - out[45]); - TRANSPOSE_4X4(in[19], in[23], in[27], in[31], out[49], out[53], out[57], - out[61]); - - // Lower left 8x8 - TRANSPOSE_4X4(in[32], in[36], in[40], in[44], out[2], out[6], out[10], - out[14]); - TRANSPOSE_4X4(in[33], in[37], in[41], in[45], out[18], out[22], out[26], - out[30]); - TRANSPOSE_4X4(in[48], in[52], in[56], in[60], out[3], out[7], out[11], - out[15]); - TRANSPOSE_4X4(in[49], in[53], in[57], in[61], out[19], out[23], out[27], - out[31]); - // Lower right 8x8 - TRANSPOSE_4X4(in[34], in[38], in[42], in[46], out[34], out[38], out[42], - out[46]); - TRANSPOSE_4X4(in[35], in[39], in[43], in[47], out[50], out[54], out[58], - out[62]); - TRANSPOSE_4X4(in[50], in[54], in[58], in[62], out[35], out[39], out[43], - out[47]); - TRANSPOSE_4X4(in[51], in[55], in[59], in[63], out[51], out[55], out[59], - out[63]); -} - -static INLINE void transpose_32x32(const __m128i *input, __m128i *output) { - for (int j = 0; j < 8; j++) { - for (int i = 0; i < 8; i++) { - TRANSPOSE_4X4(input[i * 32 + j + 0], input[i * 32 + j + 8], - input[i * 32 + j + 16], input[i * 32 + j + 24], - output[j * 32 + i + 0], output[j * 32 + i + 8], - output[j * 32 + i + 16], output[j * 32 + i + 24]); - } - } -} - -// Note: -// rounding = 1 << (bit - 1) -static INLINE __m128i half_btf_sse4_1(const __m128i *w0, const __m128i *n0, - const __m128i *w1, const __m128i *n1, - const __m128i *rounding, int bit) { - __m128i x, y; - - x = 
_mm_mullo_epi32(*w0, *n0); - y = _mm_mullo_epi32(*w1, *n1); - x = _mm_add_epi32(x, y); - x = _mm_add_epi32(x, *rounding); - x = _mm_srai_epi32(x, bit); - return x; -} - -static INLINE __m128i half_btf_0_sse4_1(const __m128i *w0, const __m128i *n0, - const __m128i *rounding, int bit) { - __m128i x; - - x = _mm_mullo_epi32(*w0, *n0); - x = _mm_add_epi32(x, *rounding); - x = _mm_srai_epi32(x, bit); - return x; -} - -typedef void (*transform_1d_sse4_1)(__m128i *in, __m128i *out, int bit, - int do_cols, int bd, int out_shift); - -typedef void (*fwd_transform_1d_sse4_1)(__m128i *in, __m128i *out, int bit, - const int num_cols); - -void av1_highbd_inv_txfm2d_add_universe_sse4_1(const int32_t *input, - uint8_t *output, int stride, - TX_TYPE tx_type, TX_SIZE tx_size, - int eob, const int bd); - -#endif // AOM_AV1_COMMON_X86_HIGHBD_TXFM_UTILITY_SSE4_H_ diff --git a/third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c b/third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c deleted file mode 100644 index 4bcab0564..000000000 --- a/third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <smmintrin.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/warped_motion.h" - -static const uint8_t warp_highbd_arrange_bytes[16] = { - 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 -}; - -static const uint8_t highbd_shuffle_alpha0_mask0[16] = { - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 -}; -static const uint8_t highbd_shuffle_alpha0_mask1[16] = { - 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7 -}; -static const uint8_t highbd_shuffle_alpha0_mask2[16] = { - 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11 -}; -static const uint8_t highbd_shuffle_alpha0_mask3[16] = { - 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 -}; - -static INLINE void highbd_prepare_horizontal_filter_coeff(int alpha, int sx, - __m128i *coeff) { - // Filter even-index pixels - const __m128i tmp_0 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_2 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_4 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_6 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS))); - - // coeffs 0 1 0 1 2 3 2 3 for pixels 0, 2 - const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2); - // coeffs 0 1 0 1 2 3 2 3 for pixels 4, 6 - const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6); - // coeffs 4 5 4 5 6 7 6 7 for pixels 0, 2 - const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2); - // coeffs 4 5 4 5 6 7 6 7 for pixels 4, 6 - const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6); - - // coeffs 0 1 0 1 0 1 0 1 for pixels 0, 2, 4, 6 - coeff[0] = _mm_unpacklo_epi64(tmp_8, tmp_10); - // coeffs 2 3 2 3 2 3 2 3 for pixels 0, 2, 4, 6 - coeff[2] = _mm_unpackhi_epi64(tmp_8, tmp_10); - // coeffs 4 5 4 5 4 5 4 5 for pixels 0, 2, 4, 6 - coeff[4] = _mm_unpacklo_epi64(tmp_12, tmp_14); 
- // coeffs 6 7 6 7 6 7 6 7 for pixels 0, 2, 4, 6 - coeff[6] = _mm_unpackhi_epi64(tmp_12, tmp_14); - - // Filter odd-index pixels - const __m128i tmp_1 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_3 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_5 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_7 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS))); - - const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3); - const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7); - const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3); - const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7); - - coeff[1] = _mm_unpacklo_epi64(tmp_9, tmp_11); - coeff[3] = _mm_unpackhi_epi64(tmp_9, tmp_11); - coeff[5] = _mm_unpacklo_epi64(tmp_13, tmp_15); - coeff[7] = _mm_unpackhi_epi64(tmp_13, tmp_15); -} - -static INLINE void highbd_prepare_horizontal_filter_coeff_alpha0( - int sx, __m128i *coeff) { - // Filter coeff - const __m128i tmp_0 = _mm_loadu_si128( - (__m128i *)(warped_filter + (sx >> WARPEDDIFF_PREC_BITS))); - - coeff[0] = _mm_shuffle_epi8( - tmp_0, _mm_loadu_si128((__m128i *)highbd_shuffle_alpha0_mask0)); - coeff[2] = _mm_shuffle_epi8( - tmp_0, _mm_loadu_si128((__m128i *)highbd_shuffle_alpha0_mask1)); - coeff[4] = _mm_shuffle_epi8( - tmp_0, _mm_loadu_si128((__m128i *)highbd_shuffle_alpha0_mask2)); - coeff[6] = _mm_shuffle_epi8( - tmp_0, _mm_loadu_si128((__m128i *)highbd_shuffle_alpha0_mask3)); - - coeff[1] = coeff[0]; - coeff[3] = coeff[2]; - coeff[5] = coeff[4]; - coeff[7] = coeff[6]; -} - -static INLINE void highbd_filter_src_pixels( - const __m128i *src, const __m128i *src2, __m128i *tmp, __m128i *coeff, - const int offset_bits_horiz, const int reduce_bits_horiz, int k) { - const __m128i src_1 = *src; - const __m128i src2_1 = *src2; - - 
const __m128i round_const = _mm_set1_epi32((1 << offset_bits_horiz) + - ((1 << reduce_bits_horiz) >> 1)); - - const __m128i res_0 = _mm_madd_epi16(src_1, coeff[0]); - const __m128i res_2 = - _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 4), coeff[2]); - const __m128i res_4 = - _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 8), coeff[4]); - const __m128i res_6 = - _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 12), coeff[6]); - - __m128i res_even = - _mm_add_epi32(_mm_add_epi32(res_0, res_4), _mm_add_epi32(res_2, res_6)); - res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const), - _mm_cvtsi32_si128(reduce_bits_horiz)); - - const __m128i res_1 = - _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 2), coeff[1]); - const __m128i res_3 = - _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 6), coeff[3]); - const __m128i res_5 = - _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 10), coeff[5]); - const __m128i res_7 = - _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 14), coeff[7]); - - __m128i res_odd = - _mm_add_epi32(_mm_add_epi32(res_1, res_5), _mm_add_epi32(res_3, res_7)); - res_odd = _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), - _mm_cvtsi32_si128(reduce_bits_horiz)); - - // Combine results into one register. - // We store the columns in the order 0, 2, 4, 6, 1, 3, 5, 7 - // as this order helps with the vertical filter. 
- tmp[k + 7] = _mm_packs_epi32(res_even, res_odd); -} - -static INLINE void highbd_horiz_filter(const __m128i *src, const __m128i *src2, - __m128i *tmp, int sx, int alpha, int k, - const int offset_bits_horiz, - const int reduce_bits_horiz) { - __m128i coeff[8]; - highbd_prepare_horizontal_filter_coeff(alpha, sx, coeff); - highbd_filter_src_pixels(src, src2, tmp, coeff, offset_bits_horiz, - reduce_bits_horiz, k); -} - -static INLINE void highbd_warp_horizontal_filter_alpha0_beta0( - const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, int p_height, int height, int i, - const int offset_bits_horiz, const int reduce_bits_horiz) { - (void)beta; - (void)alpha; - int k; - - __m128i coeff[8]; - highbd_prepare_horizontal_filter_coeff_alpha0(sx4, coeff); - - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - - // Load source pixels - const __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - const __m128i src2 = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1)); - highbd_filter_src_pixels(&src, &src2, tmp, coeff, offset_bits_horiz, - reduce_bits_horiz, k); - } -} - -static INLINE void highbd_warp_horizontal_filter_alpha0( - const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, int p_height, int height, int i, - const int offset_bits_horiz, const int reduce_bits_horiz) { - (void)alpha; - int k; - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int sx = sx4 + beta * (k + 4); - - // Load source pixels - const __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - const __m128i src2 = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1)); - - __m128i coeff[8]; - highbd_prepare_horizontal_filter_coeff_alpha0(sx, coeff); - 
highbd_filter_src_pixels(&src, &src2, tmp, coeff, offset_bits_horiz, - reduce_bits_horiz, k); - } -} - -static INLINE void highbd_warp_horizontal_filter_beta0( - const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, int p_height, int height, int i, - const int offset_bits_horiz, const int reduce_bits_horiz) { - (void)beta; - int k; - __m128i coeff[8]; - highbd_prepare_horizontal_filter_coeff(alpha, sx4, coeff); - - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - - // Load source pixels - const __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - const __m128i src2 = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1)); - highbd_filter_src_pixels(&src, &src2, tmp, coeff, offset_bits_horiz, - reduce_bits_horiz, k); - } -} - -static INLINE void highbd_warp_horizontal_filter( - const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, int p_height, int height, int i, - const int offset_bits_horiz, const int reduce_bits_horiz) { - int k; - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int sx = sx4 + beta * (k + 4); - - // Load source pixels - const __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - const __m128i src2 = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1)); - - highbd_horiz_filter(&src, &src2, tmp, sx, alpha, k, offset_bits_horiz, - reduce_bits_horiz); - } -} - -static INLINE void highbd_prepare_warp_horizontal_filter( - const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, int p_height, int height, int i, - const int offset_bits_horiz, const int reduce_bits_horiz) { - if (alpha == 0 && beta == 0) - highbd_warp_horizontal_filter_alpha0_beta0( - 
ref, tmp, stride, ix4, iy4, sx4, alpha, beta, p_height, height, i, - offset_bits_horiz, reduce_bits_horiz); - - else if (alpha == 0 && beta != 0) - highbd_warp_horizontal_filter_alpha0(ref, tmp, stride, ix4, iy4, sx4, alpha, - beta, p_height, height, i, - offset_bits_horiz, reduce_bits_horiz); - - else if (alpha != 0 && beta == 0) - highbd_warp_horizontal_filter_beta0(ref, tmp, stride, ix4, iy4, sx4, alpha, - beta, p_height, height, i, - offset_bits_horiz, reduce_bits_horiz); - else - highbd_warp_horizontal_filter(ref, tmp, stride, ix4, iy4, sx4, alpha, beta, - p_height, height, i, offset_bits_horiz, - reduce_bits_horiz); -} - -void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, - int width, int height, int stride, - uint16_t *pred, int p_col, int p_row, - int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, int bd, - ConvolveParams *conv_params, int16_t alpha, - int16_t beta, int16_t gamma, int16_t delta) { - __m128i tmp[15]; - int i, j, k; - const int reduce_bits_horiz = - conv_params->round_0 + - AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0); - const int reduce_bits_vert = conv_params->is_compound - ? conv_params->round_1 - : 2 * FILTER_BITS - reduce_bits_horiz; - const int offset_bits_horiz = bd + FILTER_BITS - 1; - assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL)); - assert(!(bd == 12 && reduce_bits_horiz < 5)); - assert(IMPLIES(conv_params->do_average, conv_params->is_compound)); - - const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz; - const __m128i clip_pixel = - _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 
4095 : 255)); - const __m128i reduce_bits_vert_shift = _mm_cvtsi32_si128(reduce_bits_vert); - const __m128i reduce_bits_vert_const = - _mm_set1_epi32(((1 << reduce_bits_vert) >> 1)); - const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits_vert); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - const __m128i res_sub_const = - _mm_set1_epi32(-(1 << (offset_bits - conv_params->round_1)) - - (1 << (offset_bits - conv_params->round_1 - 1))); - __m128i round_bits_shift = _mm_cvtsi32_si128(round_bits); - __m128i round_bits_const = _mm_set1_epi32(((1 << round_bits) >> 1)); - - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi32(w0); - const __m128i wt1 = _mm_set1_epi32(w1); - - /* Note: For this code to work, the left/right frame borders need to be - extended by at least 13 pixels each. By the time we get here, other - code will have set up this border, but we allow an explicit check - for debugging purposes. 
- */ - /*for (i = 0; i < height; ++i) { - for (j = 0; j < 13; ++j) { - assert(ref[i * stride - 13 + j] == ref[i * stride]); - assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]); - } - }*/ - - for (i = 0; i < p_height; i += 8) { - for (j = 0; j < p_width; j += 8) { - const int32_t src_x = (p_col + j + 4) << subsampling_x; - const int32_t src_y = (p_row + i + 4) << subsampling_y; - const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0]; - const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1]; - const int32_t x4 = dst_x >> subsampling_x; - const int32_t y4 = dst_y >> subsampling_y; - - int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS; - int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS; - int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - - // Add in all the constant terms, including rounding and offset - sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) + - (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS); - sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) + - (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS); - - sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - - // Horizontal filter - // If the block is aligned such that, after clamping, every sample - // would be taken from the leftmost/rightmost column, then we can - // skip the expensive horizontal filter. 
- if (ix4 <= -7) { - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - tmp[k + 7] = _mm_set1_epi16( - (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) + - ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz))); - } - } else if (ix4 >= width + 6) { - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - tmp[k + 7] = - _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) + - ref[iy * stride + (width - 1)] * - (1 << (FILTER_BITS - reduce_bits_horiz))); - } - } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) { - const int out_of_boundary_left = -(ix4 - 6); - const int out_of_boundary_right = (ix4 + 8) - width; - - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int sx = sx4 + beta * (k + 4); - - // Load source pixels - const __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - const __m128i src2 = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1)); - - const __m128i src_01 = _mm_shuffle_epi8( - src, _mm_loadu_si128((__m128i *)warp_highbd_arrange_bytes)); - const __m128i src2_01 = _mm_shuffle_epi8( - src2, _mm_loadu_si128((__m128i *)warp_highbd_arrange_bytes)); - - __m128i src_lo = _mm_unpacklo_epi64(src_01, src2_01); - __m128i src_hi = _mm_unpackhi_epi64(src_01, src2_01); - - if (out_of_boundary_left >= 0) { - const __m128i shuffle_reg_left = - _mm_loadu_si128((__m128i *)warp_pad_left[out_of_boundary_left]); - src_lo = _mm_shuffle_epi8(src_lo, shuffle_reg_left); - src_hi = _mm_shuffle_epi8(src_hi, shuffle_reg_left); - } - - if (out_of_boundary_right >= 0) { - const __m128i shuffle_reg_right = _mm_loadu_si128( - (__m128i *)warp_pad_right[out_of_boundary_right]); - src_lo = _mm_shuffle_epi8(src_lo, shuffle_reg_right); - src_hi = 
_mm_shuffle_epi8(src_hi, shuffle_reg_right); - } - - const __m128i src_padded = _mm_unpacklo_epi8(src_lo, src_hi); - const __m128i src2_padded = _mm_unpackhi_epi8(src_lo, src_hi); - - highbd_horiz_filter(&src_padded, &src2_padded, tmp, sx, alpha, k, - offset_bits_horiz, reduce_bits_horiz); - } - } else { - highbd_prepare_warp_horizontal_filter( - ref, tmp, stride, ix4, iy4, sx4, alpha, beta, p_height, height, i, - offset_bits_horiz, reduce_bits_horiz); - } - - // Vertical filter - for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) { - int sy = sy4 + delta * (k + 4); - - // Load from tmp and rearrange pairs of consecutive rows into the - // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7 - const __m128i *src = tmp + (k + 4); - const __m128i src_0 = _mm_unpacklo_epi16(src[0], src[1]); - const __m128i src_2 = _mm_unpacklo_epi16(src[2], src[3]); - const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]); - const __m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]); - - // Filter even-index pixels - const __m128i tmp_0 = _mm_loadu_si128( - (__m128i *)(warped_filter + - ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_2 = _mm_loadu_si128( - (__m128i *)(warped_filter + - ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_4 = _mm_loadu_si128( - (__m128i *)(warped_filter + - ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_6 = _mm_loadu_si128( - (__m128i *)(warped_filter + - ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS))); - - const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2); - const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6); - const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2); - const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6); - - const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10); - const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10); - const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14); - const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14); - - const __m128i res_0 = 
_mm_madd_epi16(src_0, coeff_0); - const __m128i res_2 = _mm_madd_epi16(src_2, coeff_2); - const __m128i res_4 = _mm_madd_epi16(src_4, coeff_4); - const __m128i res_6 = _mm_madd_epi16(src_6, coeff_6); - - const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2), - _mm_add_epi32(res_4, res_6)); - - // Filter odd-index pixels - const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]); - const __m128i src_3 = _mm_unpackhi_epi16(src[2], src[3]); - const __m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]); - const __m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]); - - const __m128i tmp_1 = _mm_loadu_si128( - (__m128i *)(warped_filter + - ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_3 = _mm_loadu_si128( - (__m128i *)(warped_filter + - ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_5 = _mm_loadu_si128( - (__m128i *)(warped_filter + - ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_7 = _mm_loadu_si128( - (__m128i *)(warped_filter + - ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS))); - - const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3); - const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7); - const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3); - const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7); - - const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11); - const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11); - const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15); - const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15); - - const __m128i res_1 = _mm_madd_epi16(src_1, coeff_1); - const __m128i res_3 = _mm_madd_epi16(src_3, coeff_3); - const __m128i res_5 = _mm_madd_epi16(src_5, coeff_5); - const __m128i res_7 = _mm_madd_epi16(src_7, coeff_7); - - const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3), - _mm_add_epi32(res_5, res_7)); - - // Rearrange pixels back into the order 0 ... 
7 - __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); - __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); - - if (conv_params->is_compound) { - __m128i *const p = - (__m128i *)&conv_params - ->dst[(i + k + 4) * conv_params->dst_stride + j]; - res_lo = _mm_add_epi32(res_lo, res_add_const); - res_lo = _mm_sra_epi32(_mm_add_epi32(res_lo, reduce_bits_vert_const), - reduce_bits_vert_shift); - - if (conv_params->do_average) { - __m128i *const dst16 = (__m128i *)&pred[(i + k + 4) * p_stride + j]; - __m128i p_32 = _mm_cvtepu16_epi32(_mm_loadl_epi64(p)); - - if (conv_params->use_jnt_comp_avg) { - res_lo = _mm_add_epi32(_mm_mullo_epi32(p_32, wt0), - _mm_mullo_epi32(res_lo, wt1)); - res_lo = _mm_srai_epi32(res_lo, DIST_PRECISION_BITS); - } else { - res_lo = _mm_srai_epi32(_mm_add_epi32(p_32, res_lo), 1); - } - - __m128i res32_lo = _mm_add_epi32(res_lo, res_sub_const); - res32_lo = _mm_sra_epi32(_mm_add_epi32(res32_lo, round_bits_const), - round_bits_shift); - - __m128i res16_lo = _mm_packus_epi32(res32_lo, res32_lo); - res16_lo = _mm_min_epi16(res16_lo, clip_pixel); - _mm_storel_epi64(dst16, res16_lo); - } else { - res_lo = _mm_packus_epi32(res_lo, res_lo); - _mm_storel_epi64(p, res_lo); - } - if (p_width > 4) { - __m128i *const p4 = - (__m128i *)&conv_params - ->dst[(i + k + 4) * conv_params->dst_stride + j + 4]; - - res_hi = _mm_add_epi32(res_hi, res_add_const); - res_hi = - _mm_sra_epi32(_mm_add_epi32(res_hi, reduce_bits_vert_const), - reduce_bits_vert_shift); - if (conv_params->do_average) { - __m128i *const dst16_4 = - (__m128i *)&pred[(i + k + 4) * p_stride + j + 4]; - __m128i p4_32 = _mm_cvtepu16_epi32(_mm_loadl_epi64(p4)); - - if (conv_params->use_jnt_comp_avg) { - res_hi = _mm_add_epi32(_mm_mullo_epi32(p4_32, wt0), - _mm_mullo_epi32(res_hi, wt1)); - res_hi = _mm_srai_epi32(res_hi, DIST_PRECISION_BITS); - } else { - res_hi = _mm_srai_epi32(_mm_add_epi32(p4_32, res_hi), 1); - } - - __m128i res32_hi = _mm_add_epi32(res_hi, res_sub_const); - res32_hi = 
_mm_sra_epi32( - _mm_add_epi32(res32_hi, round_bits_const), round_bits_shift); - __m128i res16_hi = _mm_packus_epi32(res32_hi, res32_hi); - res16_hi = _mm_min_epi16(res16_hi, clip_pixel); - _mm_storel_epi64(dst16_4, res16_hi); - } else { - res_hi = _mm_packus_epi32(res_hi, res_hi); - _mm_storel_epi64(p4, res_hi); - } - } - } else { - // Round and pack into 8 bits - const __m128i round_const = - _mm_set1_epi32(-(1 << (bd + reduce_bits_vert - 1)) + - ((1 << reduce_bits_vert) >> 1)); - - const __m128i res_lo_round = _mm_srai_epi32( - _mm_add_epi32(res_lo, round_const), reduce_bits_vert); - const __m128i res_hi_round = _mm_srai_epi32( - _mm_add_epi32(res_hi, round_const), reduce_bits_vert); - - __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round); - // Clamp res_16bit to the range [0, 2^bd - 1] - const __m128i max_val = _mm_set1_epi16((1 << bd) - 1); - const __m128i zero = _mm_setzero_si128(); - res_16bit = _mm_max_epi16(_mm_min_epi16(res_16bit, max_val), zero); - - // Store, blending with 'pred' if needed - __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j]; - - // Note: If we're outputting a 4x4 block, we need to be very careful - // to only output 4 pixels at this point, to avoid encode/decode - // mismatches when encoding with multiple threads. - if (p_width == 4) { - _mm_storel_epi64(p, res_16bit); - } else { - _mm_storeu_si128(p, res_16bit); - } - } - } - } - } -} diff --git a/third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c b/third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c deleted file mode 100644 index 0c8a8505b..000000000 --- a/third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <immintrin.h> -#include <assert.h> - -#include "config/aom_dsp_rtcd.h" - -#include "av1/common/convolve.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "aom_dsp/x86/synonyms.h" -#include "aom_dsp/x86/synonyms_avx2.h" - -// 128-bit xmmwords are written as [ ... ] with the MSB on the left. -// 256-bit ymmwords are written as two xmmwords, [ ... ][ ... ] with the MSB -// on the left. -// A row of, say, 16-bit pixels with values p0, p1, p2, ..., p14, p15 will be -// loaded and stored as [ p15 ... p9 p8 ][ p7 ... p1 p0 ]. -void av1_highbd_wiener_convolve_add_src_avx2( - const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8, - ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, int w, int h, - const ConvolveParams *conv_params, int bd) { - assert(x_step_q4 == 16 && y_step_q4 == 16); - assert(!(w & 7)); - assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16); - (void)x_step_q4; - (void)y_step_q4; - - const uint16_t *const src = CONVERT_TO_SHORTPTR(src8); - uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8); - - DECLARE_ALIGNED(32, uint16_t, - temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]); - int intermediate_height = h + SUBPEL_TAPS - 1; - const int center_tap = ((SUBPEL_TAPS - 1) / 2); - const uint16_t *const src_ptr = src - center_tap * src_stride - center_tap; - - const __m128i zero_128 = _mm_setzero_si128(); - const __m256i zero_256 = _mm256_setzero_si256(); - - // Add an offset to account for the "add_src" part of the convolve function. 
- const __m128i offset = _mm_insert_epi16(zero_128, 1 << FILTER_BITS, 3); - - const __m256i clamp_low = zero_256; - - /* Horizontal filter */ - { - const __m256i clamp_high_ep = - _mm256_set1_epi16(WIENER_CLAMP_LIMIT(conv_params->round_0, bd) - 1); - - // coeffs [ f7 f6 f5 f4 f3 f2 f1 f0 ] - const __m128i coeffs_x = _mm_add_epi16(xx_loadu_128(filter_x), offset); - - // coeffs [ f3 f2 f3 f2 f1 f0 f1 f0 ] - const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_x, coeffs_x); - // coeffs [ f7 f6 f7 f6 f5 f4 f5 f4 ] - const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_x, coeffs_x); - - // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ] - const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123); - // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ] - const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123); - // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ] - const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567); - // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ] - const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567); - - // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ][ f1 f0 f1 f0 f1 f0 f1 f0 ] - const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128); - // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ][ f3 f2 f3 f2 f3 f2 f3 f2 ] - const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128); - // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ][ f5 f4 f5 f4 f5 f4 f5 f4 ] - const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128); - // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ][ f7 f6 f7 f6 f7 f6 f7 f6 ] - const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128); - - const __m256i round_const = _mm256_set1_epi32( - (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1))); - - for (int i = 0; i < intermediate_height; ++i) { - for (int j = 0; j < w; j += 16) { - const uint16_t *src_ij = src_ptr + i * src_stride + j; - - // Load 16-bit src data - const __m256i src_0 = yy_loadu_256(src_ij + 0); - const __m256i src_1 = 
yy_loadu_256(src_ij + 1); - const __m256i src_2 = yy_loadu_256(src_ij + 2); - const __m256i src_3 = yy_loadu_256(src_ij + 3); - const __m256i src_4 = yy_loadu_256(src_ij + 4); - const __m256i src_5 = yy_loadu_256(src_ij + 5); - const __m256i src_6 = yy_loadu_256(src_ij + 6); - const __m256i src_7 = yy_loadu_256(src_ij + 7); - - // Multiply src data by filter coeffs and sum pairs - const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); - const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01); - const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); - const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23); - const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45); - const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45); - const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67); - const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67); - - // Calculate scalar product for even- and odd-indices separately, - // increasing to 32-bit precision - const __m256i res_even_sum = _mm256_add_epi32( - _mm256_add_epi32(res_0, res_4), _mm256_add_epi32(res_2, res_6)); - const __m256i res_even = _mm256_srai_epi32( - _mm256_add_epi32(res_even_sum, round_const), conv_params->round_0); - - const __m256i res_odd_sum = _mm256_add_epi32( - _mm256_add_epi32(res_1, res_5), _mm256_add_epi32(res_3, res_7)); - const __m256i res_odd = _mm256_srai_epi32( - _mm256_add_epi32(res_odd_sum, round_const), conv_params->round_0); - - // Reduce to 16-bit precision and pack even- and odd-index results - // back into one register. 
The _mm256_packs_epi32 intrinsic returns - // a register with the pixels ordered as follows: - // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ] - const __m256i res = _mm256_packs_epi32(res_even, res_odd); - const __m256i res_clamped = - _mm256_min_epi16(_mm256_max_epi16(res, clamp_low), clamp_high_ep); - - // Store in a temporary array - yy_storeu_256(temp + i * MAX_SB_SIZE + j, res_clamped); - } - } - } - - /* Vertical filter */ - { - const __m256i clamp_high = _mm256_set1_epi16((1 << bd) - 1); - - // coeffs [ f7 f6 f5 f4 f3 f2 f1 f0 ] - const __m128i coeffs_y = _mm_add_epi16(xx_loadu_128(filter_y), offset); - - // coeffs [ f3 f2 f3 f2 f1 f0 f1 f0 ] - const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_y, coeffs_y); - // coeffs [ f7 f6 f7 f6 f5 f4 f5 f4 ] - const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_y, coeffs_y); - - // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ] - const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123); - // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ] - const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123); - // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ] - const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567); - // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ] - const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567); - - // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ][ f1 f0 f1 f0 f1 f0 f1 f0 ] - const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128); - // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ][ f3 f2 f3 f2 f3 f2 f3 f2 ] - const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128); - // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ][ f5 f4 f5 f4 f5 f4 f5 f4 ] - const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128); - // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ][ f7 f6 f7 f6 f7 f6 f7 f6 ] - const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128); - - const __m256i round_const = - _mm256_set1_epi32((1 << (conv_params->round_1 - 1)) - - (1 << (bd + conv_params->round_1 - 
1))); - - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 16) { - const uint16_t *temp_ij = temp + i * MAX_SB_SIZE + j; - - // Load 16-bit data from the output of the horizontal filter in - // which the pixels are ordered as follows: - // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ] - const __m256i data_0 = yy_loadu_256(temp_ij + 0 * MAX_SB_SIZE); - const __m256i data_1 = yy_loadu_256(temp_ij + 1 * MAX_SB_SIZE); - const __m256i data_2 = yy_loadu_256(temp_ij + 2 * MAX_SB_SIZE); - const __m256i data_3 = yy_loadu_256(temp_ij + 3 * MAX_SB_SIZE); - const __m256i data_4 = yy_loadu_256(temp_ij + 4 * MAX_SB_SIZE); - const __m256i data_5 = yy_loadu_256(temp_ij + 5 * MAX_SB_SIZE); - const __m256i data_6 = yy_loadu_256(temp_ij + 6 * MAX_SB_SIZE); - const __m256i data_7 = yy_loadu_256(temp_ij + 7 * MAX_SB_SIZE); - - // Filter the even-indices, increasing to 32-bit precision - const __m256i src_0 = _mm256_unpacklo_epi16(data_0, data_1); - const __m256i src_2 = _mm256_unpacklo_epi16(data_2, data_3); - const __m256i src_4 = _mm256_unpacklo_epi16(data_4, data_5); - const __m256i src_6 = _mm256_unpacklo_epi16(data_6, data_7); - - const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); - const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); - const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45); - const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67); - - const __m256i res_even = _mm256_add_epi32( - _mm256_add_epi32(res_0, res_2), _mm256_add_epi32(res_4, res_6)); - - // Filter the odd-indices, increasing to 32-bit precision - const __m256i src_1 = _mm256_unpackhi_epi16(data_0, data_1); - const __m256i src_3 = _mm256_unpackhi_epi16(data_2, data_3); - const __m256i src_5 = _mm256_unpackhi_epi16(data_4, data_5); - const __m256i src_7 = _mm256_unpackhi_epi16(data_6, data_7); - - const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01); - const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23); - const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45); 
- const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67); - - const __m256i res_odd = _mm256_add_epi32( - _mm256_add_epi32(res_1, res_3), _mm256_add_epi32(res_5, res_7)); - - // Pixels are currently in the following order: - // res_even order: [ 14 12 10 8 ] [ 6 4 2 0 ] - // res_odd order: [ 15 13 11 9 ] [ 7 5 3 1 ] - // - // Rearrange the pixels into the following order: - // res_lo order: [ 11 10 9 8 ] [ 3 2 1 0 ] - // res_hi order: [ 15 14 13 12 ] [ 7 6 5 4 ] - const __m256i res_lo = _mm256_unpacklo_epi32(res_even, res_odd); - const __m256i res_hi = _mm256_unpackhi_epi32(res_even, res_odd); - - const __m256i res_lo_round = _mm256_srai_epi32( - _mm256_add_epi32(res_lo, round_const), conv_params->round_1); - const __m256i res_hi_round = _mm256_srai_epi32( - _mm256_add_epi32(res_hi, round_const), conv_params->round_1); - - // Reduce to 16-bit precision and pack into the correct order: - // [ 15 14 13 12 11 10 9 8 ][ 7 6 5 4 3 2 1 0 ] - const __m256i res_16bit = - _mm256_packs_epi32(res_lo_round, res_hi_round); - const __m256i res_16bit_clamped = _mm256_min_epi16( - _mm256_max_epi16(res_16bit, clamp_low), clamp_high); - - // Store in the dst array - yy_storeu_256(dst + i * dst_stride + j, res_16bit_clamped); - } - } - } -} diff --git a/third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c b/third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c deleted file mode 100644 index 818b1099c..000000000 --- a/third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <tmmintrin.h> -#include <assert.h> - -#include "config/aom_dsp_rtcd.h" - -#include "av1/common/convolve.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" - -void av1_highbd_wiener_convolve_add_src_ssse3( - const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8, - ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, int w, int h, - const ConvolveParams *conv_params, int bd) { - assert(x_step_q4 == 16 && y_step_q4 == 16); - assert(!(w & 7)); - assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16); - (void)x_step_q4; - (void)y_step_q4; - - const uint16_t *const src = CONVERT_TO_SHORTPTR(src8); - uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8); - - DECLARE_ALIGNED(16, uint16_t, - temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]); - int intermediate_height = h + SUBPEL_TAPS - 1; - int i, j; - const int center_tap = ((SUBPEL_TAPS - 1) / 2); - const uint16_t *const src_ptr = src - center_tap * src_stride - center_tap; - - const __m128i zero = _mm_setzero_si128(); - // Add an offset to account for the "add_src" part of the convolve function. 
- const __m128i offset = _mm_insert_epi16(zero, 1 << FILTER_BITS, 3); - - /* Horizontal filter */ - { - const __m128i coeffs_x = - _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_x), offset); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i round_const = _mm_set1_epi32( - (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1))); - - for (i = 0; i < intermediate_height; ++i) { - for (j = 0; j < w; j += 8) { - const __m128i data = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); - const __m128i data2 = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]); - - // Filter even-index pixels - const __m128i res_0 = _mm_madd_epi16(data, coeff_01); - const __m128i res_2 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23); - const __m128i res_4 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45); - const __m128i res_6 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67); - - __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4), - _mm_add_epi32(res_2, res_6)); - res_even = _mm_srai_epi32(_mm_add_epi32(res_even, round_const), - conv_params->round_0); - - // Filter odd-index pixels - const __m128i res_1 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01); - const __m128i res_3 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23); - const __m128i res_5 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45); - const __m128i res_7 = - _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), 
coeff_67); - - __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5), - _mm_add_epi32(res_3, res_7)); - res_odd = _mm_srai_epi32(_mm_add_epi32(res_odd, round_const), - conv_params->round_0); - - // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7 - const __m128i maxval = - _mm_set1_epi16((WIENER_CLAMP_LIMIT(conv_params->round_0, bd)) - 1); - __m128i res = _mm_packs_epi32(res_even, res_odd); - res = _mm_min_epi16(_mm_max_epi16(res, zero), maxval); - _mm_storeu_si128((__m128i *)&temp[i * MAX_SB_SIZE + j], res); - } - } - } - - /* Vertical filter */ - { - const __m128i coeffs_y = - _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_y), offset); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i round_const = - _mm_set1_epi32((1 << (conv_params->round_1 - 1)) - - (1 << (bd + conv_params->round_1 - 1))); - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - // Filter even-index pixels - const uint16_t *data = &temp[i * MAX_SB_SIZE + j]; - const __m128i src_0 = - _mm_unpacklo_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE), - *(__m128i *)(data + 1 * MAX_SB_SIZE)); - const __m128i src_2 = - _mm_unpacklo_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE), - *(__m128i *)(data + 3 * MAX_SB_SIZE)); - const __m128i src_4 = - _mm_unpacklo_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE), - *(__m128i *)(data + 5 * MAX_SB_SIZE)); - const __m128i src_6 = - _mm_unpacklo_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE), - *(__m128i *)(data + 7 * MAX_SB_SIZE)); - - const __m128i res_0 = 
_mm_madd_epi16(src_0, coeff_01); - const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); - const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); - const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67); - - const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2), - _mm_add_epi32(res_4, res_6)); - - // Filter odd-index pixels - const __m128i src_1 = - _mm_unpackhi_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE), - *(__m128i *)(data + 1 * MAX_SB_SIZE)); - const __m128i src_3 = - _mm_unpackhi_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE), - *(__m128i *)(data + 3 * MAX_SB_SIZE)); - const __m128i src_5 = - _mm_unpackhi_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE), - *(__m128i *)(data + 5 * MAX_SB_SIZE)); - const __m128i src_7 = - _mm_unpackhi_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE), - *(__m128i *)(data + 7 * MAX_SB_SIZE)); - - const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01); - const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23); - const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45); - const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67); - - const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3), - _mm_add_epi32(res_5, res_7)); - - // Rearrange pixels back into the order 0 ... 
7 - const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); - const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); - - const __m128i res_lo_round = _mm_srai_epi32( - _mm_add_epi32(res_lo, round_const), conv_params->round_1); - const __m128i res_hi_round = _mm_srai_epi32( - _mm_add_epi32(res_hi, round_const), conv_params->round_1); - - const __m128i maxval = _mm_set1_epi16((1 << bd) - 1); - __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round); - res_16bit = _mm_min_epi16(_mm_max_epi16(res_16bit, zero), maxval); - - __m128i *const p = (__m128i *)&dst[i * dst_stride + j]; - _mm_storeu_si128(p, res_16bit); - } - } - } -} diff --git a/third_party/aom/av1/common/x86/intra_edge_sse4.c b/third_party/aom/av1/common/x86/intra_edge_sse4.c deleted file mode 100644 index 0c857b583..000000000 --- a/third_party/aom/av1/common/x86/intra_edge_sse4.c +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <assert.h> -#include <smmintrin.h> - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength) { - if (!strength) return; - - DECLARE_ALIGNED(16, static const int8_t, kern[3][16]) = { - { 4, 8, 4, 0, 4, 8, 4, 0, 4, 8, 4, 0, 4, 8, 4, 0 }, // strength 1: 4,8,4 - { 5, 6, 5, 0, 5, 6, 5, 0, 5, 6, 5, 0, 5, 6, 5, 0 }, // strength 2: 5,6,5 - { 2, 4, 4, 4, 2, 0, 0, 0, 2, 4, 4, 4, 2, 0, 0, 0 } // strength 3: 2,4,4,4,2 - }; - - DECLARE_ALIGNED(16, static const int8_t, v_const[5][16]) = { - { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 }, - { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - }; - - // Extend the first and last samples to simplify the loop for the 5-tap case - p[-1] = p[0]; - __m128i last = _mm_set1_epi8(p[sz - 1]); - _mm_storeu_si128((__m128i *)&p[sz], last); - - // Adjust input pointer for filter support area - uint8_t *in = (strength == 3) ? p - 1 : p; - - // Avoid modifying first sample - uint8_t *out = p + 1; - int len = sz - 1; - - const int use_3tap_filter = (strength < 3); - - if (use_3tap_filter) { - __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]); - __m128i shuf0 = _mm_lddqu_si128((__m128i const *)v_const[0]); - __m128i shuf1 = _mm_lddqu_si128((__m128i const *)v_const[1]); - __m128i iden = _mm_lddqu_si128((__m128i *)v_const[3]); - __m128i in0 = _mm_lddqu_si128((__m128i *)in); - while (len > 0) { - int n_out = (len < 8) ? 
len : 8; - __m128i d0 = _mm_shuffle_epi8(in0, shuf0); - __m128i d1 = _mm_shuffle_epi8(in0, shuf1); - d0 = _mm_maddubs_epi16(d0, coef0); - d1 = _mm_maddubs_epi16(d1, coef0); - d0 = _mm_hadd_epi16(d0, d1); - __m128i eight = _mm_set1_epi16(8); - d0 = _mm_add_epi16(d0, eight); - d0 = _mm_srai_epi16(d0, 4); - d0 = _mm_packus_epi16(d0, d0); - __m128i out0 = _mm_lddqu_si128((__m128i *)out); - __m128i n0 = _mm_set1_epi8(n_out); - __m128i mask = _mm_cmpgt_epi8(n0, iden); - out0 = _mm_blendv_epi8(out0, d0, mask); - _mm_storel_epi64((__m128i *)out, out0); - __m128i in1 = _mm_lddqu_si128((__m128i *)(in + 16)); - in0 = _mm_alignr_epi8(in1, in0, 8); - in += 8; - out += 8; - len -= n_out; - } - } else { // 5-tap filter - __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]); - __m128i two = _mm_set1_epi8(2); - __m128i shuf_a = _mm_lddqu_si128((__m128i const *)v_const[2]); - __m128i shuf_b = _mm_add_epi8(shuf_a, two); - __m128i shuf_c = _mm_add_epi8(shuf_b, two); - __m128i shuf_d = _mm_add_epi8(shuf_c, two); - __m128i iden = _mm_lddqu_si128((__m128i *)v_const[3]); - __m128i in0 = _mm_lddqu_si128((__m128i *)in); - while (len > 0) { - int n_out = (len < 8) ? 
len : 8; - __m128i d0 = _mm_shuffle_epi8(in0, shuf_a); - __m128i d1 = _mm_shuffle_epi8(in0, shuf_b); - __m128i d2 = _mm_shuffle_epi8(in0, shuf_c); - __m128i d3 = _mm_shuffle_epi8(in0, shuf_d); - d0 = _mm_maddubs_epi16(d0, coef0); - d1 = _mm_maddubs_epi16(d1, coef0); - d2 = _mm_maddubs_epi16(d2, coef0); - d3 = _mm_maddubs_epi16(d3, coef0); - d0 = _mm_hadd_epi16(d0, d1); - d2 = _mm_hadd_epi16(d2, d3); - d0 = _mm_hadd_epi16(d0, d2); - __m128i eight = _mm_set1_epi16(8); - d0 = _mm_add_epi16(d0, eight); - d0 = _mm_srai_epi16(d0, 4); - d0 = _mm_packus_epi16(d0, d0); - __m128i out0 = _mm_lddqu_si128((__m128i *)out); - __m128i n0 = _mm_set1_epi8(n_out); - __m128i mask = _mm_cmpgt_epi8(n0, iden); - out0 = _mm_blendv_epi8(out0, d0, mask); - _mm_storel_epi64((__m128i *)out, out0); - __m128i in1 = _mm_lddqu_si128((__m128i *)(in + 16)); - in0 = _mm_alignr_epi8(in1, in0, 8); - in += 8; - out += 8; - len -= n_out; - } - } -} - -void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength) { - if (!strength) return; - - DECLARE_ALIGNED(16, static const int16_t, kern[3][8]) = { - { 4, 8, 4, 8, 4, 8, 4, 8 }, // strength 1: 4,8,4 - { 5, 6, 5, 6, 5, 6, 5, 6 }, // strength 2: 5,6,5 - { 2, 4, 2, 4, 2, 4, 2, 4 } // strength 3: 2,4,4,4,2 - }; - - DECLARE_ALIGNED(16, static const int16_t, - v_const[1][8]) = { { 0, 1, 2, 3, 4, 5, 6, 7 } }; - - // Extend the first and last samples to simplify the loop for the 5-tap case - p[-1] = p[0]; - __m128i last = _mm_set1_epi16(p[sz - 1]); - _mm_storeu_si128((__m128i *)&p[sz], last); - - // Adjust input pointer for filter support area - uint16_t *in = (strength == 3) ? 
p - 1 : p; - - // Avoid modifying first sample - uint16_t *out = p + 1; - int len = sz - 1; - - const int use_3tap_filter = (strength < 3); - - if (use_3tap_filter) { - __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]); - __m128i iden = _mm_lddqu_si128((__m128i *)v_const[0]); - __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]); - __m128i in8 = _mm_lddqu_si128((__m128i *)&in[8]); - while (len > 0) { - int n_out = (len < 8) ? len : 8; - __m128i in1 = _mm_alignr_epi8(in8, in0, 2); - __m128i in2 = _mm_alignr_epi8(in8, in0, 4); - __m128i in02 = _mm_add_epi16(in0, in2); - __m128i d0 = _mm_unpacklo_epi16(in02, in1); - __m128i d1 = _mm_unpackhi_epi16(in02, in1); - d0 = _mm_mullo_epi16(d0, coef0); - d1 = _mm_mullo_epi16(d1, coef0); - d0 = _mm_hadd_epi16(d0, d1); - __m128i eight = _mm_set1_epi16(8); - d0 = _mm_add_epi16(d0, eight); - d0 = _mm_srli_epi16(d0, 4); - __m128i out0 = _mm_lddqu_si128((__m128i *)out); - __m128i n0 = _mm_set1_epi16(n_out); - __m128i mask = _mm_cmpgt_epi16(n0, iden); - out0 = _mm_blendv_epi8(out0, d0, mask); - _mm_storeu_si128((__m128i *)out, out0); - in += 8; - in0 = in8; - in8 = _mm_lddqu_si128((__m128i *)&in[8]); - out += 8; - len -= n_out; - } - } else { // 5-tap filter - __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]); - __m128i iden = _mm_lddqu_si128((__m128i *)v_const[0]); - __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]); - __m128i in8 = _mm_lddqu_si128((__m128i *)&in[8]); - while (len > 0) { - int n_out = (len < 8) ? 
len : 8; - __m128i in1 = _mm_alignr_epi8(in8, in0, 2); - __m128i in2 = _mm_alignr_epi8(in8, in0, 4); - __m128i in3 = _mm_alignr_epi8(in8, in0, 6); - __m128i in4 = _mm_alignr_epi8(in8, in0, 8); - __m128i in04 = _mm_add_epi16(in0, in4); - __m128i in123 = _mm_add_epi16(in1, in2); - in123 = _mm_add_epi16(in123, in3); - __m128i d0 = _mm_unpacklo_epi16(in04, in123); - __m128i d1 = _mm_unpackhi_epi16(in04, in123); - d0 = _mm_mullo_epi16(d0, coef0); - d1 = _mm_mullo_epi16(d1, coef0); - d0 = _mm_hadd_epi16(d0, d1); - __m128i eight = _mm_set1_epi16(8); - d0 = _mm_add_epi16(d0, eight); - d0 = _mm_srli_epi16(d0, 4); - __m128i out0 = _mm_lddqu_si128((__m128i *)out); - __m128i n0 = _mm_set1_epi16(n_out); - __m128i mask = _mm_cmpgt_epi16(n0, iden); - out0 = _mm_blendv_epi8(out0, d0, mask); - _mm_storeu_si128((__m128i *)out, out0); - in += 8; - in0 = in8; - in8 = _mm_lddqu_si128((__m128i *)&in[8]); - out += 8; - len -= n_out; - } - } -} - -void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz) { - // interpolate half-sample positions - assert(sz <= 24); - - DECLARE_ALIGNED(16, static const int8_t, kernel[1][16]) = { - { -1, 9, 9, -1, -1, 9, 9, -1, -1, 9, 9, -1, -1, 9, 9, -1 } - }; - - DECLARE_ALIGNED(16, static const int8_t, v_const[2][16]) = { - { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 }, - { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } - }; - - // Extend first/last samples (upper-left p[-1], last p[sz-1]) - // to support 4-tap filter - p[-2] = p[-1]; - p[sz] = p[sz - 1]; - - uint8_t *in = &p[-2]; - uint8_t *out = &p[-2]; - - int n = sz + 1; // Input length including upper-left sample - - __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]); - __m128i in16 = _mm_lddqu_si128((__m128i *)&in[16]); - - __m128i coef0 = _mm_lddqu_si128((__m128i *)kernel[0]); - __m128i shuf0 = _mm_lddqu_si128((__m128i *)v_const[0]); - __m128i shuf1 = _mm_lddqu_si128((__m128i *)v_const[1]); - - while (n > 0) { - __m128i in8 = _mm_alignr_epi8(in16, in0, 8); - __m128i d0 = _mm_shuffle_epi8(in0, 
shuf0); - __m128i d1 = _mm_shuffle_epi8(in0, shuf1); - __m128i d2 = _mm_shuffle_epi8(in8, shuf0); - __m128i d3 = _mm_shuffle_epi8(in8, shuf1); - d0 = _mm_maddubs_epi16(d0, coef0); - d1 = _mm_maddubs_epi16(d1, coef0); - d2 = _mm_maddubs_epi16(d2, coef0); - d3 = _mm_maddubs_epi16(d3, coef0); - d0 = _mm_hadd_epi16(d0, d1); - d2 = _mm_hadd_epi16(d2, d3); - __m128i eight = _mm_set1_epi16(8); - d0 = _mm_add_epi16(d0, eight); - d2 = _mm_add_epi16(d2, eight); - d0 = _mm_srai_epi16(d0, 4); - d2 = _mm_srai_epi16(d2, 4); - d0 = _mm_packus_epi16(d0, d2); - __m128i in1 = _mm_alignr_epi8(in16, in0, 1); - __m128i out0 = _mm_unpacklo_epi8(in1, d0); - __m128i out1 = _mm_unpackhi_epi8(in1, d0); - _mm_storeu_si128((__m128i *)&out[0], out0); - _mm_storeu_si128((__m128i *)&out[16], out1); - in0 = in16; - in16 = _mm_setzero_si128(); - out += 32; - n -= 16; - } -} - -void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd) { - // interpolate half-sample positions - assert(sz <= 24); - - DECLARE_ALIGNED(16, static const int16_t, - kernel[1][8]) = { { -1, 9, -1, 9, -1, 9, -1, 9 } }; - - // Extend first/last samples (upper-left p[-1], last p[sz-1]) - // to support 4-tap filter - p[-2] = p[-1]; - p[sz] = p[sz - 1]; - - uint16_t *in = &p[-2]; - uint16_t *out = in; - int n = sz + 1; - - __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]); - __m128i in8 = _mm_lddqu_si128((__m128i *)&in[8]); - __m128i in16 = _mm_lddqu_si128((__m128i *)&in[16]); - __m128i in24 = _mm_lddqu_si128((__m128i *)&in[24]); - - while (n > 0) { - __m128i in1 = _mm_alignr_epi8(in8, in0, 2); - __m128i in2 = _mm_alignr_epi8(in8, in0, 4); - __m128i in3 = _mm_alignr_epi8(in8, in0, 6); - __m128i sum0 = _mm_add_epi16(in0, in3); - __m128i sum1 = _mm_add_epi16(in1, in2); - __m128i d0 = _mm_unpacklo_epi16(sum0, sum1); - __m128i d1 = _mm_unpackhi_epi16(sum0, sum1); - __m128i coef0 = _mm_lddqu_si128((__m128i *)kernel[0]); - d0 = _mm_madd_epi16(d0, coef0); - d1 = _mm_madd_epi16(d1, coef0); - __m128i eight = _mm_set1_epi32(8); 
- d0 = _mm_add_epi32(d0, eight); - d1 = _mm_add_epi32(d1, eight); - d0 = _mm_srai_epi32(d0, 4); - d1 = _mm_srai_epi32(d1, 4); - d0 = _mm_packus_epi32(d0, d1); - __m128i max0 = _mm_set1_epi16((1 << bd) - 1); - d0 = _mm_min_epi16(d0, max0); - __m128i out0 = _mm_unpacklo_epi16(in1, d0); - __m128i out1 = _mm_unpackhi_epi16(in1, d0); - _mm_storeu_si128((__m128i *)&out[0], out0); - _mm_storeu_si128((__m128i *)&out[8], out1); - in0 = in8; - in8 = in16; - in16 = in24; - in24 = _mm_setzero_si128(); - out += 16; - n -= 8; - } -} diff --git a/third_party/aom/av1/common/x86/jnt_convolve_avx2.c b/third_party/aom/av1/common/x86/jnt_convolve_avx2.c deleted file mode 100644 index 9f2e2b457..000000000 --- a/third_party/aom/av1/common/x86/jnt_convolve_avx2.c +++ /dev/null @@ -1,633 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <immintrin.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/x86/convolve_avx2.h" -#include "aom_dsp/x86/convolve_common_intrin.h" -#include "aom_dsp/x86/convolve_sse4_1.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "av1/common/convolve.h" - -static INLINE __m256i unpack_weights_avx2(ConvolveParams *conv_params) { - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m256i wt0 = _mm256_set1_epi16(w0); - const __m256i wt1 = _mm256_set1_epi16(w1); - const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1); - return wt; -} - -static INLINE __m256i load_line2_avx2(const void *a, const void *b) { - return _mm256_permute2x128_si256( - _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)a)), - _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)b)), 0x20); -} - -void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst0, - int dst_stride0, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int bd = 8; - int i, j; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint8_t *const src_ptr = src - fo_horiz; - const int bits = FILTER_BITS - conv_params->round_1; - const __m256i wt = unpack_weights_avx2(conv_params); - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m256i offset_const = _mm256_set1_epi16(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1); - __m256i filt[4], 
coeffs[4]; - - assert(bits >= 0); - assert(conv_params->round_0 > 0); - - filt[0] = _mm256_load_si256((__m256i const *)filt_global_avx2); - filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32)); - filt[2] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2)); - filt[3] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3)); - - prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs); - - const __m256i round_const = - _mm256_set1_epi16((1 << (conv_params->round_0 - 1)) >> 1); - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1); - - (void)filter_params_y; - (void)subpel_y_q4; - - for (i = 0; i < h; i += 2) { - const uint8_t *src_data = src_ptr + i * src_stride; - CONV_BUF_TYPE *dst_data = dst + i * dst_stride; - for (j = 0; j < w; j += 8) { - const __m256i data = - load_line2_avx2(&src_data[j], &src_data[j + src_stride]); - - __m256i res = convolve_lowbd_x(data, coeffs, filt); - - res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const), round_shift); - - res = _mm256_slli_epi16(res, bits); - - const __m256i res_unsigned = _mm256_add_epi16(res, offset_const); - - // Accumulate values into the destination buffer - if (do_average) { - const __m256i data_ref_0 = - load_line2_avx2(&dst_data[j], &dst_data[j + dst_stride]); - const __m256i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m256i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_8 = _mm256_packus_epi16(round_result, round_result); - const __m128i res_0 = _mm256_castsi256_si128(res_8); - const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); - - if (w > 4) { - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_storel_epi64( - (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1); - } else { - *(uint32_t *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0); - *(uint32_t *)(&dst0[i * dst_stride0 + j 
+ dst_stride0]) = - _mm_cvtsi128_si32(res_1); - } - } else { - const __m128i res_0 = _mm256_castsi256_si128(res_unsigned); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - - const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } - } -} - -void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst0, - int dst_stride0, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int bd = 8; - int i, j; - const int fo_vert = filter_params_y->taps / 2 - 1; - const uint8_t *const src_ptr = src - fo_vert * src_stride; - // +1 to compensate for dividing the filter coeffs by 2 - const int left_shift = FILTER_BITS - conv_params->round_0 + 1; - const __m256i round_const = - _mm256_set1_epi32((1 << conv_params->round_1) >> 1); - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1); - const __m256i wt = unpack_weights_avx2(conv_params); - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m256i offset_const = _mm256_set1_epi16(offset); - const int offset_1 = (1 << (bd + FILTER_BITS - 2)); - const __m256i offset_const_1 = _mm256_set1_epi16(offset_1); - const __m256i offset_const_2 = _mm256_set1_epi16((1 << offset_0)); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1); - const __m256i zero = _mm256_setzero_si256(); - __m256i coeffs[4], s[8]; - - assert((FILTER_BITS - 
conv_params->round_0) >= 0); - - prepare_coeffs_lowbd(filter_params_y, subpel_y_q4, coeffs); - - (void)conv_params; - (void)filter_params_x; - (void)subpel_x_q4; - - for (j = 0; j < w; j += 16) { - const uint8_t *data = &src_ptr[j]; - __m256i src6; - // Load lines a and b. Line a to lower 128, line b to upper 128 - { - __m256i src_ab[7]; - __m256i src_a[7]; - src_a[0] = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)data)); - for (int kk = 0; kk < 6; ++kk) { - data += src_stride; - src_a[kk + 1] = - _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)data)); - src_ab[kk] = _mm256_permute2x128_si256(src_a[kk], src_a[kk + 1], 0x20); - } - src6 = src_a[6]; - s[0] = _mm256_unpacklo_epi8(src_ab[0], src_ab[1]); - s[1] = _mm256_unpacklo_epi8(src_ab[2], src_ab[3]); - s[2] = _mm256_unpacklo_epi8(src_ab[4], src_ab[5]); - s[4] = _mm256_unpackhi_epi8(src_ab[0], src_ab[1]); - s[5] = _mm256_unpackhi_epi8(src_ab[2], src_ab[3]); - s[6] = _mm256_unpackhi_epi8(src_ab[4], src_ab[5]); - } - - for (i = 0; i < h; i += 2) { - data = &src_ptr[(i + 7) * src_stride + j]; - const __m256i src7 = - _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)data)); - const __m256i src_67a = _mm256_permute2x128_si256(src6, src7, 0x20); - - src6 = _mm256_castsi128_si256( - _mm_loadu_si128((__m128i *)(data + src_stride))); - const __m256i src_78a = _mm256_permute2x128_si256(src7, src6, 0x20); - - s[3] = _mm256_unpacklo_epi8(src_67a, src_78a); - s[7] = _mm256_unpackhi_epi8(src_67a, src_78a); - - __m256i res_lo = convolve_lowbd(s, coeffs); - - res_lo = _mm256_add_epi16(res_lo, offset_const_1); - - const __m256i res_lo_0_32b = _mm256_unpacklo_epi16(res_lo, zero); - const __m256i res_lo_0_shift = - _mm256_slli_epi32(res_lo_0_32b, left_shift); - const __m256i res_lo_0_round = _mm256_sra_epi32( - _mm256_add_epi32(res_lo_0_shift, round_const), round_shift); - - const __m256i res_lo_1_32b = _mm256_unpackhi_epi16(res_lo, zero); - const __m256i res_lo_1_shift = - _mm256_slli_epi32(res_lo_1_32b, left_shift); - const 
__m256i res_lo_1_round = _mm256_sra_epi32( - _mm256_add_epi32(res_lo_1_shift, round_const), round_shift); - - const __m256i res_lo_round = - _mm256_packs_epi32(res_lo_0_round, res_lo_1_round); - - const __m256i res_lo_unsigned = - _mm256_add_epi16(res_lo_round, offset_const_2); - - if (w - j < 16) { - if (do_average) { - const __m256i data_ref_0 = load_line2_avx2( - &dst[i * dst_stride + j], &dst[i * dst_stride + j + dst_stride]); - const __m256i comp_avg_res = - comp_avg(&data_ref_0, &res_lo_unsigned, &wt, use_jnt_comp_avg); - - const __m256i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_8 = _mm256_packus_epi16(round_result, round_result); - const __m128i res_0 = _mm256_castsi256_si128(res_8); - const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); - - if (w - j > 4) { - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_storel_epi64( - (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1); - } else { - *(uint32_t *)(&dst0[i * dst_stride0 + j]) = - _mm_cvtsi128_si32(res_0); - *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) = - _mm_cvtsi128_si32(res_1); - } - } else { - const __m128i res_0 = _mm256_castsi256_si128(res_lo_unsigned); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - - const __m128i res_1 = _mm256_extracti128_si256(res_lo_unsigned, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } else { - __m256i res_hi = convolve_lowbd(s + 4, coeffs); - - res_hi = _mm256_add_epi16(res_hi, offset_const_1); - - const __m256i res_hi_0_32b = _mm256_unpacklo_epi16(res_hi, zero); - const __m256i res_hi_0_shift = - _mm256_slli_epi32(res_hi_0_32b, left_shift); - const __m256i res_hi_0_round = _mm256_sra_epi32( - _mm256_add_epi32(res_hi_0_shift, round_const), round_shift); - - const __m256i res_hi_1_32b = _mm256_unpackhi_epi16(res_hi, zero); - const __m256i res_hi_1_shift = - 
_mm256_slli_epi32(res_hi_1_32b, left_shift); - const __m256i res_hi_1_round = _mm256_sra_epi32( - _mm256_add_epi32(res_hi_1_shift, round_const), round_shift); - - const __m256i res_hi_round = - _mm256_packs_epi32(res_hi_0_round, res_hi_1_round); - - const __m256i res_hi_unsigned = - _mm256_add_epi16(res_hi_round, offset_const_2); - - if (do_average) { - const __m256i data_ref_0_lo = load_line2_avx2( - &dst[i * dst_stride + j], &dst[i * dst_stride + j + dst_stride]); - - const __m256i data_ref_0_hi = - load_line2_avx2(&dst[i * dst_stride + j + 8], - &dst[i * dst_stride + j + 8 + dst_stride]); - - const __m256i comp_avg_res_lo = - comp_avg(&data_ref_0_lo, &res_lo_unsigned, &wt, use_jnt_comp_avg); - - const __m256i comp_avg_res_hi = - comp_avg(&data_ref_0_hi, &res_hi_unsigned, &wt, use_jnt_comp_avg); - - const __m256i round_result_lo = convolve_rounding( - &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift); - - const __m256i round_result_hi = convolve_rounding( - &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_8 = - _mm256_packus_epi16(round_result_lo, round_result_hi); - const __m128i res_0 = _mm256_castsi256_si128(res_8); - const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); - - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_store_si128( - (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1); - - } else { - const __m128i res_lo_0 = _mm256_castsi256_si128(res_lo_unsigned); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_lo_0); - - const __m128i res_lo_1 = _mm256_extracti128_si256(res_lo_unsigned, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_lo_1); - - const __m128i res_hi_0 = _mm256_castsi256_si128(res_hi_unsigned); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + 8]), res_hi_0); - - const __m128i res_hi_1 = _mm256_extracti128_si256(res_hi_unsigned, 1); - _mm_store_si128( - (__m128i *)(&dst[i * dst_stride + j + 8 + 
dst_stride]), res_hi_1); - } - } - s[0] = s[1]; - s[1] = s[2]; - s[2] = s[3]; - - s[4] = s[5]; - s[5] = s[6]; - s[6] = s[7]; - } - } -} - -void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst0, - int dst_stride0, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int bd = 8; - - DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); - int im_h = h + filter_params_y->taps - 1; - int im_stride = 8; - int i, j; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; - const __m256i wt = unpack_weights_avx2(conv_params); - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m256i offset_const = _mm256_set1_epi16(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1); - __m256i filt[4], s[8], coeffs_x[4], coeffs_y[4]; - - assert(conv_params->round_0 > 0); - - filt[0] = _mm256_load_si256((__m256i const *)filt_global_avx2); - filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32)); - filt[2] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2)); - filt[3] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3)); - - prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_x); - prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y); - - const __m256i round_const_h = _mm256_set1_epi16( - ((1 << 
(conv_params->round_0 - 1)) >> 1) + (1 << (bd + FILTER_BITS - 2))); - const __m128i round_shift_h = _mm_cvtsi32_si128(conv_params->round_0 - 1); - - const __m256i round_const_v = _mm256_set1_epi32( - ((1 << conv_params->round_1) >> 1) - - (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1))); - const __m128i round_shift_v = _mm_cvtsi32_si128(conv_params->round_1); - - for (j = 0; j < w; j += 8) { - /* Horizontal filter */ - { - const uint8_t *src_h = src_ptr + j; - for (i = 0; i < im_h; i += 2) { - __m256i data = - _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)src_h)); - if (i + 1 < im_h) - data = _mm256_inserti128_si256( - data, _mm_loadu_si128((__m128i *)(src_h + src_stride)), 1); - src_h += (src_stride << 1); - __m256i res = convolve_lowbd_x(data, coeffs_x, filt); - - res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), - round_shift_h); - - _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); - } - } - - /* Vertical filter */ - { - __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); - __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); - __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); - __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); - __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); - __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); - - s[0] = _mm256_unpacklo_epi16(s0, s1); - s[1] = _mm256_unpacklo_epi16(s2, s3); - s[2] = _mm256_unpacklo_epi16(s4, s5); - - s[4] = _mm256_unpackhi_epi16(s0, s1); - s[5] = _mm256_unpackhi_epi16(s2, s3); - s[6] = _mm256_unpackhi_epi16(s4, s5); - - for (i = 0; i < h; i += 2) { - const int16_t *data = &im_block[i * im_stride]; - - const __m256i s6 = - _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); - const __m256i s7 = - _mm256_loadu_si256((__m256i *)(data + 7 * im_stride)); - - s[3] = _mm256_unpacklo_epi16(s6, s7); - s[7] = _mm256_unpackhi_epi16(s6, s7); - - const 
__m256i res_a = convolve(s, coeffs_y); - const __m256i res_a_round = _mm256_sra_epi32( - _mm256_add_epi32(res_a, round_const_v), round_shift_v); - - if (w - j > 4) { - const __m256i res_b = convolve(s + 4, coeffs_y); - const __m256i res_b_round = _mm256_sra_epi32( - _mm256_add_epi32(res_b, round_const_v), round_shift_v); - const __m256i res_16b = _mm256_packs_epi32(res_a_round, res_b_round); - const __m256i res_unsigned = _mm256_add_epi16(res_16b, offset_const); - - if (do_average) { - const __m256i data_ref_0 = - load_line2_avx2(&dst[i * dst_stride + j], - &dst[i * dst_stride + j + dst_stride]); - const __m256i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m256i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_8 = - _mm256_packus_epi16(round_result, round_result); - const __m128i res_0 = _mm256_castsi256_si128(res_8); - const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); - - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_storel_epi64( - (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1); - } else { - const __m128i res_0 = _mm256_castsi256_si128(res_unsigned); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - - const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } else { - const __m256i res_16b = _mm256_packs_epi32(res_a_round, res_a_round); - const __m256i res_unsigned = _mm256_add_epi16(res_16b, offset_const); - - if (do_average) { - const __m256i data_ref_0 = - load_line2_avx2(&dst[i * dst_stride + j], - &dst[i * dst_stride + j + dst_stride]); - - const __m256i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m256i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_8 = - 
_mm256_packus_epi16(round_result, round_result); - const __m128i res_0 = _mm256_castsi256_si128(res_8); - const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); - - *(uint32_t *)(&dst0[i * dst_stride0 + j]) = - _mm_cvtsi128_si32(res_0); - *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) = - _mm_cvtsi128_si32(res_1); - - } else { - const __m128i res_0 = _mm256_castsi256_si128(res_unsigned); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - - const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } - - s[0] = s[1]; - s[1] = s[2]; - s[2] = s[3]; - - s[4] = s[5]; - s[5] = s[6]; - s[6] = s[7]; - } - } - } -} - -void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, - uint8_t *dst0, int dst_stride0, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int bd = 8; - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - (void)filter_params_x; - (void)filter_params_y; - (void)subpel_x_q4; - (void)subpel_y_q4; - - const int bits = - FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; - const __m128i left_shift = _mm_cvtsi32_si128(bits); - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const __m256i wt = unpack_weights_avx2(conv_params); - const __m256i zero = _mm256_setzero_si256(); - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m256i offset_const = _mm256_set1_epi16(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1); - int i, j; - - if (!(w % 16)) { 
- for (i = 0; i < h; i += 1) { - for (j = 0; j < w; j += 16) { - const __m256i src_16bit = _mm256_cvtepu8_epi16( - _mm_loadu_si128((__m128i *)(&src[i * src_stride + j]))); - - const __m256i res = _mm256_sll_epi16(src_16bit, left_shift); - const __m256i res_unsigned = _mm256_add_epi16(res, offset_const); - - if (do_average) { - const __m256i data_ref_0 = - _mm256_loadu_si256((__m256i *)(&dst[i * dst_stride + j])); - - const __m256i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m256i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m256i res_8 = _mm256_packus_epi16(round_result, round_result); - const __m256i res_0 = _mm256_permute4x64_epi64(res_8, 0xD8); - - _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), - _mm256_castsi256_si128(res_0)); - } else { - _mm256_store_si256((__m256i *)(&dst[i * dst_stride + j]), - res_unsigned); - } - } - } - } else if (!(w % 4)) { - for (i = 0; i < h; i += 2) { - for (j = 0; j < w; j += 8) { - const __m128i src_row_0 = - _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j])); - const __m128i src_row_1 = - _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j + src_stride])); - // since not all compilers yet support _mm256_set_m128i() - const __m256i src_10 = _mm256_insertf128_si256( - _mm256_castsi128_si256(src_row_0), src_row_1, 1); - - const __m256i src_16bit = _mm256_unpacklo_epi8(src_10, zero); - - const __m256i res = _mm256_sll_epi16(src_16bit, left_shift); - - const __m256i res_unsigned = _mm256_add_epi16(res, offset_const); - - // Accumulate values into the destination buffer - if (do_average) { - const __m256i data_ref_0 = load_line2_avx2( - &dst[i * dst_stride + j], &dst[i * dst_stride + j + dst_stride]); - const __m256i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m256i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - 
- const __m256i res_8 = _mm256_packus_epi16(round_result, round_result); - const __m128i res_0 = _mm256_castsi256_si128(res_8); - const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); - - if (w > 4) { - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0); - _mm_storel_epi64( - (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1); - } else { - *(uint32_t *)(&dst0[i * dst_stride0 + j]) = - _mm_cvtsi128_si32(res_0); - *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) = - _mm_cvtsi128_si32(res_1); - } - } else { - const __m128i res_0 = _mm256_castsi256_si128(res_unsigned); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0); - - const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1); - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]), - res_1); - } - } - } - } -} diff --git a/third_party/aom/av1/common/x86/jnt_convolve_sse2.c b/third_party/aom/av1/common/x86/jnt_convolve_sse2.c deleted file mode 100644 index 87dc3242e..000000000 --- a/third_party/aom/av1/common/x86/jnt_convolve_sse2.c +++ /dev/null @@ -1,385 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <emmintrin.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/aom_filter.h" -#include "aom_dsp/x86/convolve_sse2.h" - -void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst0, - int dst_stride0, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int bd = 8; - CONV_BUF_TYPE *dst = conv_params->dst; - const int dst_stride = conv_params->dst_stride; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const uint8_t *src_ptr = src - fo_horiz; - const int bits = FILTER_BITS - conv_params->round_1; - const __m128i left_shift = _mm_cvtsi32_si128(bits); - const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_0) >> 1); - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0); - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi16(w0); - const __m128i wt1 = _mm_set1_epi16(w1); - const __m128i wt = _mm_unpacklo_epi16(wt0, wt1); - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m128i offset_const = _mm_set1_epi16(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1); - __m128i coeffs[4]; - - (void)filter_params_y; - (void)subpel_y_q4; - - prepare_coeffs(filter_params_x, subpel_x_q4, coeffs); - - if (w == 4) { - do { - const __m128i data = _mm_loadu_si128((__m128i *)src_ptr); - __m128i s[4]; - - s[0] = _mm_unpacklo_epi8(data, _mm_srli_si128(data, 1)); - s[1] = - _mm_unpacklo_epi8(_mm_srli_si128(data, 2), _mm_srli_si128(data, 3)); - s[2] = - 
_mm_unpacklo_epi8(_mm_srli_si128(data, 4), _mm_srli_si128(data, 5)); - s[3] = - _mm_unpacklo_epi8(_mm_srli_si128(data, 6), _mm_srli_si128(data, 7)); - const __m128i res_lo = convolve_lo_x(s, coeffs); - const __m128i res_lo_round = - _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift); - const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift); - - const __m128i res_16b = _mm_packs_epi32(res_lo_shift, res_lo_shift); - const __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const); - - // Accumulate values into the destination buffer - if (do_average) { - const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)dst); - - const __m128i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m128i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_8 = _mm_packus_epi16(round_result, round_result); - *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8); - } else { - _mm_store_si128((__m128i *)(&dst[0]), res_unsigned); - } - src_ptr += src_stride; - dst += dst_stride; - dst0 += dst_stride0; - } while (--h); - } else { - assert(!(w % 8)); - int i = 0; - do { - int j = 0; - do { - const __m128i data = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); - __m128i s[4]; - - // Filter even-index pixels - s[0] = data; - s[1] = _mm_srli_si128(data, 2); - s[2] = _mm_srli_si128(data, 4); - s[3] = _mm_srli_si128(data, 6); - const __m128i res_even = convolve_lo_x(s, coeffs); - - // Filter odd-index pixels - s[0] = _mm_srli_si128(data, 1); - s[1] = _mm_srli_si128(data, 3); - s[2] = _mm_srli_si128(data, 5); - s[3] = _mm_srli_si128(data, 7); - const __m128i res_odd = convolve_lo_x(s, coeffs); - - // Rearrange pixels back into the order 0 ... 
7 - const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); - const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); - const __m128i res_lo_round = - _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift); - const __m128i res_hi_round = - _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift); - const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift); - const __m128i res_hi_shift = _mm_sll_epi32(res_hi_round, left_shift); - - const __m128i res_16b = _mm_packs_epi32(res_lo_shift, res_hi_shift); - const __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const); - - // Accumulate values into the destination buffer - if (do_average) { - const __m128i data_ref_0 = - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])); - - const __m128i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m128i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_8 = _mm_packus_epi16(round_result, round_result); - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8); - } else { - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned); - } - j += 8; - } while (j < w); - } while (++i < h); - } -} - -void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst0, - int dst_stride0, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - const int bd = 8; - CONV_BUF_TYPE *dst = conv_params->dst; - const int dst_stride = conv_params->dst_stride; - const int fo_vert = filter_params_y->taps / 2 - 1; - const uint8_t *src_ptr = src - fo_vert * src_stride; - const int bits = FILTER_BITS - conv_params->round_0; - const __m128i left_shift = _mm_cvtsi32_si128(bits); - const __m128i wt0 = _mm_set1_epi16(conv_params->fwd_offset); - const __m128i wt1 = 
_mm_set1_epi16(conv_params->bck_offset); - const __m128i wt = _mm_unpacklo_epi16(wt0, wt1); - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m128i offset_const = _mm_set1_epi16(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1); - const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_1) >> 1); - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1); - __m128i coeffs[4]; - - (void)filter_params_x; - (void)subpel_x_q4; - - prepare_coeffs(filter_params_y, subpel_y_q4, coeffs); - - if (w == 4) { - __m128i s[8], src6, res, res_shift; - src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride)); - s[0] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride))); - s[1] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride))); - s[2] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride))); - s[3] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride))); - s[4] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride))); - s[5] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6); - - do { - s[6] = _mm_unpacklo_epi8( - src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride))); - src6 = _mm_cvtsi32_si128(*(uint32_t 
*)(src_ptr + 8 * src_stride)); - s[7] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6); - - res = convolve_lo_y(s + 0, coeffs); - res_shift = _mm_sll_epi32(res, left_shift); - res_shift = - _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift); - - __m128i res_16b = _mm_packs_epi32(res_shift, res_shift); - __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const); - - // Accumulate values into the destination buffer - if (do_average) { - const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)dst); - - const __m128i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m128i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_8 = _mm_packus_epi16(round_result, round_result); - *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8); - - } else { - _mm_store_si128((__m128i *)dst, res_unsigned); - } - - src_ptr += src_stride; - dst += dst_stride; - dst0 += dst_stride0; - - res = convolve_lo_y(s + 1, coeffs); - res_shift = _mm_sll_epi32(res, left_shift); - res_shift = - _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift); - - res_16b = _mm_packs_epi32(res_shift, res_shift); - res_unsigned = _mm_add_epi16(res_16b, offset_const); - - // Accumulate values into the destination buffer - if (do_average) { - const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)dst); - - const __m128i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m128i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_8 = _mm_packus_epi16(round_result, round_result); - *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8); - - } else { - _mm_store_si128((__m128i *)dst, res_unsigned); - } - - src_ptr += src_stride; - dst += dst_stride; - dst0 += dst_stride0; - - s[0] = s[2]; - s[1] = s[3]; - s[2] = s[4]; - s[3] = s[5]; - s[4] = s[6]; 
- s[5] = s[7]; - h -= 2; - } while (h); - } else { - assert(!(w % 8)); - int j = 0; - do { - __m128i s[8], src6, res_lo, res_hi, res_lo_shift, res_hi_shift; - const uint8_t *data = &src_ptr[j]; - - src6 = _mm_loadl_epi64((__m128i *)(data + 6 * src_stride)); - s[0] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 0 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 1 * src_stride))); - s[1] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 2 * src_stride))); - s[2] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 3 * src_stride))); - s[3] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 4 * src_stride))); - s[4] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)), - _mm_loadl_epi64((__m128i *)(data + 5 * src_stride))); - s[5] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)), src6); - - int i = 0; - do { - data = &src_ptr[i * src_stride + j]; - s[6] = _mm_unpacklo_epi8( - src6, _mm_loadl_epi64((__m128i *)(data + 7 * src_stride))); - src6 = _mm_loadl_epi64((__m128i *)(data + 8 * src_stride)); - s[7] = _mm_unpacklo_epi8( - _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)), src6); - - res_lo = convolve_lo_y(s, coeffs); // Filter low index pixels - res_hi = convolve_hi_y(s, coeffs); // Filter high index pixels - res_lo_shift = _mm_sll_epi32(res_lo, left_shift); - res_hi_shift = _mm_sll_epi32(res_hi, left_shift); - res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const), - round_shift); - res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const), - round_shift); - - __m128i res_16b = _mm_packs_epi32(res_lo_shift, res_hi_shift); - __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const); - - // Accumulate values into the destination buffer - if (do_average) { - const __m128i data_ref_0 = - 
_mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])); - - const __m128i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m128i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_8 = _mm_packus_epi16(round_result, round_result); - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8); - } else { - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned); - } - i++; - - res_lo = convolve_lo_y(s + 1, coeffs); // Filter low index pixels - res_hi = convolve_hi_y(s + 1, coeffs); // Filter high index pixels - res_lo_shift = _mm_sll_epi32(res_lo, left_shift); - res_hi_shift = _mm_sll_epi32(res_hi, left_shift); - res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const), - round_shift); - res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const), - round_shift); - res_16b = _mm_packs_epi32(res_lo_shift, res_hi_shift); - res_unsigned = _mm_add_epi16(res_16b, offset_const); - - // Accumulate values into the destination buffer - if (do_average) { - __m128i data_ref_0 = - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])); - - const __m128i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m128i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_8 = _mm_packus_epi16(round_result, round_result); - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8); - } else { - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned); - } - i++; - - s[0] = s[2]; - s[1] = s[3]; - s[2] = s[4]; - s[3] = s[5]; - s[4] = s[6]; - s[5] = s[7]; - } while (i < h); - j += 8; - } while (j < w); - } -} diff --git a/third_party/aom/av1/common/x86/jnt_convolve_ssse3.c b/third_party/aom/av1/common/x86/jnt_convolve_ssse3.c deleted file mode 100644 index 822772782..000000000 --- 
a/third_party/aom/av1/common/x86/jnt_convolve_ssse3.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <tmmintrin.h> - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/aom_filter.h" -#include "aom_dsp/x86/convolve_sse2.h" - -void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, - uint8_t *dst0, int dst_stride0, int w, int h, - const InterpFilterParams *filter_params_x, - const InterpFilterParams *filter_params_y, - const int subpel_x_q4, const int subpel_y_q4, - ConvolveParams *conv_params) { - CONV_BUF_TYPE *dst = conv_params->dst; - int dst_stride = conv_params->dst_stride; - const int bd = 8; - - DECLARE_ALIGNED(16, int16_t, - im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]); - int im_h = h + filter_params_y->taps - 1; - int im_stride = MAX_SB_SIZE; - int i, j; - const int fo_vert = filter_params_y->taps / 2 - 1; - const int fo_horiz = filter_params_x->taps / 2 - 1; - const int do_average = conv_params->do_average; - const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg; - const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; - - const __m128i zero = _mm_setzero_si128(); - - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi16(w0); - const __m128i wt1 = _mm_set1_epi16(w1); - const __m128i wt = _mm_unpacklo_epi16(wt0, wt1); - - const int offset_0 = - bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const 
int offset = (1 << offset_0) + (1 << (offset_0 - 1)); - const __m128i offset_const = _mm_set1_epi16(offset); - const int rounding_shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1); - - /* Horizontal filter */ - { - const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( - filter_params_x, subpel_x_q4 & SUBPEL_MASK); - const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i round_const = _mm_set1_epi32( - ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1))); - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0); - - for (i = 0; i < im_h; ++i) { - for (j = 0; j < w; j += 8) { - const __m128i data = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); - - const __m128i src_lo = _mm_unpacklo_epi8(data, zero); - const __m128i src_hi = _mm_unpackhi_epi8(data, zero); - - // Filter even-index pixels - const __m128i res_0 = _mm_madd_epi16(src_lo, coeff_01); - const __m128i src_2 = _mm_alignr_epi8(src_hi, src_lo, 4); - const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); - const __m128i src_4 = _mm_alignr_epi8(src_hi, src_lo, 8); - const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); - const __m128i src_6 = _mm_alignr_epi8(src_hi, src_lo, 12); - const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67); - - __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4), - 
_mm_add_epi32(res_2, res_6)); - res_even = - _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift); - - // Filter odd-index pixels - const __m128i src_1 = _mm_alignr_epi8(src_hi, src_lo, 2); - const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01); - const __m128i src_3 = _mm_alignr_epi8(src_hi, src_lo, 6); - const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23); - const __m128i src_5 = _mm_alignr_epi8(src_hi, src_lo, 10); - const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45); - const __m128i src_7 = _mm_alignr_epi8(src_hi, src_lo, 14); - const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67); - - __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5), - _mm_add_epi32(res_3, res_7)); - res_odd = - _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift); - - // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7 - __m128i res = _mm_packs_epi32(res_even, res_odd); - _mm_store_si128((__m128i *)&im_block[i * im_stride + j], res); - } - } - } - - /* Vertical filter */ - { - const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( - filter_params_y, subpel_y_q4 & SUBPEL_MASK); - const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i round_const = _mm_set1_epi32( - ((1 << conv_params->round_1) >> 1) - - (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1))); - const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1); - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 
8) { - // Filter even-index pixels - const int16_t *data = &im_block[i * im_stride + j]; - const __m128i src_0 = - _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), - *(__m128i *)(data + 1 * im_stride)); - const __m128i src_2 = - _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), - *(__m128i *)(data + 3 * im_stride)); - const __m128i src_4 = - _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), - *(__m128i *)(data + 5 * im_stride)); - const __m128i src_6 = - _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), - *(__m128i *)(data + 7 * im_stride)); - - const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01); - const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); - const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); - const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67); - - const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2), - _mm_add_epi32(res_4, res_6)); - - // Filter odd-index pixels - const __m128i src_1 = - _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride), - *(__m128i *)(data + 1 * im_stride)); - const __m128i src_3 = - _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride), - *(__m128i *)(data + 3 * im_stride)); - const __m128i src_5 = - _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride), - *(__m128i *)(data + 5 * im_stride)); - const __m128i src_7 = - _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride), - *(__m128i *)(data + 7 * im_stride)); - - const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01); - const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23); - const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45); - const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67); - - const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3), - _mm_add_epi32(res_5, res_7)); - - // Rearrange pixels back into the order 0 ... 
7 - const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); - const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); - - const __m128i res_lo_round = - _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift); - const __m128i res_hi_round = - _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift); - - const __m128i res_16b = _mm_packs_epi32(res_lo_round, res_hi_round); - const __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const); - - // Accumulate values into the destination buffer - if (do_average) { - const __m128i data_ref_0 = - _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])); - - const __m128i comp_avg_res = - comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg); - - const __m128i round_result = convolve_rounding( - &comp_avg_res, &offset_const, &rounding_const, rounding_shift); - - const __m128i res_8 = _mm_packus_epi16(round_result, round_result); - - if (w > 4) - _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8); - else - *(uint32_t *)(&dst0[i * dst_stride0 + j]) = - _mm_cvtsi128_si32(res_8); - } else { - _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned); - } - } - } - } -} diff --git a/third_party/aom/av1/common/x86/reconinter_avx2.c b/third_party/aom/av1/common/x86/reconinter_avx2.c deleted file mode 100644 index f645e0454..000000000 --- a/third_party/aom/av1/common/x86/reconinter_avx2.c +++ /dev/null @@ -1,620 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <immintrin.h> - -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/blend.h" -#include "aom_dsp/x86/synonyms.h" -#include "aom_dsp/x86/synonyms_avx2.h" -#include "av1/common/blockd.h" - -static INLINE __m256i calc_mask_avx2(const __m256i mask_base, const __m256i s0, - const __m256i s1) { - const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)); - return _mm256_abs_epi16( - _mm256_add_epi16(mask_base, _mm256_srli_epi16(diff, 4))); - // clamp(diff, 0, 64) can be skiped for diff is always in the range ( 38, 54) -} -void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask, - DIFFWTD_MASK_TYPE mask_type, - const uint8_t *src0, int stride0, - const uint8_t *src1, int stride1, - int h, int w) { - const int mb = (mask_type == DIFFWTD_38_INV) ? AOM_BLEND_A64_MAX_ALPHA : 0; - const __m256i y_mask_base = _mm256_set1_epi16(38 - mb); - int i = 0; - if (4 == w) { - do { - const __m128i s0A = xx_loadl_32(src0); - const __m128i s0B = xx_loadl_32(src0 + stride0); - const __m128i s0C = xx_loadl_32(src0 + stride0 * 2); - const __m128i s0D = xx_loadl_32(src0 + stride0 * 3); - const __m128i s0AB = _mm_unpacklo_epi32(s0A, s0B); - const __m128i s0CD = _mm_unpacklo_epi32(s0C, s0D); - const __m128i s0ABCD = _mm_unpacklo_epi64(s0AB, s0CD); - const __m256i s0ABCD_w = _mm256_cvtepu8_epi16(s0ABCD); - - const __m128i s1A = xx_loadl_32(src1); - const __m128i s1B = xx_loadl_32(src1 + stride1); - const __m128i s1C = xx_loadl_32(src1 + stride1 * 2); - const __m128i s1D = xx_loadl_32(src1 + stride1 * 3); - const __m128i s1AB = _mm_unpacklo_epi32(s1A, s1B); - const __m128i s1CD = _mm_unpacklo_epi32(s1C, s1D); - const __m128i s1ABCD = _mm_unpacklo_epi64(s1AB, s1CD); - const __m256i s1ABCD_w = _mm256_cvtepu8_epi16(s1ABCD); - const __m256i m16 = calc_mask_avx2(y_mask_base, s0ABCD_w, s1ABCD_w); - const __m256i m8 = _mm256_packus_epi16(m16, _mm256_setzero_si256()); - const __m128i x_m8 = - _mm256_castsi256_si128(_mm256_permute4x64_epi64(m8, 0xd8)); - 
xx_storeu_128(mask, x_m8); - src0 += (stride0 << 2); - src1 += (stride1 << 2); - mask += 16; - i += 4; - } while (i < h); - } else if (8 == w) { - do { - const __m128i s0A = xx_loadl_64(src0); - const __m128i s0B = xx_loadl_64(src0 + stride0); - const __m128i s0C = xx_loadl_64(src0 + stride0 * 2); - const __m128i s0D = xx_loadl_64(src0 + stride0 * 3); - const __m256i s0AC_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0A, s0C)); - const __m256i s0BD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0B, s0D)); - const __m128i s1A = xx_loadl_64(src1); - const __m128i s1B = xx_loadl_64(src1 + stride1); - const __m128i s1C = xx_loadl_64(src1 + stride1 * 2); - const __m128i s1D = xx_loadl_64(src1 + stride1 * 3); - const __m256i s1AB_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s1A, s1C)); - const __m256i s1CD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s1B, s1D)); - const __m256i m16AC = calc_mask_avx2(y_mask_base, s0AC_w, s1AB_w); - const __m256i m16BD = calc_mask_avx2(y_mask_base, s0BD_w, s1CD_w); - const __m256i m8 = _mm256_packus_epi16(m16AC, m16BD); - yy_storeu_256(mask, m8); - src0 += stride0 << 2; - src1 += stride1 << 2; - mask += 32; - i += 4; - } while (i < h); - } else if (16 == w) { - do { - const __m128i s0A = xx_load_128(src0); - const __m128i s0B = xx_load_128(src0 + stride0); - const __m128i s1A = xx_load_128(src1); - const __m128i s1B = xx_load_128(src1 + stride1); - const __m256i s0AL = _mm256_cvtepu8_epi16(s0A); - const __m256i s0BL = _mm256_cvtepu8_epi16(s0B); - const __m256i s1AL = _mm256_cvtepu8_epi16(s1A); - const __m256i s1BL = _mm256_cvtepu8_epi16(s1B); - - const __m256i m16AL = calc_mask_avx2(y_mask_base, s0AL, s1AL); - const __m256i m16BL = calc_mask_avx2(y_mask_base, s0BL, s1BL); - - const __m256i m8 = - _mm256_permute4x64_epi64(_mm256_packus_epi16(m16AL, m16BL), 0xd8); - yy_storeu_256(mask, m8); - src0 += stride0 << 1; - src1 += stride1 << 1; - mask += 32; - i += 2; - } while (i < h); - } else { - do { - int j = 0; - do { - const __m256i s0 = 
yy_loadu_256(src0 + j); - const __m256i s1 = yy_loadu_256(src1 + j); - const __m256i s0L = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(s0)); - const __m256i s1L = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(s1)); - const __m256i s0H = - _mm256_cvtepu8_epi16(_mm256_extracti128_si256(s0, 1)); - const __m256i s1H = - _mm256_cvtepu8_epi16(_mm256_extracti128_si256(s1, 1)); - const __m256i m16L = calc_mask_avx2(y_mask_base, s0L, s1L); - const __m256i m16H = calc_mask_avx2(y_mask_base, s0H, s1H); - const __m256i m8 = - _mm256_permute4x64_epi64(_mm256_packus_epi16(m16L, m16H), 0xd8); - yy_storeu_256(mask + j, m8); - j += 32; - } while (j < w); - src0 += stride0; - src1 += stride1; - mask += w; - i += 1; - } while (i < h); - } -} - -static INLINE __m256i calc_mask_d16_avx2(const __m256i *data_src0, - const __m256i *data_src1, - const __m256i *round_const, - const __m256i *mask_base_16, - const __m256i *clip_diff, int round) { - const __m256i diffa = _mm256_subs_epu16(*data_src0, *data_src1); - const __m256i diffb = _mm256_subs_epu16(*data_src1, *data_src0); - const __m256i diff = _mm256_max_epu16(diffa, diffb); - const __m256i diff_round = - _mm256_srli_epi16(_mm256_adds_epu16(diff, *round_const), round); - const __m256i diff_factor = _mm256_srli_epi16(diff_round, DIFF_FACTOR_LOG2); - const __m256i diff_mask = _mm256_adds_epi16(diff_factor, *mask_base_16); - const __m256i diff_clamp = _mm256_min_epi16(diff_mask, *clip_diff); - return diff_clamp; -} - -static INLINE __m256i calc_mask_d16_inv_avx2(const __m256i *data_src0, - const __m256i *data_src1, - const __m256i *round_const, - const __m256i *mask_base_16, - const __m256i *clip_diff, - int round) { - const __m256i diffa = _mm256_subs_epu16(*data_src0, *data_src1); - const __m256i diffb = _mm256_subs_epu16(*data_src1, *data_src0); - const __m256i diff = _mm256_max_epu16(diffa, diffb); - const __m256i diff_round = - _mm256_srli_epi16(_mm256_adds_epu16(diff, *round_const), round); - const __m256i diff_factor = 
_mm256_srli_epi16(diff_round, DIFF_FACTOR_LOG2); - const __m256i diff_mask = _mm256_adds_epi16(diff_factor, *mask_base_16); - const __m256i diff_clamp = _mm256_min_epi16(diff_mask, *clip_diff); - const __m256i diff_const_16 = _mm256_sub_epi16(*clip_diff, diff_clamp); - return diff_const_16; -} - -static INLINE void build_compound_diffwtd_mask_d16_avx2( - uint8_t *mask, const CONV_BUF_TYPE *src0, int src0_stride, - const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, int shift) { - const int mask_base = 38; - const __m256i _r = _mm256_set1_epi16((1 << shift) >> 1); - const __m256i y38 = _mm256_set1_epi16(mask_base); - const __m256i y64 = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); - int i = 0; - if (w == 4) { - do { - const __m128i s0A = xx_loadl_64(src0); - const __m128i s0B = xx_loadl_64(src0 + src0_stride); - const __m128i s0C = xx_loadl_64(src0 + src0_stride * 2); - const __m128i s0D = xx_loadl_64(src0 + src0_stride * 3); - const __m128i s1A = xx_loadl_64(src1); - const __m128i s1B = xx_loadl_64(src1 + src1_stride); - const __m128i s1C = xx_loadl_64(src1 + src1_stride * 2); - const __m128i s1D = xx_loadl_64(src1 + src1_stride * 3); - const __m256i s0 = yy_set_m128i(_mm_unpacklo_epi64(s0C, s0D), - _mm_unpacklo_epi64(s0A, s0B)); - const __m256i s1 = yy_set_m128i(_mm_unpacklo_epi64(s1C, s1D), - _mm_unpacklo_epi64(s1A, s1B)); - const __m256i m16 = calc_mask_d16_avx2(&s0, &s1, &_r, &y38, &y64, shift); - const __m256i m8 = _mm256_packus_epi16(m16, _mm256_setzero_si256()); - xx_storeu_128(mask, - _mm256_castsi256_si128(_mm256_permute4x64_epi64(m8, 0xd8))); - src0 += src0_stride << 2; - src1 += src1_stride << 2; - mask += 16; - i += 4; - } while (i < h); - } else if (w == 8) { - do { - const __m256i s0AB = yy_loadu2_128(src0 + src0_stride, src0); - const __m256i s0CD = - yy_loadu2_128(src0 + src0_stride * 3, src0 + src0_stride * 2); - const __m256i s1AB = yy_loadu2_128(src1 + src1_stride, src1); - const __m256i s1CD = - yy_loadu2_128(src1 + src1_stride * 3, src1 + 
src1_stride * 2); - const __m256i m16AB = - calc_mask_d16_avx2(&s0AB, &s1AB, &_r, &y38, &y64, shift); - const __m256i m16CD = - calc_mask_d16_avx2(&s0CD, &s1CD, &_r, &y38, &y64, shift); - const __m256i m8 = _mm256_packus_epi16(m16AB, m16CD); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8)); - src0 += src0_stride << 2; - src1 += src1_stride << 2; - mask += 32; - i += 4; - } while (i < h); - } else if (w == 16) { - do { - const __m256i s0A = yy_loadu_256(src0); - const __m256i s0B = yy_loadu_256(src0 + src0_stride); - const __m256i s1A = yy_loadu_256(src1); - const __m256i s1B = yy_loadu_256(src1 + src1_stride); - const __m256i m16A = - calc_mask_d16_avx2(&s0A, &s1A, &_r, &y38, &y64, shift); - const __m256i m16B = - calc_mask_d16_avx2(&s0B, &s1B, &_r, &y38, &y64, shift); - const __m256i m8 = _mm256_packus_epi16(m16A, m16B); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8)); - src0 += src0_stride << 1; - src1 += src1_stride << 1; - mask += 32; - i += 2; - } while (i < h); - } else if (w == 32) { - do { - const __m256i s0A = yy_loadu_256(src0); - const __m256i s0B = yy_loadu_256(src0 + 16); - const __m256i s1A = yy_loadu_256(src1); - const __m256i s1B = yy_loadu_256(src1 + 16); - const __m256i m16A = - calc_mask_d16_avx2(&s0A, &s1A, &_r, &y38, &y64, shift); - const __m256i m16B = - calc_mask_d16_avx2(&s0B, &s1B, &_r, &y38, &y64, shift); - const __m256i m8 = _mm256_packus_epi16(m16A, m16B); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8)); - src0 += src0_stride; - src1 += src1_stride; - mask += 32; - i += 1; - } while (i < h); - } else if (w == 64) { - do { - const __m256i s0A = yy_loadu_256(src0); - const __m256i s0B = yy_loadu_256(src0 + 16); - const __m256i s0C = yy_loadu_256(src0 + 32); - const __m256i s0D = yy_loadu_256(src0 + 48); - const __m256i s1A = yy_loadu_256(src1); - const __m256i s1B = yy_loadu_256(src1 + 16); - const __m256i s1C = yy_loadu_256(src1 + 32); - const __m256i s1D = yy_loadu_256(src1 + 48); - const __m256i m16A = - 
calc_mask_d16_avx2(&s0A, &s1A, &_r, &y38, &y64, shift); - const __m256i m16B = - calc_mask_d16_avx2(&s0B, &s1B, &_r, &y38, &y64, shift); - const __m256i m16C = - calc_mask_d16_avx2(&s0C, &s1C, &_r, &y38, &y64, shift); - const __m256i m16D = - calc_mask_d16_avx2(&s0D, &s1D, &_r, &y38, &y64, shift); - const __m256i m8AB = _mm256_packus_epi16(m16A, m16B); - const __m256i m8CD = _mm256_packus_epi16(m16C, m16D); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8AB, 0xd8)); - yy_storeu_256(mask + 32, _mm256_permute4x64_epi64(m8CD, 0xd8)); - src0 += src0_stride; - src1 += src1_stride; - mask += 64; - i += 1; - } while (i < h); - } else { - do { - const __m256i s0A = yy_loadu_256(src0); - const __m256i s0B = yy_loadu_256(src0 + 16); - const __m256i s0C = yy_loadu_256(src0 + 32); - const __m256i s0D = yy_loadu_256(src0 + 48); - const __m256i s0E = yy_loadu_256(src0 + 64); - const __m256i s0F = yy_loadu_256(src0 + 80); - const __m256i s0G = yy_loadu_256(src0 + 96); - const __m256i s0H = yy_loadu_256(src0 + 112); - const __m256i s1A = yy_loadu_256(src1); - const __m256i s1B = yy_loadu_256(src1 + 16); - const __m256i s1C = yy_loadu_256(src1 + 32); - const __m256i s1D = yy_loadu_256(src1 + 48); - const __m256i s1E = yy_loadu_256(src1 + 64); - const __m256i s1F = yy_loadu_256(src1 + 80); - const __m256i s1G = yy_loadu_256(src1 + 96); - const __m256i s1H = yy_loadu_256(src1 + 112); - const __m256i m16A = - calc_mask_d16_avx2(&s0A, &s1A, &_r, &y38, &y64, shift); - const __m256i m16B = - calc_mask_d16_avx2(&s0B, &s1B, &_r, &y38, &y64, shift); - const __m256i m16C = - calc_mask_d16_avx2(&s0C, &s1C, &_r, &y38, &y64, shift); - const __m256i m16D = - calc_mask_d16_avx2(&s0D, &s1D, &_r, &y38, &y64, shift); - const __m256i m16E = - calc_mask_d16_avx2(&s0E, &s1E, &_r, &y38, &y64, shift); - const __m256i m16F = - calc_mask_d16_avx2(&s0F, &s1F, &_r, &y38, &y64, shift); - const __m256i m16G = - calc_mask_d16_avx2(&s0G, &s1G, &_r, &y38, &y64, shift); - const __m256i m16H = - 
calc_mask_d16_avx2(&s0H, &s1H, &_r, &y38, &y64, shift); - const __m256i m8AB = _mm256_packus_epi16(m16A, m16B); - const __m256i m8CD = _mm256_packus_epi16(m16C, m16D); - const __m256i m8EF = _mm256_packus_epi16(m16E, m16F); - const __m256i m8GH = _mm256_packus_epi16(m16G, m16H); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8AB, 0xd8)); - yy_storeu_256(mask + 32, _mm256_permute4x64_epi64(m8CD, 0xd8)); - yy_storeu_256(mask + 64, _mm256_permute4x64_epi64(m8EF, 0xd8)); - yy_storeu_256(mask + 96, _mm256_permute4x64_epi64(m8GH, 0xd8)); - src0 += src0_stride; - src1 += src1_stride; - mask += 128; - i += 1; - } while (i < h); - } -} - -static INLINE void build_compound_diffwtd_mask_d16_inv_avx2( - uint8_t *mask, const CONV_BUF_TYPE *src0, int src0_stride, - const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, int shift) { - const int mask_base = 38; - const __m256i _r = _mm256_set1_epi16((1 << shift) >> 1); - const __m256i y38 = _mm256_set1_epi16(mask_base); - const __m256i y64 = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); - int i = 0; - if (w == 4) { - do { - const __m128i s0A = xx_loadl_64(src0); - const __m128i s0B = xx_loadl_64(src0 + src0_stride); - const __m128i s0C = xx_loadl_64(src0 + src0_stride * 2); - const __m128i s0D = xx_loadl_64(src0 + src0_stride * 3); - const __m128i s1A = xx_loadl_64(src1); - const __m128i s1B = xx_loadl_64(src1 + src1_stride); - const __m128i s1C = xx_loadl_64(src1 + src1_stride * 2); - const __m128i s1D = xx_loadl_64(src1 + src1_stride * 3); - const __m256i s0 = yy_set_m128i(_mm_unpacklo_epi64(s0C, s0D), - _mm_unpacklo_epi64(s0A, s0B)); - const __m256i s1 = yy_set_m128i(_mm_unpacklo_epi64(s1C, s1D), - _mm_unpacklo_epi64(s1A, s1B)); - const __m256i m16 = - calc_mask_d16_inv_avx2(&s0, &s1, &_r, &y38, &y64, shift); - const __m256i m8 = _mm256_packus_epi16(m16, _mm256_setzero_si256()); - xx_storeu_128(mask, - _mm256_castsi256_si128(_mm256_permute4x64_epi64(m8, 0xd8))); - src0 += src0_stride << 2; - src1 += src1_stride << 2; - mask 
+= 16; - i += 4; - } while (i < h); - } else if (w == 8) { - do { - const __m256i s0AB = yy_loadu2_128(src0 + src0_stride, src0); - const __m256i s0CD = - yy_loadu2_128(src0 + src0_stride * 3, src0 + src0_stride * 2); - const __m256i s1AB = yy_loadu2_128(src1 + src1_stride, src1); - const __m256i s1CD = - yy_loadu2_128(src1 + src1_stride * 3, src1 + src1_stride * 2); - const __m256i m16AB = - calc_mask_d16_inv_avx2(&s0AB, &s1AB, &_r, &y38, &y64, shift); - const __m256i m16CD = - calc_mask_d16_inv_avx2(&s0CD, &s1CD, &_r, &y38, &y64, shift); - const __m256i m8 = _mm256_packus_epi16(m16AB, m16CD); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8)); - src0 += src0_stride << 2; - src1 += src1_stride << 2; - mask += 32; - i += 4; - } while (i < h); - } else if (w == 16) { - do { - const __m256i s0A = yy_loadu_256(src0); - const __m256i s0B = yy_loadu_256(src0 + src0_stride); - const __m256i s1A = yy_loadu_256(src1); - const __m256i s1B = yy_loadu_256(src1 + src1_stride); - const __m256i m16A = - calc_mask_d16_inv_avx2(&s0A, &s1A, &_r, &y38, &y64, shift); - const __m256i m16B = - calc_mask_d16_inv_avx2(&s0B, &s1B, &_r, &y38, &y64, shift); - const __m256i m8 = _mm256_packus_epi16(m16A, m16B); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8)); - src0 += src0_stride << 1; - src1 += src1_stride << 1; - mask += 32; - i += 2; - } while (i < h); - } else if (w == 32) { - do { - const __m256i s0A = yy_loadu_256(src0); - const __m256i s0B = yy_loadu_256(src0 + 16); - const __m256i s1A = yy_loadu_256(src1); - const __m256i s1B = yy_loadu_256(src1 + 16); - const __m256i m16A = - calc_mask_d16_inv_avx2(&s0A, &s1A, &_r, &y38, &y64, shift); - const __m256i m16B = - calc_mask_d16_inv_avx2(&s0B, &s1B, &_r, &y38, &y64, shift); - const __m256i m8 = _mm256_packus_epi16(m16A, m16B); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8)); - src0 += src0_stride; - src1 += src1_stride; - mask += 32; - i += 1; - } while (i < h); - } else if (w == 64) { - do { - const __m256i 
s0A = yy_loadu_256(src0); - const __m256i s0B = yy_loadu_256(src0 + 16); - const __m256i s0C = yy_loadu_256(src0 + 32); - const __m256i s0D = yy_loadu_256(src0 + 48); - const __m256i s1A = yy_loadu_256(src1); - const __m256i s1B = yy_loadu_256(src1 + 16); - const __m256i s1C = yy_loadu_256(src1 + 32); - const __m256i s1D = yy_loadu_256(src1 + 48); - const __m256i m16A = - calc_mask_d16_inv_avx2(&s0A, &s1A, &_r, &y38, &y64, shift); - const __m256i m16B = - calc_mask_d16_inv_avx2(&s0B, &s1B, &_r, &y38, &y64, shift); - const __m256i m16C = - calc_mask_d16_inv_avx2(&s0C, &s1C, &_r, &y38, &y64, shift); - const __m256i m16D = - calc_mask_d16_inv_avx2(&s0D, &s1D, &_r, &y38, &y64, shift); - const __m256i m8AB = _mm256_packus_epi16(m16A, m16B); - const __m256i m8CD = _mm256_packus_epi16(m16C, m16D); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8AB, 0xd8)); - yy_storeu_256(mask + 32, _mm256_permute4x64_epi64(m8CD, 0xd8)); - src0 += src0_stride; - src1 += src1_stride; - mask += 64; - i += 1; - } while (i < h); - } else { - do { - const __m256i s0A = yy_loadu_256(src0); - const __m256i s0B = yy_loadu_256(src0 + 16); - const __m256i s0C = yy_loadu_256(src0 + 32); - const __m256i s0D = yy_loadu_256(src0 + 48); - const __m256i s0E = yy_loadu_256(src0 + 64); - const __m256i s0F = yy_loadu_256(src0 + 80); - const __m256i s0G = yy_loadu_256(src0 + 96); - const __m256i s0H = yy_loadu_256(src0 + 112); - const __m256i s1A = yy_loadu_256(src1); - const __m256i s1B = yy_loadu_256(src1 + 16); - const __m256i s1C = yy_loadu_256(src1 + 32); - const __m256i s1D = yy_loadu_256(src1 + 48); - const __m256i s1E = yy_loadu_256(src1 + 64); - const __m256i s1F = yy_loadu_256(src1 + 80); - const __m256i s1G = yy_loadu_256(src1 + 96); - const __m256i s1H = yy_loadu_256(src1 + 112); - const __m256i m16A = - calc_mask_d16_inv_avx2(&s0A, &s1A, &_r, &y38, &y64, shift); - const __m256i m16B = - calc_mask_d16_inv_avx2(&s0B, &s1B, &_r, &y38, &y64, shift); - const __m256i m16C = - 
calc_mask_d16_inv_avx2(&s0C, &s1C, &_r, &y38, &y64, shift); - const __m256i m16D = - calc_mask_d16_inv_avx2(&s0D, &s1D, &_r, &y38, &y64, shift); - const __m256i m16E = - calc_mask_d16_inv_avx2(&s0E, &s1E, &_r, &y38, &y64, shift); - const __m256i m16F = - calc_mask_d16_inv_avx2(&s0F, &s1F, &_r, &y38, &y64, shift); - const __m256i m16G = - calc_mask_d16_inv_avx2(&s0G, &s1G, &_r, &y38, &y64, shift); - const __m256i m16H = - calc_mask_d16_inv_avx2(&s0H, &s1H, &_r, &y38, &y64, shift); - const __m256i m8AB = _mm256_packus_epi16(m16A, m16B); - const __m256i m8CD = _mm256_packus_epi16(m16C, m16D); - const __m256i m8EF = _mm256_packus_epi16(m16E, m16F); - const __m256i m8GH = _mm256_packus_epi16(m16G, m16H); - yy_storeu_256(mask, _mm256_permute4x64_epi64(m8AB, 0xd8)); - yy_storeu_256(mask + 32, _mm256_permute4x64_epi64(m8CD, 0xd8)); - yy_storeu_256(mask + 64, _mm256_permute4x64_epi64(m8EF, 0xd8)); - yy_storeu_256(mask + 96, _mm256_permute4x64_epi64(m8GH, 0xd8)); - src0 += src0_stride; - src1 += src1_stride; - mask += 128; - i += 1; - } while (i < h); - } -} - -void av1_build_compound_diffwtd_mask_d16_avx2( - uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, - int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, - ConvolveParams *conv_params, int bd) { - const int shift = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8); - // When rounding constant is added, there is a possibility of overflow. - // However that much precision is not required. Code should very well work for - // other values of DIFF_FACTOR_LOG2 and AOM_BLEND_A64_MAX_ALPHA as well. But - // there is a possibility of corner case bugs. 
- assert(DIFF_FACTOR_LOG2 == 4); - assert(AOM_BLEND_A64_MAX_ALPHA == 64); - - if (mask_type == DIFFWTD_38) { - build_compound_diffwtd_mask_d16_avx2(mask, src0, src0_stride, src1, - src1_stride, h, w, shift); - } else { - build_compound_diffwtd_mask_d16_inv_avx2(mask, src0, src0_stride, src1, - src1_stride, h, w, shift); - } -} - -void av1_build_compound_diffwtd_mask_highbd_avx2( - uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, - int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, - int bd) { - if (w < 16) { - av1_build_compound_diffwtd_mask_highbd_ssse3( - mask, mask_type, src0, src0_stride, src1, src1_stride, h, w, bd); - } else { - assert(mask_type == DIFFWTD_38 || mask_type == DIFFWTD_38_INV); - assert(bd >= 8); - assert((w % 16) == 0); - const __m256i y0 = _mm256_setzero_si256(); - const __m256i yAOM_BLEND_A64_MAX_ALPHA = - _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); - const int mask_base = 38; - const __m256i ymask_base = _mm256_set1_epi16(mask_base); - const uint16_t *ssrc0 = CONVERT_TO_SHORTPTR(src0); - const uint16_t *ssrc1 = CONVERT_TO_SHORTPTR(src1); - if (bd == 8) { - if (mask_type == DIFFWTD_38_INV) { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 16) { - __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]); - __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]); - __m256i diff = _mm256_srai_epi16( - _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), DIFF_FACTOR_LOG2); - __m256i m = _mm256_min_epi16( - _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)), - yAOM_BLEND_A64_MAX_ALPHA); - m = _mm256_sub_epi16(yAOM_BLEND_A64_MAX_ALPHA, m); - m = _mm256_packus_epi16(m, m); - m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0)); - __m128i m0 = _mm256_castsi256_si128(m); - _mm_storeu_si128((__m128i *)&mask[j], m0); - } - ssrc0 += src0_stride; - ssrc1 += src1_stride; - mask += w; - } - } else { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 16) { - __m256i s0 = 
_mm256_loadu_si256((const __m256i *)&ssrc0[j]); - __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]); - __m256i diff = _mm256_srai_epi16( - _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), DIFF_FACTOR_LOG2); - __m256i m = _mm256_min_epi16( - _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)), - yAOM_BLEND_A64_MAX_ALPHA); - m = _mm256_packus_epi16(m, m); - m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0)); - __m128i m0 = _mm256_castsi256_si128(m); - _mm_storeu_si128((__m128i *)&mask[j], m0); - } - ssrc0 += src0_stride; - ssrc1 += src1_stride; - mask += w; - } - } - } else { - const __m128i xshift = xx_set1_64_from_32i(bd - 8 + DIFF_FACTOR_LOG2); - if (mask_type == DIFFWTD_38_INV) { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 16) { - __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]); - __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]); - __m256i diff = _mm256_sra_epi16( - _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), xshift); - __m256i m = _mm256_min_epi16( - _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)), - yAOM_BLEND_A64_MAX_ALPHA); - m = _mm256_sub_epi16(yAOM_BLEND_A64_MAX_ALPHA, m); - m = _mm256_packus_epi16(m, m); - m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0)); - __m128i m0 = _mm256_castsi256_si128(m); - _mm_storeu_si128((__m128i *)&mask[j], m0); - } - ssrc0 += src0_stride; - ssrc1 += src1_stride; - mask += w; - } - } else { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 16) { - __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]); - __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]); - __m256i diff = _mm256_sra_epi16( - _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), xshift); - __m256i m = _mm256_min_epi16( - _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)), - yAOM_BLEND_A64_MAX_ALPHA); - m = _mm256_packus_epi16(m, m); - m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0)); - __m128i m0 = _mm256_castsi256_si128(m); - _mm_storeu_si128((__m128i 
*)&mask[j], m0); - } - ssrc0 += src0_stride; - ssrc1 += src1_stride; - mask += w; - } - } - } - } -} diff --git a/third_party/aom/av1/common/x86/reconinter_sse4.c b/third_party/aom/av1/common/x86/reconinter_sse4.c deleted file mode 100644 index 5171ca493..000000000 --- a/third_party/aom/av1/common/x86/reconinter_sse4.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <emmintrin.h> // SSE2 -#include <smmintrin.h> /* SSE4.1 */ - -#include "aom/aom_integer.h" -#include "aom_dsp/blend.h" -#include "av1/common/blockd.h" - -static INLINE __m128i calc_mask(const __m128i mask_base, const __m128i s0, - const __m128i s1) { - const __m128i diff = _mm_abs_epi16(_mm_sub_epi16(s0, s1)); - return _mm_abs_epi16(_mm_add_epi16(mask_base, _mm_srli_epi16(diff, 4))); - // clamp(diff, 0, 64) can be skiped for diff is always in the range ( 38, 54) -} - -void av1_build_compound_diffwtd_mask_sse4_1(uint8_t *mask, - DIFFWTD_MASK_TYPE mask_type, - const uint8_t *src0, int stride0, - const uint8_t *src1, int stride1, - int h, int w) { - const int mb = (mask_type == DIFFWTD_38_INV) ? 
AOM_BLEND_A64_MAX_ALPHA : 0; - const __m128i mask_base = _mm_set1_epi16(38 - mb); - int i = 0; - if (4 == w) { - do { - const __m128i s0A = _mm_cvtsi32_si128(*(uint32_t *)src0); - const __m128i s0B = _mm_cvtsi32_si128(*(uint32_t *)(src0 + stride0)); - const __m128i s0AB = _mm_unpacklo_epi32(s0A, s0B); - const __m128i s0 = _mm_cvtepu8_epi16(s0AB); - - const __m128i s1A = _mm_cvtsi32_si128(*(uint32_t *)src1); - const __m128i s1B = _mm_cvtsi32_si128(*(uint32_t *)(src1 + stride1)); - const __m128i s1AB = _mm_unpacklo_epi32(s1A, s1B); - const __m128i s1 = _mm_cvtepu8_epi16(s1AB); - - const __m128i m16 = calc_mask(mask_base, s0, s1); - const __m128i m8 = _mm_packus_epi16(m16, m16); - - *(uint32_t *)mask = _mm_cvtsi128_si32(m8); - *(uint32_t *)(mask + w) = _mm_extract_epi32(m8, 1); - src0 += (stride0 << 1); - src1 += (stride1 << 1); - mask += 8; - i += 2; - } while (i < h); - } else if (8 == w) { - do { - __m128i s0 = _mm_loadl_epi64((__m128i const *)src0); - __m128i s1 = _mm_loadl_epi64((__m128i const *)src1); - s0 = _mm_cvtepu8_epi16(s0); - s1 = _mm_cvtepu8_epi16(s1); - const __m128i m16 = calc_mask(mask_base, s0, s1); - const __m128i m8 = _mm_packus_epi16(m16, m16); - _mm_storel_epi64((__m128i *)mask, m8); - src0 += stride0; - src1 += stride1; - mask += 8; - i += 1; - } while (i < h); - } else { - const __m128i zero = _mm_setzero_si128(); - do { - int j = 0; - do { - const __m128i s0 = _mm_load_si128((__m128i const *)(src0 + j)); - const __m128i s1 = _mm_load_si128((__m128i const *)(src1 + j)); - const __m128i s0L = _mm_cvtepu8_epi16(s0); - const __m128i s1L = _mm_cvtepu8_epi16(s1); - const __m128i s0H = _mm_unpackhi_epi8(s0, zero); - const __m128i s1H = _mm_unpackhi_epi8(s1, zero); - - const __m128i m16L = calc_mask(mask_base, s0L, s1L); - const __m128i m16H = calc_mask(mask_base, s0H, s1H); - - const __m128i m8 = _mm_packus_epi16(m16L, m16H); - _mm_store_si128((__m128i *)(mask + j), m8); - j += 16; - } while (j < w); - src0 += stride0; - src1 += stride1; - mask += w; 
- i += 1; - } while (i < h); - } -} - -void av1_build_compound_diffwtd_mask_d16_sse4_1( - uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, - int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, - ConvolveParams *conv_params, int bd) { - const int which_inverse = (mask_type == DIFFWTD_38) ? 0 : 1; - const int mask_base = 38; - int round = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8); - const __m128i round_const = _mm_set1_epi16((1 << round) >> 1); - const __m128i mask_base_16 = _mm_set1_epi16(mask_base); - const __m128i clip_diff = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); - const __m128i add_const = - _mm_set1_epi16((which_inverse ? AOM_BLEND_A64_MAX_ALPHA : 0)); - const __m128i add_sign = _mm_set1_epi16((which_inverse ? -1 : 1)); - - int i, j; - // When rounding constant is added, there is a possibility of overflow. - // However that much precision is not required. Code should very well work for - // other values of DIFF_FACTOR_LOG2 and AOM_BLEND_A64_MAX_ALPHA as well. But - // there is a possibility of corner case bugs. 
- assert(DIFF_FACTOR_LOG2 == 4); - assert(AOM_BLEND_A64_MAX_ALPHA == 64); - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - const __m128i data_src0 = - _mm_loadu_si128((__m128i *)&src0[(i * src0_stride) + j]); - const __m128i data_src1 = - _mm_loadu_si128((__m128i *)&src1[(i * src1_stride) + j]); - - const __m128i diffa = _mm_subs_epu16(data_src0, data_src1); - const __m128i diffb = _mm_subs_epu16(data_src1, data_src0); - const __m128i diff = _mm_max_epu16(diffa, diffb); - const __m128i diff_round = - _mm_srli_epi16(_mm_adds_epu16(diff, round_const), round); - const __m128i diff_factor = _mm_srli_epi16(diff_round, DIFF_FACTOR_LOG2); - const __m128i diff_mask = _mm_adds_epi16(diff_factor, mask_base_16); - __m128i diff_clamp = _mm_min_epi16(diff_mask, clip_diff); - // clamp to 0 can be skipped since we are using add and saturate - // instruction - - const __m128i diff_sign = _mm_sign_epi16(diff_clamp, add_sign); - const __m128i diff_const_16 = _mm_add_epi16(diff_sign, add_const); - - // 8 bit conversion and saturation to uint8 - const __m128i res_8 = _mm_packus_epi16(diff_const_16, diff_const_16); - - // Store values into the destination buffer - __m128i *const dst = (__m128i *)&mask[i * w + j]; - - if ((w - j) > 4) { - _mm_storel_epi64(dst, res_8); - } else { // w==4 - *(uint32_t *)dst = _mm_cvtsi128_si32(res_8); - } - } - } -} diff --git a/third_party/aom/av1/common/x86/reconinter_ssse3.c b/third_party/aom/av1/common/x86/reconinter_ssse3.c deleted file mode 100644 index cf684447c..000000000 --- a/third_party/aom/av1/common/x86/reconinter_ssse3.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <tmmintrin.h> - -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/blend.h" -#include "aom_dsp/x86/synonyms.h" -#include "av1/common/blockd.h" - -void av1_build_compound_diffwtd_mask_highbd_ssse3( - uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, - int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, - int bd) { - if (w < 8) { - av1_build_compound_diffwtd_mask_highbd_c(mask, mask_type, src0, src0_stride, - src1, src1_stride, h, w, bd); - } else { - assert(bd >= 8); - assert((w % 8) == 0); - assert(mask_type == DIFFWTD_38 || mask_type == DIFFWTD_38_INV); - const __m128i x0 = _mm_setzero_si128(); - const __m128i xAOM_BLEND_A64_MAX_ALPHA = - _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); - const int mask_base = 38; - const __m128i xmask_base = _mm_set1_epi16(mask_base); - const uint16_t *ssrc0 = CONVERT_TO_SHORTPTR(src0); - const uint16_t *ssrc1 = CONVERT_TO_SHORTPTR(src1); - if (bd == 8) { - if (mask_type == DIFFWTD_38_INV) { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 8) { - __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]); - __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]); - __m128i diff = _mm_srai_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), - DIFF_FACTOR_LOG2); - __m128i m = _mm_min_epi16( - _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)), - xAOM_BLEND_A64_MAX_ALPHA); - m = _mm_sub_epi16(xAOM_BLEND_A64_MAX_ALPHA, m); - m = _mm_packus_epi16(m, m); - _mm_storel_epi64((__m128i *)&mask[j], m); - } - ssrc0 += src0_stride; - ssrc1 += src1_stride; - mask += w; - } - } else { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 8) { - __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]); - __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]); - __m128i diff = 
_mm_srai_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), - DIFF_FACTOR_LOG2); - __m128i m = _mm_min_epi16( - _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)), - xAOM_BLEND_A64_MAX_ALPHA); - m = _mm_packus_epi16(m, m); - _mm_storel_epi64((__m128i *)&mask[j], m); - } - ssrc0 += src0_stride; - ssrc1 += src1_stride; - mask += w; - } - } - } else { - const __m128i xshift = xx_set1_64_from_32i(bd - 8 + DIFF_FACTOR_LOG2); - if (mask_type == DIFFWTD_38_INV) { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 8) { - __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]); - __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]); - __m128i diff = - _mm_sra_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), xshift); - __m128i m = _mm_min_epi16( - _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)), - xAOM_BLEND_A64_MAX_ALPHA); - m = _mm_sub_epi16(xAOM_BLEND_A64_MAX_ALPHA, m); - m = _mm_packus_epi16(m, m); - _mm_storel_epi64((__m128i *)&mask[j], m); - } - ssrc0 += src0_stride; - ssrc1 += src1_stride; - mask += w; - } - } else { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 8) { - __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]); - __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]); - __m128i diff = - _mm_sra_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), xshift); - __m128i m = _mm_min_epi16( - _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)), - xAOM_BLEND_A64_MAX_ALPHA); - m = _mm_packus_epi16(m, m); - _mm_storel_epi64((__m128i *)&mask[j], m); - } - ssrc0 += src0_stride; - ssrc1 += src1_stride; - mask += w; - } - } - } - } -} diff --git a/third_party/aom/av1/common/x86/selfguided_avx2.c b/third_party/aom/av1/common/x86/selfguided_avx2.c deleted file mode 100644 index 0aaf1f454..000000000 --- a/third_party/aom/av1/common/x86/selfguided_avx2.c +++ /dev/null @@ -1,724 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <immintrin.h> - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "av1/common/restoration.h" -#include "aom_dsp/x86/synonyms.h" -#include "aom_dsp/x86/synonyms_avx2.h" - -// Load 8 bytes from the possibly-misaligned pointer p, extend each byte to -// 32-bit precision and return them in an AVX2 register. -static __m256i yy256_load_extend_8_32(const void *p) { - return _mm256_cvtepu8_epi32(xx_loadl_64(p)); -} - -// Load 8 halfwords from the possibly-misaligned pointer p, extend each -// halfword to 32-bit precision and return them in an AVX2 register. -static __m256i yy256_load_extend_16_32(const void *p) { - return _mm256_cvtepu16_epi32(xx_loadu_128(p)); -} - -// Compute the scan of an AVX2 register holding 8 32-bit integers. If the -// register holds x0..x7 then the scan will hold x0, x0+x1, x0+x1+x2, ..., -// x0+x1+...+x7 -// -// Let [...] represent a 128-bit block, and let a, ..., h be 32-bit integers -// (assumed small enough to be able to add them without overflow). -// -// Use -> as shorthand for summing, i.e. h->a = h + g + f + e + d + c + b + a. 
-// -// x = [h g f e][d c b a] -// x01 = [g f e 0][c b a 0] -// x02 = [g+h f+g e+f e][c+d b+c a+b a] -// x03 = [e+f e 0 0][a+b a 0 0] -// x04 = [e->h e->g e->f e][a->d a->c a->b a] -// s = a->d -// s01 = [a->d a->d a->d a->d] -// s02 = [a->d a->d a->d a->d][0 0 0 0] -// ret = [a->h a->g a->f a->e][a->d a->c a->b a] -static __m256i scan_32(__m256i x) { - const __m256i x01 = _mm256_slli_si256(x, 4); - const __m256i x02 = _mm256_add_epi32(x, x01); - const __m256i x03 = _mm256_slli_si256(x02, 8); - const __m256i x04 = _mm256_add_epi32(x02, x03); - const int32_t s = _mm256_extract_epi32(x04, 3); - const __m128i s01 = _mm_set1_epi32(s); - const __m256i s02 = _mm256_insertf128_si256(_mm256_setzero_si256(), s01, 1); - return _mm256_add_epi32(x04, s02); -} - -// Compute two integral images from src. B sums elements; A sums their -// squares. The images are offset by one pixel, so will have width and height -// equal to width + 1, height + 1 and the first row and column will be zero. -// -// A+1 and B+1 should be aligned to 32 bytes. buf_stride should be a multiple -// of 8. - -static void *memset_zero_avx(int32_t *dest, const __m256i *zero, size_t count) { - unsigned int i = 0; - for (i = 0; i < (count & 0xffffffe0); i += 32) { - _mm256_storeu_si256((__m256i *)(dest + i), *zero); - _mm256_storeu_si256((__m256i *)(dest + i + 8), *zero); - _mm256_storeu_si256((__m256i *)(dest + i + 16), *zero); - _mm256_storeu_si256((__m256i *)(dest + i + 24), *zero); - } - for (; i < (count & 0xfffffff8); i += 8) { - _mm256_storeu_si256((__m256i *)(dest + i), *zero); - } - for (; i < count; i++) { - dest[i] = 0; - } - return dest; -} - -static void integral_images(const uint8_t *src, int src_stride, int width, - int height, int32_t *A, int32_t *B, - int buf_stride) { - const __m256i zero = _mm256_setzero_si256(); - // Write out the zero top row - memset_zero_avx(A, &zero, (width + 8)); - memset_zero_avx(B, &zero, (width + 8)); - for (int i = 0; i < height; ++i) { - // Zero the left column. 
- A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0; - - // ldiff is the difference H - D where H is the output sample immediately - // to the left and D is the output sample above it. These are scalars, - // replicated across the eight lanes. - __m256i ldiff1 = zero, ldiff2 = zero; - for (int j = 0; j < width; j += 8) { - const int ABj = 1 + j; - - const __m256i above1 = yy_load_256(B + ABj + i * buf_stride); - const __m256i above2 = yy_load_256(A + ABj + i * buf_stride); - - const __m256i x1 = yy256_load_extend_8_32(src + j + i * src_stride); - const __m256i x2 = _mm256_madd_epi16(x1, x1); - - const __m256i sc1 = scan_32(x1); - const __m256i sc2 = scan_32(x2); - - const __m256i row1 = - _mm256_add_epi32(_mm256_add_epi32(sc1, above1), ldiff1); - const __m256i row2 = - _mm256_add_epi32(_mm256_add_epi32(sc2, above2), ldiff2); - - yy_store_256(B + ABj + (i + 1) * buf_stride, row1); - yy_store_256(A + ABj + (i + 1) * buf_stride, row2); - - // Calculate the new H - D. - ldiff1 = _mm256_set1_epi32( - _mm256_extract_epi32(_mm256_sub_epi32(row1, above1), 7)); - ldiff2 = _mm256_set1_epi32( - _mm256_extract_epi32(_mm256_sub_epi32(row2, above2), 7)); - } - } -} - -// Compute two integral images from src. B sums elements; A sums their squares -// -// A and B should be aligned to 32 bytes. buf_stride should be a multiple of 8. -static void integral_images_highbd(const uint16_t *src, int src_stride, - int width, int height, int32_t *A, - int32_t *B, int buf_stride) { - const __m256i zero = _mm256_setzero_si256(); - // Write out the zero top row - memset_zero_avx(A, &zero, (width + 8)); - memset_zero_avx(B, &zero, (width + 8)); - - for (int i = 0; i < height; ++i) { - // Zero the left column. - A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0; - - // ldiff is the difference H - D where H is the output sample immediately - // to the left and D is the output sample above it. These are scalars, - // replicated across the eight lanes. 
- __m256i ldiff1 = zero, ldiff2 = zero; - for (int j = 0; j < width; j += 8) { - const int ABj = 1 + j; - - const __m256i above1 = yy_load_256(B + ABj + i * buf_stride); - const __m256i above2 = yy_load_256(A + ABj + i * buf_stride); - - const __m256i x1 = yy256_load_extend_16_32(src + j + i * src_stride); - const __m256i x2 = _mm256_madd_epi16(x1, x1); - - const __m256i sc1 = scan_32(x1); - const __m256i sc2 = scan_32(x2); - - const __m256i row1 = - _mm256_add_epi32(_mm256_add_epi32(sc1, above1), ldiff1); - const __m256i row2 = - _mm256_add_epi32(_mm256_add_epi32(sc2, above2), ldiff2); - - yy_store_256(B + ABj + (i + 1) * buf_stride, row1); - yy_store_256(A + ABj + (i + 1) * buf_stride, row2); - - // Calculate the new H - D. - ldiff1 = _mm256_set1_epi32( - _mm256_extract_epi32(_mm256_sub_epi32(row1, above1), 7)); - ldiff2 = _mm256_set1_epi32( - _mm256_extract_epi32(_mm256_sub_epi32(row2, above2), 7)); - } - } -} - -// Compute 8 values of boxsum from the given integral image. ii should point -// at the middle of the box (for the first value). r is the box radius. 
-static INLINE __m256i boxsum_from_ii(const int32_t *ii, int stride, int r) { - const __m256i tl = yy_loadu_256(ii - (r + 1) - (r + 1) * stride); - const __m256i tr = yy_loadu_256(ii + (r + 0) - (r + 1) * stride); - const __m256i bl = yy_loadu_256(ii - (r + 1) + r * stride); - const __m256i br = yy_loadu_256(ii + (r + 0) + r * stride); - const __m256i u = _mm256_sub_epi32(tr, tl); - const __m256i v = _mm256_sub_epi32(br, bl); - return _mm256_sub_epi32(v, u); -} - -static __m256i round_for_shift(unsigned shift) { - return _mm256_set1_epi32((1 << shift) >> 1); -} - -static __m256i compute_p(__m256i sum1, __m256i sum2, int bit_depth, int n) { - __m256i an, bb; - if (bit_depth > 8) { - const __m256i rounding_a = round_for_shift(2 * (bit_depth - 8)); - const __m256i rounding_b = round_for_shift(bit_depth - 8); - const __m128i shift_a = _mm_cvtsi32_si128(2 * (bit_depth - 8)); - const __m128i shift_b = _mm_cvtsi32_si128(bit_depth - 8); - const __m256i a = - _mm256_srl_epi32(_mm256_add_epi32(sum2, rounding_a), shift_a); - const __m256i b = - _mm256_srl_epi32(_mm256_add_epi32(sum1, rounding_b), shift_b); - // b < 2^14, so we can use a 16-bit madd rather than a 32-bit - // mullo to square it - bb = _mm256_madd_epi16(b, b); - an = _mm256_max_epi32(_mm256_mullo_epi32(a, _mm256_set1_epi32(n)), bb); - } else { - bb = _mm256_madd_epi16(sum1, sum1); - an = _mm256_mullo_epi32(sum2, _mm256_set1_epi32(n)); - } - return _mm256_sub_epi32(an, bb); -} - -// Assumes that C, D are integral images for the original buffer which has been -// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels -// on the sides. A, B, C, D point at logical position (0, 0). 
-static void calc_ab(int32_t *A, int32_t *B, const int32_t *C, const int32_t *D, - int width, int height, int buf_stride, int bit_depth, - int sgr_params_idx, int radius_idx) { - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - const int r = params->r[radius_idx]; - const int n = (2 * r + 1) * (2 * r + 1); - const __m256i s = _mm256_set1_epi32(params->s[radius_idx]); - // one_over_n[n-1] is 2^12/n, so easily fits in an int16 - const __m256i one_over_n = _mm256_set1_epi32(one_by_x[n - 1]); - - const __m256i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS); - const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS); - - // Set up masks - const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff); - __m256i mask[8]; - for (int idx = 0; idx < 8; idx++) { - const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx)); - mask[idx] = _mm256_cvtepi8_epi32(_mm_srl_epi64(ones32, shift)); - } - - for (int i = -1; i < height + 1; ++i) { - for (int j = -1; j < width + 1; j += 8) { - const int32_t *Cij = C + i * buf_stride + j; - const int32_t *Dij = D + i * buf_stride + j; - - __m256i sum1 = boxsum_from_ii(Dij, buf_stride, r); - __m256i sum2 = boxsum_from_ii(Cij, buf_stride, r); - - // When width + 2 isn't a multiple of 8, sum1 and sum2 will contain - // some uninitialised data in their upper words. We use a mask to - // ensure that these bits are set to 0. 
- int idx = AOMMIN(8, width + 1 - j); - assert(idx >= 1); - - if (idx < 8) { - sum1 = _mm256_and_si256(mask[idx], sum1); - sum2 = _mm256_and_si256(mask[idx], sum2); - } - - const __m256i p = compute_p(sum1, sum2, bit_depth, n); - - const __m256i z = _mm256_min_epi32( - _mm256_srli_epi32(_mm256_add_epi32(_mm256_mullo_epi32(p, s), rnd_z), - SGRPROJ_MTABLE_BITS), - _mm256_set1_epi32(255)); - - const __m256i a_res = _mm256_i32gather_epi32(x_by_xplus1, z, 4); - - yy_storeu_256(A + i * buf_stride + j, a_res); - - const __m256i a_complement = - _mm256_sub_epi32(_mm256_set1_epi32(SGRPROJ_SGR), a_res); - - // sum1 might have lanes greater than 2^15, so we can't use madd to do - // multiplication involving sum1. However, a_complement and one_over_n - // are both less than 256, so we can multiply them first. - const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n); - const __m256i b_int = _mm256_mullo_epi32(a_comp_over_n, sum1); - const __m256i b_res = _mm256_srli_epi32(_mm256_add_epi32(b_int, rnd_res), - SGRPROJ_RECIP_BITS); - - yy_storeu_256(B + i * buf_stride + j, b_res); - } - } -} - -// Calculate 8 values of the "cross sum" starting at buf. This is a 3x3 filter -// where the outer four corners have weight 3 and all other pixels have weight -// 4. 
-// -// Pixels are indexed as follows: -// xtl xt xtr -// xl x xr -// xbl xb xbr -// -// buf points to x -// -// fours = xl + xt + xr + xb + x -// threes = xtl + xtr + xbr + xbl -// cross_sum = 4 * fours + 3 * threes -// = 4 * (fours + threes) - threes -// = (fours + threes) << 2 - threes -static INLINE __m256i cross_sum(const int32_t *buf, int stride) { - const __m256i xtl = yy_loadu_256(buf - 1 - stride); - const __m256i xt = yy_loadu_256(buf - stride); - const __m256i xtr = yy_loadu_256(buf + 1 - stride); - const __m256i xl = yy_loadu_256(buf - 1); - const __m256i x = yy_loadu_256(buf); - const __m256i xr = yy_loadu_256(buf + 1); - const __m256i xbl = yy_loadu_256(buf - 1 + stride); - const __m256i xb = yy_loadu_256(buf + stride); - const __m256i xbr = yy_loadu_256(buf + 1 + stride); - - const __m256i fours = _mm256_add_epi32( - xl, _mm256_add_epi32(xt, _mm256_add_epi32(xr, _mm256_add_epi32(xb, x)))); - const __m256i threes = - _mm256_add_epi32(xtl, _mm256_add_epi32(xtr, _mm256_add_epi32(xbr, xbl))); - - return _mm256_sub_epi32(_mm256_slli_epi32(_mm256_add_epi32(fours, threes), 2), - threes); -} - -// The final filter for self-guided restoration. Computes a weighted average -// across A, B with "cross sums" (see cross_sum implementation above). -static void final_filter(int32_t *dst, int dst_stride, const int32_t *A, - const int32_t *B, int buf_stride, const void *dgd8, - int dgd_stride, int width, int height, int highbd) { - const int nb = 5; - const __m256i rounding = - round_for_shift(SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); - const uint8_t *dgd_real = - highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8; - - for (int i = 0; i < height; ++i) { - for (int j = 0; j < width; j += 8) { - const __m256i a = cross_sum(A + i * buf_stride + j, buf_stride); - const __m256i b = cross_sum(B + i * buf_stride + j, buf_stride); - - const __m128i raw = - xx_loadu_128(dgd_real + ((i * dgd_stride + j) << highbd)); - const __m256i src = - highbd ? 
_mm256_cvtepu16_epi32(raw) : _mm256_cvtepu8_epi32(raw); - - __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b); - __m256i w = _mm256_srai_epi32(_mm256_add_epi32(v, rounding), - SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); - - yy_storeu_256(dst + i * dst_stride + j, w); - } - } -} - -// Assumes that C, D are integral images for the original buffer which has been -// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels -// on the sides. A, B, C, D point at logical position (0, 0). -static void calc_ab_fast(int32_t *A, int32_t *B, const int32_t *C, - const int32_t *D, int width, int height, - int buf_stride, int bit_depth, int sgr_params_idx, - int radius_idx) { - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - const int r = params->r[radius_idx]; - const int n = (2 * r + 1) * (2 * r + 1); - const __m256i s = _mm256_set1_epi32(params->s[radius_idx]); - // one_over_n[n-1] is 2^12/n, so easily fits in an int16 - const __m256i one_over_n = _mm256_set1_epi32(one_by_x[n - 1]); - - const __m256i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS); - const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS); - - // Set up masks - const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff); - __m256i mask[8]; - for (int idx = 0; idx < 8; idx++) { - const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx)); - mask[idx] = _mm256_cvtepi8_epi32(_mm_srl_epi64(ones32, shift)); - } - - for (int i = -1; i < height + 1; i += 2) { - for (int j = -1; j < width + 1; j += 8) { - const int32_t *Cij = C + i * buf_stride + j; - const int32_t *Dij = D + i * buf_stride + j; - - __m256i sum1 = boxsum_from_ii(Dij, buf_stride, r); - __m256i sum2 = boxsum_from_ii(Cij, buf_stride, r); - - // When width + 2 isn't a multiple of 8, sum1 and sum2 will contain - // some uninitialised data in their upper words. We use a mask to - // ensure that these bits are set to 0. 
- int idx = AOMMIN(8, width + 1 - j); - assert(idx >= 1); - - if (idx < 8) { - sum1 = _mm256_and_si256(mask[idx], sum1); - sum2 = _mm256_and_si256(mask[idx], sum2); - } - - const __m256i p = compute_p(sum1, sum2, bit_depth, n); - - const __m256i z = _mm256_min_epi32( - _mm256_srli_epi32(_mm256_add_epi32(_mm256_mullo_epi32(p, s), rnd_z), - SGRPROJ_MTABLE_BITS), - _mm256_set1_epi32(255)); - - const __m256i a_res = _mm256_i32gather_epi32(x_by_xplus1, z, 4); - - yy_storeu_256(A + i * buf_stride + j, a_res); - - const __m256i a_complement = - _mm256_sub_epi32(_mm256_set1_epi32(SGRPROJ_SGR), a_res); - - // sum1 might have lanes greater than 2^15, so we can't use madd to do - // multiplication involving sum1. However, a_complement and one_over_n - // are both less than 256, so we can multiply them first. - const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n); - const __m256i b_int = _mm256_mullo_epi32(a_comp_over_n, sum1); - const __m256i b_res = _mm256_srli_epi32(_mm256_add_epi32(b_int, rnd_res), - SGRPROJ_RECIP_BITS); - - yy_storeu_256(B + i * buf_stride + j, b_res); - } - } -} - -// Calculate 8 values of the "cross sum" starting at buf. 
-// -// Pixels are indexed like this: -// xtl xt xtr -// - buf - -// xbl xb xbr -// -// Pixels are weighted like this: -// 5 6 5 -// 0 0 0 -// 5 6 5 -// -// fives = xtl + xtr + xbl + xbr -// sixes = xt + xb -// cross_sum = 6 * sixes + 5 * fives -// = 5 * (fives + sixes) - sixes -// = (fives + sixes) << 2 + (fives + sixes) + sixes -static INLINE __m256i cross_sum_fast_even_row(const int32_t *buf, int stride) { - const __m256i xtl = yy_loadu_256(buf - 1 - stride); - const __m256i xt = yy_loadu_256(buf - stride); - const __m256i xtr = yy_loadu_256(buf + 1 - stride); - const __m256i xbl = yy_loadu_256(buf - 1 + stride); - const __m256i xb = yy_loadu_256(buf + stride); - const __m256i xbr = yy_loadu_256(buf + 1 + stride); - - const __m256i fives = - _mm256_add_epi32(xtl, _mm256_add_epi32(xtr, _mm256_add_epi32(xbr, xbl))); - const __m256i sixes = _mm256_add_epi32(xt, xb); - const __m256i fives_plus_sixes = _mm256_add_epi32(fives, sixes); - - return _mm256_add_epi32( - _mm256_add_epi32(_mm256_slli_epi32(fives_plus_sixes, 2), - fives_plus_sixes), - sixes); -} - -// Calculate 8 values of the "cross sum" starting at buf. -// -// Pixels are indexed like this: -// xl x xr -// -// Pixels are weighted like this: -// 5 6 5 -// -// buf points to x -// -// fives = xl + xr -// sixes = x -// cross_sum = 5 * fives + 6 * sixes -// = 4 * (fives + sixes) + (fives + sixes) + sixes -// = (fives + sixes) << 2 + (fives + sixes) + sixes -static INLINE __m256i cross_sum_fast_odd_row(const int32_t *buf) { - const __m256i xl = yy_loadu_256(buf - 1); - const __m256i x = yy_loadu_256(buf); - const __m256i xr = yy_loadu_256(buf + 1); - - const __m256i fives = _mm256_add_epi32(xl, xr); - const __m256i sixes = x; - - const __m256i fives_plus_sixes = _mm256_add_epi32(fives, sixes); - - return _mm256_add_epi32( - _mm256_add_epi32(_mm256_slli_epi32(fives_plus_sixes, 2), - fives_plus_sixes), - sixes); -} - -// The final filter for the self-guided restoration. 
Computes a -// weighted average across A, B with "cross sums" (see cross_sum_... -// implementations above). -static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A, - const int32_t *B, int buf_stride, - const void *dgd8, int dgd_stride, int width, - int height, int highbd) { - const int nb0 = 5; - const int nb1 = 4; - - const __m256i rounding0 = - round_for_shift(SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS); - const __m256i rounding1 = - round_for_shift(SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS); - - const uint8_t *dgd_real = - highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8; - - for (int i = 0; i < height; ++i) { - if (!(i & 1)) { // even row - for (int j = 0; j < width; j += 8) { - const __m256i a = - cross_sum_fast_even_row(A + i * buf_stride + j, buf_stride); - const __m256i b = - cross_sum_fast_even_row(B + i * buf_stride + j, buf_stride); - - const __m128i raw = - xx_loadu_128(dgd_real + ((i * dgd_stride + j) << highbd)); - const __m256i src = - highbd ? _mm256_cvtepu16_epi32(raw) : _mm256_cvtepu8_epi32(raw); - - __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b); - __m256i w = - _mm256_srai_epi32(_mm256_add_epi32(v, rounding0), - SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS); - - yy_storeu_256(dst + i * dst_stride + j, w); - } - } else { // odd row - for (int j = 0; j < width; j += 8) { - const __m256i a = cross_sum_fast_odd_row(A + i * buf_stride + j); - const __m256i b = cross_sum_fast_odd_row(B + i * buf_stride + j); - - const __m128i raw = - xx_loadu_128(dgd_real + ((i * dgd_stride + j) << highbd)); - const __m256i src = - highbd ? 
_mm256_cvtepu16_epi32(raw) : _mm256_cvtepu8_epi32(raw); - - __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b); - __m256i w = - _mm256_srai_epi32(_mm256_add_epi32(v, rounding1), - SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS); - - yy_storeu_256(dst + i * dst_stride + j, w); - } - } - } -} - -int av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height, - int dgd_stride, int32_t *flt0, - int32_t *flt1, int flt_stride, - int sgr_params_idx, int bit_depth, - int highbd) { - // The ALIGN_POWER_OF_TWO macro here ensures that column 1 of Atl, Btl, - // Ctl and Dtl is 32-byte aligned. - const int buf_elts = ALIGN_POWER_OF_TWO(RESTORATION_PROC_UNIT_PELS, 3); - - int32_t *buf = aom_memalign( - 32, 4 * sizeof(*buf) * ALIGN_POWER_OF_TWO(RESTORATION_PROC_UNIT_PELS, 3)); - if (!buf) return -1; - - const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ; - const int height_ext = height + 2 * SGRPROJ_BORDER_VERT; - - // Adjusting the stride of A and B here appears to avoid bad cache effects, - // leading to a significant speed improvement. - // We also align the stride to a multiple of 32 bytes for efficiency. - int buf_stride = ALIGN_POWER_OF_TWO(width_ext + 16, 3); - - // The "tl" pointers point at the top-left of the initialised data for the - // array. - int32_t *Atl = buf + 0 * buf_elts + 7; - int32_t *Btl = buf + 1 * buf_elts + 7; - int32_t *Ctl = buf + 2 * buf_elts + 7; - int32_t *Dtl = buf + 3 * buf_elts + 7; - - // The "0" pointers are (- SGRPROJ_BORDER_VERT, -SGRPROJ_BORDER_HORZ). Note - // there's a zero row and column in A, B (integral images), so we move down - // and right one for them. - const int buf_diag_border = - SGRPROJ_BORDER_HORZ + buf_stride * SGRPROJ_BORDER_VERT; - - int32_t *A0 = Atl + 1 + buf_stride; - int32_t *B0 = Btl + 1 + buf_stride; - int32_t *C0 = Ctl + 1 + buf_stride; - int32_t *D0 = Dtl + 1 + buf_stride; - - // Finally, A, B, C, D point at position (0, 0). 
- int32_t *A = A0 + buf_diag_border; - int32_t *B = B0 + buf_diag_border; - int32_t *C = C0 + buf_diag_border; - int32_t *D = D0 + buf_diag_border; - - const int dgd_diag_border = - SGRPROJ_BORDER_HORZ + dgd_stride * SGRPROJ_BORDER_VERT; - const uint8_t *dgd0 = dgd8 - dgd_diag_border; - - // Generate integral images from the input. C will contain sums of squares; D - // will contain just sums - if (highbd) - integral_images_highbd(CONVERT_TO_SHORTPTR(dgd0), dgd_stride, width_ext, - height_ext, Ctl, Dtl, buf_stride); - else - integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl, - buf_stride); - - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - // Write to flt0 and flt1 - // If params->r == 0 we skip the corresponding filter. We only allow one of - // the radii to be 0, as having both equal to 0 would be equivalent to - // skipping SGR entirely. - assert(!(params->r[0] == 0 && params->r[1] == 0)); - assert(params->r[0] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - assert(params->r[1] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - - if (params->r[0] > 0) { - calc_ab_fast(A, B, C, D, width, height, buf_stride, bit_depth, - sgr_params_idx, 0); - final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, - width, height, highbd); - } - - if (params->r[1] > 0) { - calc_ab(A, B, C, D, width, height, buf_stride, bit_depth, sgr_params_idx, - 1); - final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, - height, highbd); - } - aom_free(buf); - return 0; -} - -void apply_selfguided_restoration_avx2(const uint8_t *dat8, int width, - int height, int stride, int eps, - const int *xqd, uint8_t *dst8, - int dst_stride, int32_t *tmpbuf, - int bit_depth, int highbd) { - int32_t *flt0 = tmpbuf; - int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX; - assert(width * height <= RESTORATION_UNITPELS_MAX); - const int ret = av1_selfguided_restoration_avx2( - dat8, width, height, stride, flt0, flt1, width, eps, 
bit_depth, highbd); - (void)ret; - assert(!ret); - const sgr_params_type *const params = &sgr_params[eps]; - int xq[2]; - decode_xq(xqd, xq, params); - - __m256i xq0 = _mm256_set1_epi32(xq[0]); - __m256i xq1 = _mm256_set1_epi32(xq[1]); - - for (int i = 0; i < height; ++i) { - // Calculate output in batches of 16 pixels - for (int j = 0; j < width; j += 16) { - const int k = i * width + j; - const int m = i * dst_stride + j; - - const uint8_t *dat8ij = dat8 + i * stride + j; - __m256i ep_0, ep_1; - __m128i src_0, src_1; - if (highbd) { - src_0 = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij)); - src_1 = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij + 8)); - ep_0 = _mm256_cvtepu16_epi32(src_0); - ep_1 = _mm256_cvtepu16_epi32(src_1); - } else { - src_0 = xx_loadu_128(dat8ij); - ep_0 = _mm256_cvtepu8_epi32(src_0); - ep_1 = _mm256_cvtepu8_epi32(_mm_srli_si128(src_0, 8)); - } - - const __m256i u_0 = _mm256_slli_epi32(ep_0, SGRPROJ_RST_BITS); - const __m256i u_1 = _mm256_slli_epi32(ep_1, SGRPROJ_RST_BITS); - - __m256i v_0 = _mm256_slli_epi32(u_0, SGRPROJ_PRJ_BITS); - __m256i v_1 = _mm256_slli_epi32(u_1, SGRPROJ_PRJ_BITS); - - if (params->r[0] > 0) { - const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt0[k]), u_0); - v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq0, f1_0)); - - const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt0[k + 8]), u_1); - v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq0, f1_1)); - } - - if (params->r[1] > 0) { - const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0); - v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq1, f2_0)); - - const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1); - v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq1, f2_1)); - } - - const __m256i rounding = - round_for_shift(SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - const __m256i w_0 = _mm256_srai_epi32( - _mm256_add_epi32(v_0, rounding), SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - const __m256i w_1 = _mm256_srai_epi32( - _mm256_add_epi32(v_1, rounding), 
SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - - if (highbd) { - // Pack into 16 bits and clamp to [0, 2^bit_depth) - // Note that packing into 16 bits messes up the order of the bits, - // so we use a permute function to correct this - const __m256i tmp = _mm256_packus_epi32(w_0, w_1); - const __m256i tmp2 = _mm256_permute4x64_epi64(tmp, 0xd8); - const __m256i max = _mm256_set1_epi16((1 << bit_depth) - 1); - const __m256i res = _mm256_min_epi16(tmp2, max); - yy_storeu_256(CONVERT_TO_SHORTPTR(dst8 + m), res); - } else { - // Pack into 8 bits and clamp to [0, 256) - // Note that each pack messes up the order of the bits, - // so we use a permute function to correct this - const __m256i tmp = _mm256_packs_epi32(w_0, w_1); - const __m256i tmp2 = _mm256_permute4x64_epi64(tmp, 0xd8); - const __m256i res = - _mm256_packus_epi16(tmp2, tmp2 /* "don't care" value */); - const __m128i res2 = - _mm256_castsi256_si128(_mm256_permute4x64_epi64(res, 0xd8)); - xx_storeu_128(dst8 + m, res2); - } - } - } -} diff --git a/third_party/aom/av1/common/x86/selfguided_sse4.c b/third_party/aom/av1/common/x86/selfguided_sse4.c deleted file mode 100644 index ea3f6d942..000000000 --- a/third_party/aom/av1/common/x86/selfguided_sse4.c +++ /dev/null @@ -1,660 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <smmintrin.h> - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "av1/common/restoration.h" -#include "aom_dsp/x86/synonyms.h" - -// Load 4 bytes from the possibly-misaligned pointer p, extend each byte to -// 32-bit precision and return them in an SSE register. -static __m128i xx_load_extend_8_32(const void *p) { - return _mm_cvtepu8_epi32(xx_loadl_32(p)); -} - -// Load 4 halfwords from the possibly-misaligned pointer p, extend each -// halfword to 32-bit precision and return them in an SSE register. -static __m128i xx_load_extend_16_32(const void *p) { - return _mm_cvtepu16_epi32(xx_loadl_64(p)); -} - -// Compute the scan of an SSE register holding 4 32-bit integers. If the -// register holds x0..x3 then the scan will hold x0, x0+x1, x0+x1+x2, -// x0+x1+x2+x3 -static __m128i scan_32(__m128i x) { - const __m128i x01 = _mm_add_epi32(x, _mm_slli_si128(x, 4)); - return _mm_add_epi32(x01, _mm_slli_si128(x01, 8)); -} - -// Compute two integral images from src. B sums elements; A sums their -// squares. The images are offset by one pixel, so will have width and height -// equal to width + 1, height + 1 and the first row and column will be zero. -// -// A+1 and B+1 should be aligned to 16 bytes. buf_stride should be a multiple -// of 4. -static void integral_images(const uint8_t *src, int src_stride, int width, - int height, int32_t *A, int32_t *B, - int buf_stride) { - // Write out the zero top row - memset(A, 0, sizeof(*A) * (width + 1)); - memset(B, 0, sizeof(*B) * (width + 1)); - - const __m128i zero = _mm_setzero_si128(); - for (int i = 0; i < height; ++i) { - // Zero the left column. - A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0; - - // ldiff is the difference H - D where H is the output sample immediately - // to the left and D is the output sample above it. These are scalars, - // replicated across the four lanes. 
- __m128i ldiff1 = zero, ldiff2 = zero; - for (int j = 0; j < width; j += 4) { - const int ABj = 1 + j; - - const __m128i above1 = xx_load_128(B + ABj + i * buf_stride); - const __m128i above2 = xx_load_128(A + ABj + i * buf_stride); - - const __m128i x1 = xx_load_extend_8_32(src + j + i * src_stride); - const __m128i x2 = _mm_madd_epi16(x1, x1); - - const __m128i sc1 = scan_32(x1); - const __m128i sc2 = scan_32(x2); - - const __m128i row1 = _mm_add_epi32(_mm_add_epi32(sc1, above1), ldiff1); - const __m128i row2 = _mm_add_epi32(_mm_add_epi32(sc2, above2), ldiff2); - - xx_store_128(B + ABj + (i + 1) * buf_stride, row1); - xx_store_128(A + ABj + (i + 1) * buf_stride, row2); - - // Calculate the new H - D. - ldiff1 = _mm_shuffle_epi32(_mm_sub_epi32(row1, above1), 0xff); - ldiff2 = _mm_shuffle_epi32(_mm_sub_epi32(row2, above2), 0xff); - } - } -} - -// Compute two integral images from src. B sums elements; A sums their squares -// -// A and B should be aligned to 16 bytes. buf_stride should be a multiple of 4. -static void integral_images_highbd(const uint16_t *src, int src_stride, - int width, int height, int32_t *A, - int32_t *B, int buf_stride) { - // Write out the zero top row - memset(A, 0, sizeof(*A) * (width + 1)); - memset(B, 0, sizeof(*B) * (width + 1)); - - const __m128i zero = _mm_setzero_si128(); - for (int i = 0; i < height; ++i) { - // Zero the left column. - A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0; - - // ldiff is the difference H - D where H is the output sample immediately - // to the left and D is the output sample above it. These are scalars, - // replicated across the four lanes. 
- __m128i ldiff1 = zero, ldiff2 = zero; - for (int j = 0; j < width; j += 4) { - const int ABj = 1 + j; - - const __m128i above1 = xx_load_128(B + ABj + i * buf_stride); - const __m128i above2 = xx_load_128(A + ABj + i * buf_stride); - - const __m128i x1 = xx_load_extend_16_32(src + j + i * src_stride); - const __m128i x2 = _mm_madd_epi16(x1, x1); - - const __m128i sc1 = scan_32(x1); - const __m128i sc2 = scan_32(x2); - - const __m128i row1 = _mm_add_epi32(_mm_add_epi32(sc1, above1), ldiff1); - const __m128i row2 = _mm_add_epi32(_mm_add_epi32(sc2, above2), ldiff2); - - xx_store_128(B + ABj + (i + 1) * buf_stride, row1); - xx_store_128(A + ABj + (i + 1) * buf_stride, row2); - - // Calculate the new H - D. - ldiff1 = _mm_shuffle_epi32(_mm_sub_epi32(row1, above1), 0xff); - ldiff2 = _mm_shuffle_epi32(_mm_sub_epi32(row2, above2), 0xff); - } - } -} - -// Compute 4 values of boxsum from the given integral image. ii should point -// at the middle of the box (for the first value). r is the box radius. 
-static INLINE __m128i boxsum_from_ii(const int32_t *ii, int stride, int r) { - const __m128i tl = xx_loadu_128(ii - (r + 1) - (r + 1) * stride); - const __m128i tr = xx_loadu_128(ii + (r + 0) - (r + 1) * stride); - const __m128i bl = xx_loadu_128(ii - (r + 1) + r * stride); - const __m128i br = xx_loadu_128(ii + (r + 0) + r * stride); - const __m128i u = _mm_sub_epi32(tr, tl); - const __m128i v = _mm_sub_epi32(br, bl); - return _mm_sub_epi32(v, u); -} - -static __m128i round_for_shift(unsigned shift) { - return _mm_set1_epi32((1 << shift) >> 1); -} - -static __m128i compute_p(__m128i sum1, __m128i sum2, int bit_depth, int n) { - __m128i an, bb; - if (bit_depth > 8) { - const __m128i rounding_a = round_for_shift(2 * (bit_depth - 8)); - const __m128i rounding_b = round_for_shift(bit_depth - 8); - const __m128i shift_a = _mm_cvtsi32_si128(2 * (bit_depth - 8)); - const __m128i shift_b = _mm_cvtsi32_si128(bit_depth - 8); - const __m128i a = _mm_srl_epi32(_mm_add_epi32(sum2, rounding_a), shift_a); - const __m128i b = _mm_srl_epi32(_mm_add_epi32(sum1, rounding_b), shift_b); - // b < 2^14, so we can use a 16-bit madd rather than a 32-bit - // mullo to square it - bb = _mm_madd_epi16(b, b); - an = _mm_max_epi32(_mm_mullo_epi32(a, _mm_set1_epi32(n)), bb); - } else { - bb = _mm_madd_epi16(sum1, sum1); - an = _mm_mullo_epi32(sum2, _mm_set1_epi32(n)); - } - return _mm_sub_epi32(an, bb); -} - -// Assumes that C, D are integral images for the original buffer which has been -// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels -// on the sides. A, B, C, D point at logical position (0, 0). 
-static void calc_ab(int32_t *A, int32_t *B, const int32_t *C, const int32_t *D, - int width, int height, int buf_stride, int bit_depth, - int sgr_params_idx, int radius_idx) { - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - const int r = params->r[radius_idx]; - const int n = (2 * r + 1) * (2 * r + 1); - const __m128i s = _mm_set1_epi32(params->s[radius_idx]); - // one_over_n[n-1] is 2^12/n, so easily fits in an int16 - const __m128i one_over_n = _mm_set1_epi32(one_by_x[n - 1]); - - const __m128i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS); - const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS); - - // Set up masks - const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff); - __m128i mask[4]; - for (int idx = 0; idx < 4; idx++) { - const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx)); - mask[idx] = _mm_cvtepi8_epi32(_mm_srl_epi64(ones32, shift)); - } - - for (int i = -1; i < height + 1; ++i) { - for (int j = -1; j < width + 1; j += 4) { - const int32_t *Cij = C + i * buf_stride + j; - const int32_t *Dij = D + i * buf_stride + j; - - __m128i sum1 = boxsum_from_ii(Dij, buf_stride, r); - __m128i sum2 = boxsum_from_ii(Cij, buf_stride, r); - - // When width + 2 isn't a multiple of 4, sum1 and sum2 will contain - // some uninitialised data in their upper words. We use a mask to - // ensure that these bits are set to 0. - int idx = AOMMIN(4, width + 1 - j); - assert(idx >= 1); - - if (idx < 4) { - sum1 = _mm_and_si128(mask[idx], sum1); - sum2 = _mm_and_si128(mask[idx], sum2); - } - - const __m128i p = compute_p(sum1, sum2, bit_depth, n); - - const __m128i z = _mm_min_epi32( - _mm_srli_epi32(_mm_add_epi32(_mm_mullo_epi32(p, s), rnd_z), - SGRPROJ_MTABLE_BITS), - _mm_set1_epi32(255)); - - // 'Gather' type instructions are not available pre-AVX2, so synthesize a - // gather using scalar loads. 
- const __m128i a_res = _mm_set_epi32(x_by_xplus1[_mm_extract_epi32(z, 3)], - x_by_xplus1[_mm_extract_epi32(z, 2)], - x_by_xplus1[_mm_extract_epi32(z, 1)], - x_by_xplus1[_mm_extract_epi32(z, 0)]); - - xx_storeu_128(A + i * buf_stride + j, a_res); - - const __m128i a_complement = - _mm_sub_epi32(_mm_set1_epi32(SGRPROJ_SGR), a_res); - - // sum1 might have lanes greater than 2^15, so we can't use madd to do - // multiplication involving sum1. However, a_complement and one_over_n - // are both less than 256, so we can multiply them first. - const __m128i a_comp_over_n = _mm_madd_epi16(a_complement, one_over_n); - const __m128i b_int = _mm_mullo_epi32(a_comp_over_n, sum1); - const __m128i b_res = - _mm_srli_epi32(_mm_add_epi32(b_int, rnd_res), SGRPROJ_RECIP_BITS); - - xx_storeu_128(B + i * buf_stride + j, b_res); - } - } -} - -// Calculate 4 values of the "cross sum" starting at buf. This is a 3x3 filter -// where the outer four corners have weight 3 and all other pixels have weight -// 4. -// -// Pixels are indexed like this: -// xtl xt xtr -// xl x xr -// xbl xb xbr -// -// buf points to x -// -// fours = xl + xt + xr + xb + x -// threes = xtl + xtr + xbr + xbl -// cross_sum = 4 * fours + 3 * threes -// = 4 * (fours + threes) - threes -// = (fours + threes) << 2 - threes -static INLINE __m128i cross_sum(const int32_t *buf, int stride) { - const __m128i xtl = xx_loadu_128(buf - 1 - stride); - const __m128i xt = xx_loadu_128(buf - stride); - const __m128i xtr = xx_loadu_128(buf + 1 - stride); - const __m128i xl = xx_loadu_128(buf - 1); - const __m128i x = xx_loadu_128(buf); - const __m128i xr = xx_loadu_128(buf + 1); - const __m128i xbl = xx_loadu_128(buf - 1 + stride); - const __m128i xb = xx_loadu_128(buf + stride); - const __m128i xbr = xx_loadu_128(buf + 1 + stride); - - const __m128i fours = _mm_add_epi32( - xl, _mm_add_epi32(xt, _mm_add_epi32(xr, _mm_add_epi32(xb, x)))); - const __m128i threes = - _mm_add_epi32(xtl, _mm_add_epi32(xtr, _mm_add_epi32(xbr, xbl))); - 
- return _mm_sub_epi32(_mm_slli_epi32(_mm_add_epi32(fours, threes), 2), threes); -} - -// The final filter for self-guided restoration. Computes a weighted average -// across A, B with "cross sums" (see cross_sum implementation above). -static void final_filter(int32_t *dst, int dst_stride, const int32_t *A, - const int32_t *B, int buf_stride, const void *dgd8, - int dgd_stride, int width, int height, int highbd) { - const int nb = 5; - const __m128i rounding = - round_for_shift(SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); - const uint8_t *dgd_real = - highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8; - - for (int i = 0; i < height; ++i) { - for (int j = 0; j < width; j += 4) { - const __m128i a = cross_sum(A + i * buf_stride + j, buf_stride); - const __m128i b = cross_sum(B + i * buf_stride + j, buf_stride); - const __m128i raw = - xx_loadl_64(dgd_real + ((i * dgd_stride + j) << highbd)); - const __m128i src = - highbd ? _mm_cvtepu16_epi32(raw) : _mm_cvtepu8_epi32(raw); - - __m128i v = _mm_add_epi32(_mm_madd_epi16(a, src), b); - __m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding), - SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); - - xx_storeu_128(dst + i * dst_stride + j, w); - } - } -} - -// Assumes that C, D are integral images for the original buffer which has been -// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels -// on the sides. A, B, C, D point at logical position (0, 0). 
-static void calc_ab_fast(int32_t *A, int32_t *B, const int32_t *C, - const int32_t *D, int width, int height, - int buf_stride, int bit_depth, int sgr_params_idx, - int radius_idx) { - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - const int r = params->r[radius_idx]; - const int n = (2 * r + 1) * (2 * r + 1); - const __m128i s = _mm_set1_epi32(params->s[radius_idx]); - // one_over_n[n-1] is 2^12/n, so easily fits in an int16 - const __m128i one_over_n = _mm_set1_epi32(one_by_x[n - 1]); - - const __m128i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS); - const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS); - - // Set up masks - const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff); - __m128i mask[4]; - for (int idx = 0; idx < 4; idx++) { - const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx)); - mask[idx] = _mm_cvtepi8_epi32(_mm_srl_epi64(ones32, shift)); - } - - for (int i = -1; i < height + 1; i += 2) { - for (int j = -1; j < width + 1; j += 4) { - const int32_t *Cij = C + i * buf_stride + j; - const int32_t *Dij = D + i * buf_stride + j; - - __m128i sum1 = boxsum_from_ii(Dij, buf_stride, r); - __m128i sum2 = boxsum_from_ii(Cij, buf_stride, r); - - // When width + 2 isn't a multiple of 4, sum1 and sum2 will contain - // some uninitialised data in their upper words. We use a mask to - // ensure that these bits are set to 0. - int idx = AOMMIN(4, width + 1 - j); - assert(idx >= 1); - - if (idx < 4) { - sum1 = _mm_and_si128(mask[idx], sum1); - sum2 = _mm_and_si128(mask[idx], sum2); - } - - const __m128i p = compute_p(sum1, sum2, bit_depth, n); - - const __m128i z = _mm_min_epi32( - _mm_srli_epi32(_mm_add_epi32(_mm_mullo_epi32(p, s), rnd_z), - SGRPROJ_MTABLE_BITS), - _mm_set1_epi32(255)); - - // 'Gather' type instructions are not available pre-AVX2, so synthesize a - // gather using scalar loads. 
- const __m128i a_res = _mm_set_epi32(x_by_xplus1[_mm_extract_epi32(z, 3)], - x_by_xplus1[_mm_extract_epi32(z, 2)], - x_by_xplus1[_mm_extract_epi32(z, 1)], - x_by_xplus1[_mm_extract_epi32(z, 0)]); - - xx_storeu_128(A + i * buf_stride + j, a_res); - - const __m128i a_complement = - _mm_sub_epi32(_mm_set1_epi32(SGRPROJ_SGR), a_res); - - // sum1 might have lanes greater than 2^15, so we can't use madd to do - // multiplication involving sum1. However, a_complement and one_over_n - // are both less than 256, so we can multiply them first. - const __m128i a_comp_over_n = _mm_madd_epi16(a_complement, one_over_n); - const __m128i b_int = _mm_mullo_epi32(a_comp_over_n, sum1); - const __m128i b_res = - _mm_srli_epi32(_mm_add_epi32(b_int, rnd_res), SGRPROJ_RECIP_BITS); - - xx_storeu_128(B + i * buf_stride + j, b_res); - } - } -} - -// Calculate 4 values of the "cross sum" starting at buf. -// -// Pixels are indexed like this: -// xtl xt xtr -// - buf - -// xbl xb xbr -// -// Pixels are weighted like this: -// 5 6 5 -// 0 0 0 -// 5 6 5 -// -// fives = xtl + xtr + xbl + xbr -// sixes = xt + xb -// cross_sum = 6 * sixes + 5 * fives -// = 5 * (fives + sixes) - sixes -// = (fives + sixes) << 2 + (fives + sixes) + sixes -static INLINE __m128i cross_sum_fast_even_row(const int32_t *buf, int stride) { - const __m128i xtl = xx_loadu_128(buf - 1 - stride); - const __m128i xt = xx_loadu_128(buf - stride); - const __m128i xtr = xx_loadu_128(buf + 1 - stride); - const __m128i xbl = xx_loadu_128(buf - 1 + stride); - const __m128i xb = xx_loadu_128(buf + stride); - const __m128i xbr = xx_loadu_128(buf + 1 + stride); - - const __m128i fives = - _mm_add_epi32(xtl, _mm_add_epi32(xtr, _mm_add_epi32(xbr, xbl))); - const __m128i sixes = _mm_add_epi32(xt, xb); - const __m128i fives_plus_sixes = _mm_add_epi32(fives, sixes); - - return _mm_add_epi32( - _mm_add_epi32(_mm_slli_epi32(fives_plus_sixes, 2), fives_plus_sixes), - sixes); -} - -// Calculate 4 values of the "cross sum" starting at buf. 
-// -// Pixels are indexed like this: -// xl x xr -// -// Pixels are weighted like this: -// 5 6 5 -// -// buf points to x -// -// fives = xl + xr -// sixes = x -// cross_sum = 5 * fives + 6 * sixes -// = 4 * (fives + sixes) + (fives + sixes) + sixes -// = (fives + sixes) << 2 + (fives + sixes) + sixes -static INLINE __m128i cross_sum_fast_odd_row(const int32_t *buf) { - const __m128i xl = xx_loadu_128(buf - 1); - const __m128i x = xx_loadu_128(buf); - const __m128i xr = xx_loadu_128(buf + 1); - - const __m128i fives = _mm_add_epi32(xl, xr); - const __m128i sixes = x; - - const __m128i fives_plus_sixes = _mm_add_epi32(fives, sixes); - - return _mm_add_epi32( - _mm_add_epi32(_mm_slli_epi32(fives_plus_sixes, 2), fives_plus_sixes), - sixes); -} - -// The final filter for the self-guided restoration. Computes a -// weighted average across A, B with "cross sums" (see cross_sum_... -// implementations above). -static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A, - const int32_t *B, int buf_stride, - const void *dgd8, int dgd_stride, int width, - int height, int highbd) { - const int nb0 = 5; - const int nb1 = 4; - - const __m128i rounding0 = - round_for_shift(SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS); - const __m128i rounding1 = - round_for_shift(SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS); - - const uint8_t *dgd_real = - highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8; - - for (int i = 0; i < height; ++i) { - if (!(i & 1)) { // even row - for (int j = 0; j < width; j += 4) { - const __m128i a = - cross_sum_fast_even_row(A + i * buf_stride + j, buf_stride); - const __m128i b = - cross_sum_fast_even_row(B + i * buf_stride + j, buf_stride); - const __m128i raw = - xx_loadl_64(dgd_real + ((i * dgd_stride + j) << highbd)); - const __m128i src = - highbd ? 
_mm_cvtepu16_epi32(raw) : _mm_cvtepu8_epi32(raw); - - __m128i v = _mm_add_epi32(_mm_madd_epi16(a, src), b); - __m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding0), - SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS); - - xx_storeu_128(dst + i * dst_stride + j, w); - } - } else { // odd row - for (int j = 0; j < width; j += 4) { - const __m128i a = cross_sum_fast_odd_row(A + i * buf_stride + j); - const __m128i b = cross_sum_fast_odd_row(B + i * buf_stride + j); - const __m128i raw = - xx_loadl_64(dgd_real + ((i * dgd_stride + j) << highbd)); - const __m128i src = - highbd ? _mm_cvtepu16_epi32(raw) : _mm_cvtepu8_epi32(raw); - - __m128i v = _mm_add_epi32(_mm_madd_epi16(a, src), b); - __m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding1), - SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS); - - xx_storeu_128(dst + i * dst_stride + j, w); - } - } - } -} - -int av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width, - int height, int dgd_stride, int32_t *flt0, - int32_t *flt1, int flt_stride, - int sgr_params_idx, int bit_depth, - int highbd) { - int32_t *buf = (int32_t *)aom_memalign( - 16, 4 * sizeof(*buf) * RESTORATION_PROC_UNIT_PELS); - if (!buf) return -1; - memset(buf, 0, 4 * sizeof(*buf) * RESTORATION_PROC_UNIT_PELS); - - const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ; - const int height_ext = height + 2 * SGRPROJ_BORDER_VERT; - - // Adjusting the stride of A and B here appears to avoid bad cache effects, - // leading to a significant speed improvement. - // We also align the stride to a multiple of 16 bytes for efficiency. - int buf_stride = ((width_ext + 3) & ~3) + 16; - - // The "tl" pointers point at the top-left of the initialised data for the - // array. Adding 3 here ensures that column 1 is 16-byte aligned. 
- int32_t *Atl = buf + 0 * RESTORATION_PROC_UNIT_PELS + 3; - int32_t *Btl = buf + 1 * RESTORATION_PROC_UNIT_PELS + 3; - int32_t *Ctl = buf + 2 * RESTORATION_PROC_UNIT_PELS + 3; - int32_t *Dtl = buf + 3 * RESTORATION_PROC_UNIT_PELS + 3; - - // The "0" pointers are (- SGRPROJ_BORDER_VERT, -SGRPROJ_BORDER_HORZ). Note - // there's a zero row and column in A, B (integral images), so we move down - // and right one for them. - const int buf_diag_border = - SGRPROJ_BORDER_HORZ + buf_stride * SGRPROJ_BORDER_VERT; - - int32_t *A0 = Atl + 1 + buf_stride; - int32_t *B0 = Btl + 1 + buf_stride; - int32_t *C0 = Ctl + 1 + buf_stride; - int32_t *D0 = Dtl + 1 + buf_stride; - - // Finally, A, B, C, D point at position (0, 0). - int32_t *A = A0 + buf_diag_border; - int32_t *B = B0 + buf_diag_border; - int32_t *C = C0 + buf_diag_border; - int32_t *D = D0 + buf_diag_border; - - const int dgd_diag_border = - SGRPROJ_BORDER_HORZ + dgd_stride * SGRPROJ_BORDER_VERT; - const uint8_t *dgd0 = dgd8 - dgd_diag_border; - - // Generate integral images from the input. C will contain sums of squares; D - // will contain just sums - if (highbd) - integral_images_highbd(CONVERT_TO_SHORTPTR(dgd0), dgd_stride, width_ext, - height_ext, Ctl, Dtl, buf_stride); - else - integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl, - buf_stride); - - const sgr_params_type *const params = &sgr_params[sgr_params_idx]; - // Write to flt0 and flt1 - // If params->r == 0 we skip the corresponding filter. We only allow one of - // the radii to be 0, as having both equal to 0 would be equivalent to - // skipping SGR entirely. 
- assert(!(params->r[0] == 0 && params->r[1] == 0)); - assert(params->r[0] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - assert(params->r[1] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - - if (params->r[0] > 0) { - calc_ab_fast(A, B, C, D, width, height, buf_stride, bit_depth, - sgr_params_idx, 0); - final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, - width, height, highbd); - } - - if (params->r[1] > 0) { - calc_ab(A, B, C, D, width, height, buf_stride, bit_depth, sgr_params_idx, - 1); - final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, - height, highbd); - } - aom_free(buf); - return 0; -} - -void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width, - int height, int stride, int eps, - const int *xqd, uint8_t *dst8, - int dst_stride, int32_t *tmpbuf, - int bit_depth, int highbd) { - int32_t *flt0 = tmpbuf; - int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX; - assert(width * height <= RESTORATION_UNITPELS_MAX); - const int ret = av1_selfguided_restoration_sse4_1( - dat8, width, height, stride, flt0, flt1, width, eps, bit_depth, highbd); - (void)ret; - assert(!ret); - const sgr_params_type *const params = &sgr_params[eps]; - int xq[2]; - decode_xq(xqd, xq, params); - - __m128i xq0 = _mm_set1_epi32(xq[0]); - __m128i xq1 = _mm_set1_epi32(xq[1]); - - for (int i = 0; i < height; ++i) { - // Calculate output in batches of 8 pixels - for (int j = 0; j < width; j += 8) { - const int k = i * width + j; - const int m = i * dst_stride + j; - - const uint8_t *dat8ij = dat8 + i * stride + j; - __m128i src; - if (highbd) { - src = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij)); - } else { - src = _mm_cvtepu8_epi16(xx_loadl_64(dat8ij)); - } - - const __m128i u = _mm_slli_epi16(src, SGRPROJ_RST_BITS); - const __m128i u_0 = _mm_cvtepu16_epi32(u); - const __m128i u_1 = _mm_cvtepu16_epi32(_mm_srli_si128(u, 8)); - - __m128i v_0 = _mm_slli_epi32(u_0, SGRPROJ_PRJ_BITS); - __m128i v_1 = _mm_slli_epi32(u_1, 
SGRPROJ_PRJ_BITS); - - if (params->r[0] > 0) { - const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt0[k]), u_0); - v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq0, f1_0)); - - const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt0[k + 4]), u_1); - v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq0, f1_1)); - } - - if (params->r[1] > 0) { - const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0); - v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq1, f2_0)); - - const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1); - v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq1, f2_1)); - } - - const __m128i rounding = - round_for_shift(SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - const __m128i w_0 = _mm_srai_epi32(_mm_add_epi32(v_0, rounding), - SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - const __m128i w_1 = _mm_srai_epi32(_mm_add_epi32(v_1, rounding), - SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - - if (highbd) { - // Pack into 16 bits and clamp to [0, 2^bit_depth) - const __m128i tmp = _mm_packus_epi32(w_0, w_1); - const __m128i max = _mm_set1_epi16((1 << bit_depth) - 1); - const __m128i res = _mm_min_epi16(tmp, max); - xx_storeu_128(CONVERT_TO_SHORTPTR(dst8 + m), res); - } else { - // Pack into 8 bits and clamp to [0, 256) - const __m128i tmp = _mm_packs_epi32(w_0, w_1); - const __m128i res = _mm_packus_epi16(tmp, tmp /* "don't care" value */); - xx_storel_64(dst8 + m, res); - } - } - } -} diff --git a/third_party/aom/av1/common/x86/warp_plane_sse4.c b/third_party/aom/av1/common/x86/warp_plane_sse4.c deleted file mode 100644 index b810cea2e..000000000 --- a/third_party/aom/av1/common/x86/warp_plane_sse4.c +++ /dev/null @@ -1,942 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <emmintrin.h> -#include <smmintrin.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/warped_motion.h" - -/* This is a modified version of 'warped_filter' from warped_motion.c: - * Each coefficient is stored in 8 bits instead of 16 bits - * The coefficients are rearranged in the column order 0, 2, 4, 6, 1, 3, 5, 7 - - This is done in order to avoid overflow: Since the tap with the largest - coefficient could be any of taps 2, 3, 4 or 5, we can't use the summation - order ((0 + 1) + (4 + 5)) + ((2 + 3) + (6 + 7)) used in the regular - convolve functions. - - Instead, we use the summation order - ((0 + 2) + (4 + 6)) + ((1 + 3) + (5 + 7)). - The rearrangement of coefficients in this table is so that we can get the - coefficients into the correct order more quickly. 
-*/ -/* clang-format off */ -DECLARE_ALIGNED(8, static const int8_t, - filter_8bit[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8]) = { -#if WARPEDPIXEL_PREC_BITS == 6 - // [-1, 0) - { 0, 127, 0, 0, 0, 1, 0, 0}, { 0, 127, 0, 0, -1, 2, 0, 0}, - { 1, 127, -1, 0, -3, 4, 0, 0}, { 1, 126, -2, 0, -4, 6, 1, 0}, - { 1, 126, -3, 0, -5, 8, 1, 0}, { 1, 125, -4, 0, -6, 11, 1, 0}, - { 1, 124, -4, 0, -7, 13, 1, 0}, { 2, 123, -5, 0, -8, 15, 1, 0}, - { 2, 122, -6, 0, -9, 18, 1, 0}, { 2, 121, -6, 0, -10, 20, 1, 0}, - { 2, 120, -7, 0, -11, 22, 2, 0}, { 2, 119, -8, 0, -12, 25, 2, 0}, - { 3, 117, -8, 0, -13, 27, 2, 0}, { 3, 116, -9, 0, -13, 29, 2, 0}, - { 3, 114, -10, 0, -14, 32, 3, 0}, { 3, 113, -10, 0, -15, 35, 2, 0}, - { 3, 111, -11, 0, -15, 37, 3, 0}, { 3, 109, -11, 0, -16, 40, 3, 0}, - { 3, 108, -12, 0, -16, 42, 3, 0}, { 4, 106, -13, 0, -17, 45, 3, 0}, - { 4, 104, -13, 0, -17, 47, 3, 0}, { 4, 102, -14, 0, -17, 50, 3, 0}, - { 4, 100, -14, 0, -17, 52, 3, 0}, { 4, 98, -15, 0, -18, 55, 4, 0}, - { 4, 96, -15, 0, -18, 58, 3, 0}, { 4, 94, -16, 0, -18, 60, 4, 0}, - { 4, 91, -16, 0, -18, 63, 4, 0}, { 4, 89, -16, 0, -18, 65, 4, 0}, - { 4, 87, -17, 0, -18, 68, 4, 0}, { 4, 85, -17, 0, -18, 70, 4, 0}, - { 4, 82, -17, 0, -18, 73, 4, 0}, { 4, 80, -17, 0, -18, 75, 4, 0}, - { 4, 78, -18, 0, -18, 78, 4, 0}, { 4, 75, -18, 0, -17, 80, 4, 0}, - { 4, 73, -18, 0, -17, 82, 4, 0}, { 4, 70, -18, 0, -17, 85, 4, 0}, - { 4, 68, -18, 0, -17, 87, 4, 0}, { 4, 65, -18, 0, -16, 89, 4, 0}, - { 4, 63, -18, 0, -16, 91, 4, 0}, { 4, 60, -18, 0, -16, 94, 4, 0}, - { 3, 58, -18, 0, -15, 96, 4, 0}, { 4, 55, -18, 0, -15, 98, 4, 0}, - { 3, 52, -17, 0, -14, 100, 4, 0}, { 3, 50, -17, 0, -14, 102, 4, 0}, - { 3, 47, -17, 0, -13, 104, 4, 0}, { 3, 45, -17, 0, -13, 106, 4, 0}, - { 3, 42, -16, 0, -12, 108, 3, 0}, { 3, 40, -16, 0, -11, 109, 3, 0}, - { 3, 37, -15, 0, -11, 111, 3, 0}, { 2, 35, -15, 0, -10, 113, 3, 0}, - { 3, 32, -14, 0, -10, 114, 3, 0}, { 2, 29, -13, 0, -9, 116, 3, 0}, - { 2, 27, -13, 0, -8, 117, 3, 0}, { 2, 25, -12, 0, -8, 119, 
2, 0}, - { 2, 22, -11, 0, -7, 120, 2, 0}, { 1, 20, -10, 0, -6, 121, 2, 0}, - { 1, 18, -9, 0, -6, 122, 2, 0}, { 1, 15, -8, 0, -5, 123, 2, 0}, - { 1, 13, -7, 0, -4, 124, 1, 0}, { 1, 11, -6, 0, -4, 125, 1, 0}, - { 1, 8, -5, 0, -3, 126, 1, 0}, { 1, 6, -4, 0, -2, 126, 1, 0}, - { 0, 4, -3, 0, -1, 127, 1, 0}, { 0, 2, -1, 0, 0, 127, 0, 0}, - // [0, 1) - { 0, 0, 1, 0, 0, 127, 0, 0}, { 0, -1, 2, 0, 0, 127, 0, 0}, - { 0, -3, 4, 1, 1, 127, -2, 0}, { 0, -5, 6, 1, 1, 127, -2, 0}, - { 0, -6, 8, 1, 2, 126, -3, 0}, {-1, -7, 11, 2, 2, 126, -4, -1}, - {-1, -8, 13, 2, 3, 125, -5, -1}, {-1, -10, 16, 3, 3, 124, -6, -1}, - {-1, -11, 18, 3, 4, 123, -7, -1}, {-1, -12, 20, 3, 4, 122, -7, -1}, - {-1, -13, 23, 3, 4, 121, -8, -1}, {-2, -14, 25, 4, 5, 120, -9, -1}, - {-1, -15, 27, 4, 5, 119, -10, -1}, {-1, -16, 30, 4, 5, 118, -11, -1}, - {-2, -17, 33, 5, 6, 116, -12, -1}, {-2, -17, 35, 5, 6, 114, -12, -1}, - {-2, -18, 38, 5, 6, 113, -13, -1}, {-2, -19, 41, 6, 7, 111, -14, -2}, - {-2, -19, 43, 6, 7, 110, -15, -2}, {-2, -20, 46, 6, 7, 108, -15, -2}, - {-2, -20, 49, 6, 7, 106, -16, -2}, {-2, -21, 51, 7, 7, 104, -16, -2}, - {-2, -21, 54, 7, 7, 102, -17, -2}, {-2, -21, 56, 7, 8, 100, -18, -2}, - {-2, -22, 59, 7, 8, 98, -18, -2}, {-2, -22, 62, 7, 8, 96, -19, -2}, - {-2, -22, 64, 7, 8, 94, -19, -2}, {-2, -22, 67, 8, 8, 91, -20, -2}, - {-2, -22, 69, 8, 8, 89, -20, -2}, {-2, -22, 72, 8, 8, 87, -21, -2}, - {-2, -21, 74, 8, 8, 84, -21, -2}, {-2, -22, 77, 8, 8, 82, -21, -2}, - {-2, -21, 79, 8, 8, 79, -21, -2}, {-2, -21, 82, 8, 8, 77, -22, -2}, - {-2, -21, 84, 8, 8, 74, -21, -2}, {-2, -21, 87, 8, 8, 72, -22, -2}, - {-2, -20, 89, 8, 8, 69, -22, -2}, {-2, -20, 91, 8, 8, 67, -22, -2}, - {-2, -19, 94, 8, 7, 64, -22, -2}, {-2, -19, 96, 8, 7, 62, -22, -2}, - {-2, -18, 98, 8, 7, 59, -22, -2}, {-2, -18, 100, 8, 7, 56, -21, -2}, - {-2, -17, 102, 7, 7, 54, -21, -2}, {-2, -16, 104, 7, 7, 51, -21, -2}, - {-2, -16, 106, 7, 6, 49, -20, -2}, {-2, -15, 108, 7, 6, 46, -20, -2}, - {-2, -15, 110, 7, 6, 43, -19, -2}, {-2, -14, 
111, 7, 6, 41, -19, -2}, - {-1, -13, 113, 6, 5, 38, -18, -2}, {-1, -12, 114, 6, 5, 35, -17, -2}, - {-1, -12, 116, 6, 5, 33, -17, -2}, {-1, -11, 118, 5, 4, 30, -16, -1}, - {-1, -10, 119, 5, 4, 27, -15, -1}, {-1, -9, 120, 5, 4, 25, -14, -2}, - {-1, -8, 121, 4, 3, 23, -13, -1}, {-1, -7, 122, 4, 3, 20, -12, -1}, - {-1, -7, 123, 4, 3, 18, -11, -1}, {-1, -6, 124, 3, 3, 16, -10, -1}, - {-1, -5, 125, 3, 2, 13, -8, -1}, {-1, -4, 126, 2, 2, 11, -7, -1}, - { 0, -3, 126, 2, 1, 8, -6, 0}, { 0, -2, 127, 1, 1, 6, -5, 0}, - { 0, -2, 127, 1, 1, 4, -3, 0}, { 0, 0, 127, 0, 0, 2, -1, 0}, - // [1, 2) - { 0, 0, 127, 0, 0, 1, 0, 0}, { 0, 0, 127, 0, 0, -1, 2, 0}, - { 0, 1, 127, -1, 0, -3, 4, 0}, { 0, 1, 126, -2, 0, -4, 6, 1}, - { 0, 1, 126, -3, 0, -5, 8, 1}, { 0, 1, 125, -4, 0, -6, 11, 1}, - { 0, 1, 124, -4, 0, -7, 13, 1}, { 0, 2, 123, -5, 0, -8, 15, 1}, - { 0, 2, 122, -6, 0, -9, 18, 1}, { 0, 2, 121, -6, 0, -10, 20, 1}, - { 0, 2, 120, -7, 0, -11, 22, 2}, { 0, 2, 119, -8, 0, -12, 25, 2}, - { 0, 3, 117, -8, 0, -13, 27, 2}, { 0, 3, 116, -9, 0, -13, 29, 2}, - { 0, 3, 114, -10, 0, -14, 32, 3}, { 0, 3, 113, -10, 0, -15, 35, 2}, - { 0, 3, 111, -11, 0, -15, 37, 3}, { 0, 3, 109, -11, 0, -16, 40, 3}, - { 0, 3, 108, -12, 0, -16, 42, 3}, { 0, 4, 106, -13, 0, -17, 45, 3}, - { 0, 4, 104, -13, 0, -17, 47, 3}, { 0, 4, 102, -14, 0, -17, 50, 3}, - { 0, 4, 100, -14, 0, -17, 52, 3}, { 0, 4, 98, -15, 0, -18, 55, 4}, - { 0, 4, 96, -15, 0, -18, 58, 3}, { 0, 4, 94, -16, 0, -18, 60, 4}, - { 0, 4, 91, -16, 0, -18, 63, 4}, { 0, 4, 89, -16, 0, -18, 65, 4}, - { 0, 4, 87, -17, 0, -18, 68, 4}, { 0, 4, 85, -17, 0, -18, 70, 4}, - { 0, 4, 82, -17, 0, -18, 73, 4}, { 0, 4, 80, -17, 0, -18, 75, 4}, - { 0, 4, 78, -18, 0, -18, 78, 4}, { 0, 4, 75, -18, 0, -17, 80, 4}, - { 0, 4, 73, -18, 0, -17, 82, 4}, { 0, 4, 70, -18, 0, -17, 85, 4}, - { 0, 4, 68, -18, 0, -17, 87, 4}, { 0, 4, 65, -18, 0, -16, 89, 4}, - { 0, 4, 63, -18, 0, -16, 91, 4}, { 0, 4, 60, -18, 0, -16, 94, 4}, - { 0, 3, 58, -18, 0, -15, 96, 4}, { 0, 4, 55, -18, 0, -15, 
98, 4}, - { 0, 3, 52, -17, 0, -14, 100, 4}, { 0, 3, 50, -17, 0, -14, 102, 4}, - { 0, 3, 47, -17, 0, -13, 104, 4}, { 0, 3, 45, -17, 0, -13, 106, 4}, - { 0, 3, 42, -16, 0, -12, 108, 3}, { 0, 3, 40, -16, 0, -11, 109, 3}, - { 0, 3, 37, -15, 0, -11, 111, 3}, { 0, 2, 35, -15, 0, -10, 113, 3}, - { 0, 3, 32, -14, 0, -10, 114, 3}, { 0, 2, 29, -13, 0, -9, 116, 3}, - { 0, 2, 27, -13, 0, -8, 117, 3}, { 0, 2, 25, -12, 0, -8, 119, 2}, - { 0, 2, 22, -11, 0, -7, 120, 2}, { 0, 1, 20, -10, 0, -6, 121, 2}, - { 0, 1, 18, -9, 0, -6, 122, 2}, { 0, 1, 15, -8, 0, -5, 123, 2}, - { 0, 1, 13, -7, 0, -4, 124, 1}, { 0, 1, 11, -6, 0, -4, 125, 1}, - { 0, 1, 8, -5, 0, -3, 126, 1}, { 0, 1, 6, -4, 0, -2, 126, 1}, - { 0, 0, 4, -3, 0, -1, 127, 1}, { 0, 0, 2, -1, 0, 0, 127, 0}, - // dummy (replicate row index 191) - { 0, 0, 2, -1, 0, 0, 127, 0}, - -#else - // [-1, 0) - { 0, 127, 0, 0, 0, 1, 0, 0}, { 1, 127, -1, 0, -3, 4, 0, 0}, - { 1, 126, -3, 0, -5, 8, 1, 0}, { 1, 124, -4, 0, -7, 13, 1, 0}, - { 2, 122, -6, 0, -9, 18, 1, 0}, { 2, 120, -7, 0, -11, 22, 2, 0}, - { 3, 117, -8, 0, -13, 27, 2, 0}, { 3, 114, -10, 0, -14, 32, 3, 0}, - { 3, 111, -11, 0, -15, 37, 3, 0}, { 3, 108, -12, 0, -16, 42, 3, 0}, - { 4, 104, -13, 0, -17, 47, 3, 0}, { 4, 100, -14, 0, -17, 52, 3, 0}, - { 4, 96, -15, 0, -18, 58, 3, 0}, { 4, 91, -16, 0, -18, 63, 4, 0}, - { 4, 87, -17, 0, -18, 68, 4, 0}, { 4, 82, -17, 0, -18, 73, 4, 0}, - { 4, 78, -18, 0, -18, 78, 4, 0}, { 4, 73, -18, 0, -17, 82, 4, 0}, - { 4, 68, -18, 0, -17, 87, 4, 0}, { 4, 63, -18, 0, -16, 91, 4, 0}, - { 3, 58, -18, 0, -15, 96, 4, 0}, { 3, 52, -17, 0, -14, 100, 4, 0}, - { 3, 47, -17, 0, -13, 104, 4, 0}, { 3, 42, -16, 0, -12, 108, 3, 0}, - { 3, 37, -15, 0, -11, 111, 3, 0}, { 3, 32, -14, 0, -10, 114, 3, 0}, - { 2, 27, -13, 0, -8, 117, 3, 0}, { 2, 22, -11, 0, -7, 120, 2, 0}, - { 1, 18, -9, 0, -6, 122, 2, 0}, { 1, 13, -7, 0, -4, 124, 1, 0}, - { 1, 8, -5, 0, -3, 126, 1, 0}, { 0, 4, -3, 0, -1, 127, 1, 0}, - // [0, 1) - { 0, 0, 1, 0, 0, 127, 0, 0}, { 0, -3, 4, 1, 1, 127, -2, 0}, 
- { 0, -6, 8, 1, 2, 126, -3, 0}, {-1, -8, 13, 2, 3, 125, -5, -1}, - {-1, -11, 18, 3, 4, 123, -7, -1}, {-1, -13, 23, 3, 4, 121, -8, -1}, - {-1, -15, 27, 4, 5, 119, -10, -1}, {-2, -17, 33, 5, 6, 116, -12, -1}, - {-2, -18, 38, 5, 6, 113, -13, -1}, {-2, -19, 43, 6, 7, 110, -15, -2}, - {-2, -20, 49, 6, 7, 106, -16, -2}, {-2, -21, 54, 7, 7, 102, -17, -2}, - {-2, -22, 59, 7, 8, 98, -18, -2}, {-2, -22, 64, 7, 8, 94, -19, -2}, - {-2, -22, 69, 8, 8, 89, -20, -2}, {-2, -21, 74, 8, 8, 84, -21, -2}, - {-2, -21, 79, 8, 8, 79, -21, -2}, {-2, -21, 84, 8, 8, 74, -21, -2}, - {-2, -20, 89, 8, 8, 69, -22, -2}, {-2, -19, 94, 8, 7, 64, -22, -2}, - {-2, -18, 98, 8, 7, 59, -22, -2}, {-2, -17, 102, 7, 7, 54, -21, -2}, - {-2, -16, 106, 7, 6, 49, -20, -2}, {-2, -15, 110, 7, 6, 43, -19, -2}, - {-1, -13, 113, 6, 5, 38, -18, -2}, {-1, -12, 116, 6, 5, 33, -17, -2}, - {-1, -10, 119, 5, 4, 27, -15, -1}, {-1, -8, 121, 4, 3, 23, -13, -1}, - {-1, -7, 123, 4, 3, 18, -11, -1}, {-1, -5, 125, 3, 2, 13, -8, -1}, - { 0, -3, 126, 2, 1, 8, -6, 0}, { 0, -2, 127, 1, 1, 4, -3, 0}, - // [1, 2) - { 0, 0, 127, 0, 0, 1, 0, 0}, { 0, 1, 127, -1, 0, -3, 4, 0}, - { 0, 1, 126, -3, 0, -5, 8, 1}, { 0, 1, 124, -4, 0, -7, 13, 1}, - { 0, 2, 122, -6, 0, -9, 18, 1}, { 0, 2, 120, -7, 0, -11, 22, 2}, - { 0, 3, 117, -8, 0, -13, 27, 2}, { 0, 3, 114, -10, 0, -14, 32, 3}, - { 0, 3, 111, -11, 0, -15, 37, 3}, { 0, 3, 108, -12, 0, -16, 42, 3}, - { 0, 4, 104, -13, 0, -17, 47, 3}, { 0, 4, 100, -14, 0, -17, 52, 3}, - { 0, 4, 96, -15, 0, -18, 58, 3}, { 0, 4, 91, -16, 0, -18, 63, 4}, - { 0, 4, 87, -17, 0, -18, 68, 4}, { 0, 4, 82, -17, 0, -18, 73, 4}, - { 0, 4, 78, -18, 0, -18, 78, 4}, { 0, 4, 73, -18, 0, -17, 82, 4}, - { 0, 4, 68, -18, 0, -17, 87, 4}, { 0, 4, 63, -18, 0, -16, 91, 4}, - { 0, 3, 58, -18, 0, -15, 96, 4}, { 0, 3, 52, -17, 0, -14, 100, 4}, - { 0, 3, 47, -17, 0, -13, 104, 4}, { 0, 3, 42, -16, 0, -12, 108, 3}, - { 0, 3, 37, -15, 0, -11, 111, 3}, { 0, 3, 32, -14, 0, -10, 114, 3}, - { 0, 2, 27, -13, 0, -8, 117, 3}, { 0, 2, 22, -11, 
0, -7, 120, 2}, - { 0, 1, 18, -9, 0, -6, 122, 2}, { 0, 1, 13, -7, 0, -4, 124, 1}, - { 0, 1, 8, -5, 0, -3, 126, 1}, { 0, 0, 4, -3, 0, -1, 127, 1}, - // dummy (replicate row index 95) - { 0, 0, 4, -3, 0, -1, 127, 1}, -#endif // WARPEDPIXEL_PREC_BITS == 6 -}; -/* clang-format on */ - -// Shuffle masks: we want to convert a sequence of bytes 0, 1, 2, ..., 15 -// in an SSE register into two sequences: -// 0, 2, 2, 4, ..., 12, 12, 14, <don't care> -// 1, 3, 3, 5, ..., 13, 13, 15, <don't care> -static const uint8_t even_mask[16] = { 0, 2, 2, 4, 4, 6, 6, 8, - 8, 10, 10, 12, 12, 14, 14, 0 }; -static const uint8_t odd_mask[16] = { 1, 3, 3, 5, 5, 7, 7, 9, - 9, 11, 11, 13, 13, 15, 15, 0 }; - -static const uint8_t shuffle_alpha0_mask01[16] = { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1 }; - -static const uint8_t shuffle_alpha0_mask23[16] = { 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3 }; - -static const uint8_t shuffle_alpha0_mask45[16] = { 4, 5, 4, 5, 4, 5, 4, 5, - 4, 5, 4, 5, 4, 5, 4, 5 }; - -static const uint8_t shuffle_alpha0_mask67[16] = { 6, 7, 6, 7, 6, 7, 6, 7, - 6, 7, 6, 7, 6, 7, 6, 7 }; - -static const uint8_t shuffle_gamma0_mask0[16] = { 0, 1, 2, 3, 0, 1, 2, 3, - 0, 1, 2, 3, 0, 1, 2, 3 }; -static const uint8_t shuffle_gamma0_mask1[16] = { 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7 }; -static const uint8_t shuffle_gamma0_mask2[16] = { 8, 9, 10, 11, 8, 9, 10, 11, - 8, 9, 10, 11, 8, 9, 10, 11 }; -static const uint8_t shuffle_gamma0_mask3[16] = { - 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 -}; - -static INLINE void filter_src_pixels(__m128i src, __m128i *tmp, __m128i *coeff, - const int offset_bits_horiz, - const int reduce_bits_horiz, int k) { - const __m128i src_even = - _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)even_mask)); - const __m128i src_odd = - _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)odd_mask)); - // The pixel order we need for 'src' is: - // 0 2 2 4 4 6 6 8 1 3 3 5 5 7 7 9 - const __m128i src_02 = 
_mm_unpacklo_epi64(src_even, src_odd); - const __m128i res_02 = _mm_maddubs_epi16(src_02, coeff[0]); - // 4 6 6 8 8 10 10 12 5 7 7 9 9 11 11 13 - const __m128i src_46 = _mm_unpacklo_epi64(_mm_srli_si128(src_even, 4), - _mm_srli_si128(src_odd, 4)); - const __m128i res_46 = _mm_maddubs_epi16(src_46, coeff[1]); - // 1 3 3 5 5 7 7 9 2 4 4 6 6 8 8 10 - const __m128i src_13 = - _mm_unpacklo_epi64(src_odd, _mm_srli_si128(src_even, 2)); - const __m128i res_13 = _mm_maddubs_epi16(src_13, coeff[2]); - // 5 7 7 9 9 11 11 13 6 8 8 10 10 12 12 14 - const __m128i src_57 = _mm_unpacklo_epi64(_mm_srli_si128(src_odd, 4), - _mm_srli_si128(src_even, 6)); - const __m128i res_57 = _mm_maddubs_epi16(src_57, coeff[3]); - - const __m128i round_const = _mm_set1_epi16((1 << offset_bits_horiz) + - ((1 << reduce_bits_horiz) >> 1)); - - // Note: The values res_02 + res_46 and res_13 + res_57 both - // fit into int16s at this point, but their sum may be too wide to fit - // into an int16. However, once we also add round_const, the sum of - // all of these fits into a uint16. - // - // The wrapping behaviour of _mm_add_* is used here to make sure we - // get the correct result despite converting between different - // (implicit) types. 
- const __m128i res_even = _mm_add_epi16(res_02, res_46); - const __m128i res_odd = _mm_add_epi16(res_13, res_57); - const __m128i res = - _mm_add_epi16(_mm_add_epi16(res_even, res_odd), round_const); - tmp[k + 7] = _mm_srl_epi16(res, _mm_cvtsi32_si128(reduce_bits_horiz)); -} - -static INLINE void prepare_horizontal_filter_coeff(int alpha, int sx, - __m128i *coeff) { - // Filter even-index pixels - const __m128i tmp_0 = _mm_loadl_epi64( - (__m128i *)&filter_8bit[(sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS]); - const __m128i tmp_1 = _mm_loadl_epi64( - (__m128i *)&filter_8bit[(sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS]); - const __m128i tmp_2 = _mm_loadl_epi64( - (__m128i *)&filter_8bit[(sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS]); - const __m128i tmp_3 = _mm_loadl_epi64( - (__m128i *)&filter_8bit[(sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS]); - const __m128i tmp_4 = _mm_loadl_epi64( - (__m128i *)&filter_8bit[(sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS]); - const __m128i tmp_5 = _mm_loadl_epi64( - (__m128i *)&filter_8bit[(sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS]); - const __m128i tmp_6 = _mm_loadl_epi64( - (__m128i *)&filter_8bit[(sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS]); - const __m128i tmp_7 = _mm_loadl_epi64( - (__m128i *)&filter_8bit[(sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS]); - - // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 0 2 - const __m128i tmp_8 = _mm_unpacklo_epi16(tmp_0, tmp_2); - // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 1 3 - const __m128i tmp_9 = _mm_unpacklo_epi16(tmp_1, tmp_3); - // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 4 6 - const __m128i tmp_10 = _mm_unpacklo_epi16(tmp_4, tmp_6); - // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 5 7 - const __m128i tmp_11 = _mm_unpacklo_epi16(tmp_5, tmp_7); - - // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 0 2 4 6 - const __m128i tmp_12 = _mm_unpacklo_epi32(tmp_8, tmp_10); - // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 0 2 4 6 - const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_8, 
tmp_10); - // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 1 3 5 7 - const __m128i tmp_14 = _mm_unpacklo_epi32(tmp_9, tmp_11); - // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 1 3 5 7 - const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_9, tmp_11); - - // Coeffs 0 2 for pixels 0 2 4 6 1 3 5 7 - coeff[0] = _mm_unpacklo_epi64(tmp_12, tmp_14); - // Coeffs 4 6 for pixels 0 2 4 6 1 3 5 7 - coeff[1] = _mm_unpackhi_epi64(tmp_12, tmp_14); - // Coeffs 1 3 for pixels 0 2 4 6 1 3 5 7 - coeff[2] = _mm_unpacklo_epi64(tmp_13, tmp_15); - // Coeffs 5 7 for pixels 0 2 4 6 1 3 5 7 - coeff[3] = _mm_unpackhi_epi64(tmp_13, tmp_15); -} - -static INLINE void prepare_horizontal_filter_coeff_alpha0(int sx, - __m128i *coeff) { - // Filter even-index pixels - const __m128i tmp_0 = - _mm_loadl_epi64((__m128i *)&filter_8bit[sx >> WARPEDDIFF_PREC_BITS]); - - // Coeffs 0 2 for pixels 0 2 4 6 1 3 5 7 - coeff[0] = _mm_shuffle_epi8( - tmp_0, _mm_loadu_si128((__m128i *)shuffle_alpha0_mask01)); - // Coeffs 4 6 for pixels 0 2 4 6 1 3 5 7 - coeff[1] = _mm_shuffle_epi8( - tmp_0, _mm_loadu_si128((__m128i *)shuffle_alpha0_mask23)); - // Coeffs 1 3 for pixels 0 2 4 6 1 3 5 7 - coeff[2] = _mm_shuffle_epi8( - tmp_0, _mm_loadu_si128((__m128i *)shuffle_alpha0_mask45)); - // Coeffs 5 7 for pixels 0 2 4 6 1 3 5 7 - coeff[3] = _mm_shuffle_epi8( - tmp_0, _mm_loadu_si128((__m128i *)shuffle_alpha0_mask67)); -} - -static INLINE void horizontal_filter(__m128i src, __m128i *tmp, int sx, - int alpha, int k, - const int offset_bits_horiz, - const int reduce_bits_horiz) { - __m128i coeff[4]; - prepare_horizontal_filter_coeff(alpha, sx, coeff); - filter_src_pixels(src, tmp, coeff, offset_bits_horiz, reduce_bits_horiz, k); -} - -static INLINE void warp_horizontal_filter(const uint8_t *ref, __m128i *tmp, - int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, - int p_height, int height, int i, - const int offset_bits_horiz, - const int reduce_bits_horiz) { - int k; - for (k = -7; k < AOMMIN(8, 
p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int sx = sx4 + beta * (k + 4); - - // Load source pixels - const __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - horizontal_filter(src, tmp, sx, alpha, k, offset_bits_horiz, - reduce_bits_horiz); - } -} - -static INLINE void warp_horizontal_filter_alpha0( - const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, int p_height, int height, int i, - const int offset_bits_horiz, const int reduce_bits_horiz) { - (void)alpha; - int k; - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int sx = sx4 + beta * (k + 4); - - // Load source pixels - const __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - - __m128i coeff[4]; - prepare_horizontal_filter_coeff_alpha0(sx, coeff); - filter_src_pixels(src, tmp, coeff, offset_bits_horiz, reduce_bits_horiz, k); - } -} - -static INLINE void warp_horizontal_filter_beta0( - const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, int p_height, int height, int i, - const int offset_bits_horiz, const int reduce_bits_horiz) { - (void)beta; - int k; - __m128i coeff[4]; - prepare_horizontal_filter_coeff(alpha, sx4, coeff); - - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - - // Load source pixels - const __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - filter_src_pixels(src, tmp, coeff, offset_bits_horiz, reduce_bits_horiz, k); - } -} - -static INLINE void warp_horizontal_filter_alpha0_beta0( - const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, int p_height, int height, int i, - const int offset_bits_horiz, 
const int reduce_bits_horiz) { - (void)beta; - (void)alpha; - int k; - - __m128i coeff[4]; - prepare_horizontal_filter_coeff_alpha0(sx4, coeff); - - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - - // Load source pixels - const __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - filter_src_pixels(src, tmp, coeff, offset_bits_horiz, reduce_bits_horiz, k); - } -} - -static INLINE void unpack_weights_and_set_round_const( - ConvolveParams *conv_params, const int round_bits, const int offset_bits, - __m128i *res_sub_const, __m128i *round_bits_const, __m128i *wt) { - *res_sub_const = - _mm_set1_epi16(-(1 << (offset_bits - conv_params->round_1)) - - (1 << (offset_bits - conv_params->round_1 - 1))); - *round_bits_const = _mm_set1_epi16(((1 << round_bits) >> 1)); - - const int w0 = conv_params->fwd_offset; - const int w1 = conv_params->bck_offset; - const __m128i wt0 = _mm_set1_epi16(w0); - const __m128i wt1 = _mm_set1_epi16(w1); - *wt = _mm_unpacklo_epi16(wt0, wt1); -} - -static INLINE void prepare_vertical_filter_coeffs(int gamma, int sy, - __m128i *coeffs) { - const __m128i tmp_0 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_2 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_4 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_6 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS))); - - const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2); - const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6); - const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2); - const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6); - - // even coeffs - coeffs[0] = _mm_unpacklo_epi64(tmp_8, tmp_10); - coeffs[1] = 
_mm_unpackhi_epi64(tmp_8, tmp_10); - coeffs[2] = _mm_unpacklo_epi64(tmp_12, tmp_14); - coeffs[3] = _mm_unpackhi_epi64(tmp_12, tmp_14); - - const __m128i tmp_1 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_3 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_5 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS))); - const __m128i tmp_7 = _mm_loadu_si128( - (__m128i *)(warped_filter + ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS))); - - const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3); - const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7); - const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3); - const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7); - - // odd coeffs - coeffs[4] = _mm_unpacklo_epi64(tmp_9, tmp_11); - coeffs[5] = _mm_unpackhi_epi64(tmp_9, tmp_11); - coeffs[6] = _mm_unpacklo_epi64(tmp_13, tmp_15); - coeffs[7] = _mm_unpackhi_epi64(tmp_13, tmp_15); -} - -static INLINE void prepare_vertical_filter_coeffs_gamma0(int sy, - __m128i *coeffs) { - const __m128i tmp_0 = _mm_loadu_si128( - (__m128i *)(warped_filter + (sy >> WARPEDDIFF_PREC_BITS))); - - // even coeffs - coeffs[0] = - _mm_shuffle_epi8(tmp_0, _mm_loadu_si128((__m128i *)shuffle_gamma0_mask0)); - coeffs[1] = - _mm_shuffle_epi8(tmp_0, _mm_loadu_si128((__m128i *)shuffle_gamma0_mask1)); - coeffs[2] = - _mm_shuffle_epi8(tmp_0, _mm_loadu_si128((__m128i *)shuffle_gamma0_mask2)); - coeffs[3] = - _mm_shuffle_epi8(tmp_0, _mm_loadu_si128((__m128i *)shuffle_gamma0_mask3)); - - // odd coeffs - coeffs[4] = coeffs[0]; - coeffs[5] = coeffs[1]; - coeffs[6] = coeffs[2]; - coeffs[7] = coeffs[3]; -} - -static INLINE void filter_src_pixels_vertical(__m128i *tmp, __m128i *coeffs, - __m128i *res_lo, __m128i *res_hi, - int k) { - // Load from tmp and rearrange pairs of consecutive rows into the - // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 
7 7 - const __m128i *src = tmp + (k + 4); - const __m128i src_0 = _mm_unpacklo_epi16(src[0], src[1]); - const __m128i src_2 = _mm_unpacklo_epi16(src[2], src[3]); - const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]); - const __m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]); - - const __m128i res_0 = _mm_madd_epi16(src_0, coeffs[0]); - const __m128i res_2 = _mm_madd_epi16(src_2, coeffs[1]); - const __m128i res_4 = _mm_madd_epi16(src_4, coeffs[2]); - const __m128i res_6 = _mm_madd_epi16(src_6, coeffs[3]); - - const __m128i res_even = - _mm_add_epi32(_mm_add_epi32(res_0, res_2), _mm_add_epi32(res_4, res_6)); - - // Filter odd-index pixels - const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]); - const __m128i src_3 = _mm_unpackhi_epi16(src[2], src[3]); - const __m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]); - const __m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]); - - const __m128i res_1 = _mm_madd_epi16(src_1, coeffs[4]); - const __m128i res_3 = _mm_madd_epi16(src_3, coeffs[5]); - const __m128i res_5 = _mm_madd_epi16(src_5, coeffs[6]); - const __m128i res_7 = _mm_madd_epi16(src_7, coeffs[7]); - - const __m128i res_odd = - _mm_add_epi32(_mm_add_epi32(res_1, res_3), _mm_add_epi32(res_5, res_7)); - - // Rearrange pixels back into the order 0 ... 
7 - *res_lo = _mm_unpacklo_epi32(res_even, res_odd); - *res_hi = _mm_unpackhi_epi32(res_even, res_odd); -} - -static INLINE void store_vertical_filter_output( - __m128i *res_lo, __m128i *res_hi, const __m128i *res_add_const, - const __m128i *wt, const __m128i *res_sub_const, __m128i *round_bits_const, - uint8_t *pred, ConvolveParams *conv_params, int i, int j, int k, - const int reduce_bits_vert, int p_stride, int p_width, - const int round_bits) { - __m128i res_lo_1 = *res_lo; - __m128i res_hi_1 = *res_hi; - - if (conv_params->is_compound) { - __m128i *const p = - (__m128i *)&conv_params->dst[(i + k + 4) * conv_params->dst_stride + j]; - res_lo_1 = _mm_srai_epi32(_mm_add_epi32(res_lo_1, *res_add_const), - reduce_bits_vert); - const __m128i temp_lo_16 = _mm_packus_epi32(res_lo_1, res_lo_1); - __m128i res_lo_16; - if (conv_params->do_average) { - __m128i *const dst8 = (__m128i *)&pred[(i + k + 4) * p_stride + j]; - const __m128i p_16 = _mm_loadl_epi64(p); - - if (conv_params->use_jnt_comp_avg) { - const __m128i p_16_lo = _mm_unpacklo_epi16(p_16, temp_lo_16); - const __m128i wt_res_lo = _mm_madd_epi16(p_16_lo, *wt); - const __m128i shifted_32 = - _mm_srai_epi32(wt_res_lo, DIST_PRECISION_BITS); - res_lo_16 = _mm_packus_epi32(shifted_32, shifted_32); - } else { - res_lo_16 = _mm_srai_epi16(_mm_add_epi16(p_16, temp_lo_16), 1); - } - - res_lo_16 = _mm_add_epi16(res_lo_16, *res_sub_const); - - res_lo_16 = _mm_srai_epi16(_mm_add_epi16(res_lo_16, *round_bits_const), - round_bits); - __m128i res_8_lo = _mm_packus_epi16(res_lo_16, res_lo_16); - *(uint32_t *)dst8 = _mm_cvtsi128_si32(res_8_lo); - } else { - _mm_storel_epi64(p, temp_lo_16); - } - if (p_width > 4) { - __m128i *const p4 = - (__m128i *)&conv_params - ->dst[(i + k + 4) * conv_params->dst_stride + j + 4]; - res_hi_1 = _mm_srai_epi32(_mm_add_epi32(res_hi_1, *res_add_const), - reduce_bits_vert); - const __m128i temp_hi_16 = _mm_packus_epi32(res_hi_1, res_hi_1); - __m128i res_hi_16; - - if (conv_params->do_average) { - 
__m128i *const dst8_4 = - (__m128i *)&pred[(i + k + 4) * p_stride + j + 4]; - const __m128i p4_16 = _mm_loadl_epi64(p4); - - if (conv_params->use_jnt_comp_avg) { - const __m128i p_16_hi = _mm_unpacklo_epi16(p4_16, temp_hi_16); - const __m128i wt_res_hi = _mm_madd_epi16(p_16_hi, *wt); - const __m128i shifted_32 = - _mm_srai_epi32(wt_res_hi, DIST_PRECISION_BITS); - res_hi_16 = _mm_packus_epi32(shifted_32, shifted_32); - } else { - res_hi_16 = _mm_srai_epi16(_mm_add_epi16(p4_16, temp_hi_16), 1); - } - res_hi_16 = _mm_add_epi16(res_hi_16, *res_sub_const); - - res_hi_16 = _mm_srai_epi16(_mm_add_epi16(res_hi_16, *round_bits_const), - round_bits); - __m128i res_8_hi = _mm_packus_epi16(res_hi_16, res_hi_16); - *(uint32_t *)dst8_4 = _mm_cvtsi128_si32(res_8_hi); - - } else { - _mm_storel_epi64(p4, temp_hi_16); - } - } - } else { - const __m128i res_lo_round = _mm_srai_epi32( - _mm_add_epi32(res_lo_1, *res_add_const), reduce_bits_vert); - const __m128i res_hi_round = _mm_srai_epi32( - _mm_add_epi32(res_hi_1, *res_add_const), reduce_bits_vert); - - const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round); - __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit); - - // Store, blending with 'pred' if needed - __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j]; - - // Note: If we're outputting a 4x4 block, we need to be very careful - // to only output 4 pixels at this point, to avoid encode/decode - // mismatches when encoding with multiple threads. 
- if (p_width == 4) { - *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit); - } else { - _mm_storel_epi64(p, res_8bit); - } - } -} - -static INLINE void warp_vertical_filter( - uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma, - int16_t delta, int p_height, int p_stride, int p_width, int i, int j, - int sy4, const int reduce_bits_vert, const __m128i *res_add_const, - const int round_bits, const int offset_bits) { - int k; - __m128i res_sub_const, round_bits_const, wt; - unpack_weights_and_set_round_const(conv_params, round_bits, offset_bits, - &res_sub_const, &round_bits_const, &wt); - // Vertical filter - for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) { - int sy = sy4 + delta * (k + 4); - - __m128i coeffs[8]; - prepare_vertical_filter_coeffs(gamma, sy, coeffs); - - __m128i res_lo; - __m128i res_hi; - filter_src_pixels_vertical(tmp, coeffs, &res_lo, &res_hi, k); - - store_vertical_filter_output(&res_lo, &res_hi, res_add_const, &wt, - &res_sub_const, &round_bits_const, pred, - conv_params, i, j, k, reduce_bits_vert, - p_stride, p_width, round_bits); - } -} - -static INLINE void warp_vertical_filter_gamma0( - uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma, - int16_t delta, int p_height, int p_stride, int p_width, int i, int j, - int sy4, const int reduce_bits_vert, const __m128i *res_add_const, - const int round_bits, const int offset_bits) { - int k; - (void)gamma; - __m128i res_sub_const, round_bits_const, wt; - unpack_weights_and_set_round_const(conv_params, round_bits, offset_bits, - &res_sub_const, &round_bits_const, &wt); - // Vertical filter - for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) { - int sy = sy4 + delta * (k + 4); - - __m128i coeffs[8]; - prepare_vertical_filter_coeffs_gamma0(sy, coeffs); - - __m128i res_lo; - __m128i res_hi; - filter_src_pixels_vertical(tmp, coeffs, &res_lo, &res_hi, k); - - store_vertical_filter_output(&res_lo, &res_hi, res_add_const, &wt, - &res_sub_const, &round_bits_const, pred, 
- conv_params, i, j, k, reduce_bits_vert, - p_stride, p_width, round_bits); - } -} - -static INLINE void warp_vertical_filter_delta0( - uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma, - int16_t delta, int p_height, int p_stride, int p_width, int i, int j, - int sy4, const int reduce_bits_vert, const __m128i *res_add_const, - const int round_bits, const int offset_bits) { - (void)delta; - int k; - __m128i res_sub_const, round_bits_const, wt; - unpack_weights_and_set_round_const(conv_params, round_bits, offset_bits, - &res_sub_const, &round_bits_const, &wt); - - __m128i coeffs[8]; - prepare_vertical_filter_coeffs(gamma, sy4, coeffs); - // Vertical filter - for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) { - __m128i res_lo; - __m128i res_hi; - filter_src_pixels_vertical(tmp, coeffs, &res_lo, &res_hi, k); - - store_vertical_filter_output(&res_lo, &res_hi, res_add_const, &wt, - &res_sub_const, &round_bits_const, pred, - conv_params, i, j, k, reduce_bits_vert, - p_stride, p_width, round_bits); - } -} - -static INLINE void warp_vertical_filter_gamma0_delta0( - uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma, - int16_t delta, int p_height, int p_stride, int p_width, int i, int j, - int sy4, const int reduce_bits_vert, const __m128i *res_add_const, - const int round_bits, const int offset_bits) { - (void)delta; - (void)gamma; - int k; - __m128i res_sub_const, round_bits_const, wt; - unpack_weights_and_set_round_const(conv_params, round_bits, offset_bits, - &res_sub_const, &round_bits_const, &wt); - - __m128i coeffs[8]; - prepare_vertical_filter_coeffs_gamma0(sy4, coeffs); - // Vertical filter - for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) { - __m128i res_lo; - __m128i res_hi; - filter_src_pixels_vertical(tmp, coeffs, &res_lo, &res_hi, k); - - store_vertical_filter_output(&res_lo, &res_hi, res_add_const, &wt, - &res_sub_const, &round_bits_const, pred, - conv_params, i, j, k, reduce_bits_vert, - p_stride, p_width, 
round_bits); - } -} - -static INLINE void prepare_warp_vertical_filter( - uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma, - int16_t delta, int p_height, int p_stride, int p_width, int i, int j, - int sy4, const int reduce_bits_vert, const __m128i *res_add_const, - const int round_bits, const int offset_bits) { - if (gamma == 0 && delta == 0) - warp_vertical_filter_gamma0_delta0( - pred, tmp, conv_params, gamma, delta, p_height, p_stride, p_width, i, j, - sy4, reduce_bits_vert, res_add_const, round_bits, offset_bits); - else if (gamma == 0 && delta != 0) - warp_vertical_filter_gamma0(pred, tmp, conv_params, gamma, delta, p_height, - p_stride, p_width, i, j, sy4, reduce_bits_vert, - res_add_const, round_bits, offset_bits); - else if (gamma != 0 && delta == 0) - warp_vertical_filter_delta0(pred, tmp, conv_params, gamma, delta, p_height, - p_stride, p_width, i, j, sy4, reduce_bits_vert, - res_add_const, round_bits, offset_bits); - else - warp_vertical_filter(pred, tmp, conv_params, gamma, delta, p_height, - p_stride, p_width, i, j, sy4, reduce_bits_vert, - res_add_const, round_bits, offset_bits); -} - -static INLINE void prepare_warp_horizontal_filter( - const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4, - int32_t sx4, int alpha, int beta, int p_height, int height, int i, - const int offset_bits_horiz, const int reduce_bits_horiz) { - if (alpha == 0 && beta == 0) - warp_horizontal_filter_alpha0_beta0(ref, tmp, stride, ix4, iy4, sx4, alpha, - beta, p_height, height, i, - offset_bits_horiz, reduce_bits_horiz); - else if (alpha == 0 && beta != 0) - warp_horizontal_filter_alpha0(ref, tmp, stride, ix4, iy4, sx4, alpha, beta, - p_height, height, i, offset_bits_horiz, - reduce_bits_horiz); - else if (alpha != 0 && beta == 0) - warp_horizontal_filter_beta0(ref, tmp, stride, ix4, iy4, sx4, alpha, beta, - p_height, height, i, offset_bits_horiz, - reduce_bits_horiz); - else - warp_horizontal_filter(ref, tmp, stride, ix4, iy4, sx4, 
alpha, beta, - p_height, height, i, offset_bits_horiz, - reduce_bits_horiz); -} - -void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, - int height, int stride, uint8_t *pred, int p_col, - int p_row, int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, - ConvolveParams *conv_params, int16_t alpha, - int16_t beta, int16_t gamma, int16_t delta) { - __m128i tmp[15]; - int i, j, k; - const int bd = 8; - const int reduce_bits_horiz = conv_params->round_0; - const int reduce_bits_vert = conv_params->is_compound - ? conv_params->round_1 - : 2 * FILTER_BITS - reduce_bits_horiz; - const int offset_bits_horiz = bd + FILTER_BITS - 1; - assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL)); - - const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz; - const __m128i reduce_bits_vert_const = - _mm_set1_epi32(((1 << reduce_bits_vert) >> 1)); - const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits_vert); - const int round_bits = - 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; - const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; - assert(IMPLIES(conv_params->do_average, conv_params->is_compound)); - - /* Note: For this code to work, the left/right frame borders need to be - extended by at least 13 pixels each. By the time we get here, other - code will have set up this border, but we allow an explicit check - for debugging purposes. 
- */ - /*for (i = 0; i < height; ++i) { - for (j = 0; j < 13; ++j) { - assert(ref[i * stride - 13 + j] == ref[i * stride]); - assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]); - } - }*/ - __m128i res_add_const_1; - if (conv_params->is_compound == 1) { - res_add_const_1 = _mm_add_epi32(reduce_bits_vert_const, res_add_const); - } else { - res_add_const_1 = _mm_set1_epi32(-(1 << (bd + reduce_bits_vert - 1)) + - ((1 << reduce_bits_vert) >> 1)); - } - - for (i = 0; i < p_height; i += 8) { - for (j = 0; j < p_width; j += 8) { - const int32_t src_x = (p_col + j + 4) << subsampling_x; - const int32_t src_y = (p_row + i + 4) << subsampling_y; - const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0]; - const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1]; - const int32_t x4 = dst_x >> subsampling_x; - const int32_t y4 = dst_y >> subsampling_y; - - int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS; - int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS; - int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - - // Add in all the constant terms, including rounding and offset - sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) + - (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS); - sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) + - (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS); - - sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - - // Horizontal filter - // If the block is aligned such that, after clamping, every sample - // would be taken from the leftmost/rightmost column, then we can - // skip the expensive horizontal filter. 
- if (ix4 <= -7) { - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - tmp[k + 7] = _mm_set1_epi16( - (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) + - ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz))); - } - } else if (ix4 >= width + 6) { - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - tmp[k + 7] = - _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) + - ref[iy * stride + (width - 1)] * - (1 << (FILTER_BITS - reduce_bits_horiz))); - } - } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) { - const int out_of_boundary_left = -(ix4 - 6); - const int out_of_boundary_right = (ix4 + 8) - width; - for (k = -7; k < AOMMIN(8, p_height - i); ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - int sx = sx4 + beta * (k + 4); - - // Load source pixels - __m128i src = - _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7)); - if (out_of_boundary_left >= 0) { - const __m128i shuffle_reg_left = - _mm_loadu_si128((__m128i *)warp_pad_left[out_of_boundary_left]); - src = _mm_shuffle_epi8(src, shuffle_reg_left); - } - if (out_of_boundary_right >= 0) { - const __m128i shuffle_reg_right = _mm_loadu_si128( - (__m128i *)warp_pad_right[out_of_boundary_right]); - src = _mm_shuffle_epi8(src, shuffle_reg_right); - } - horizontal_filter(src, tmp, sx, alpha, k, offset_bits_horiz, - reduce_bits_horiz); - } - } else { - prepare_warp_horizontal_filter(ref, tmp, stride, ix4, iy4, sx4, alpha, - beta, p_height, height, i, - offset_bits_horiz, reduce_bits_horiz); - } - - // Vertical filter - prepare_warp_vertical_filter( - pred, tmp, conv_params, gamma, delta, p_height, p_stride, p_width, i, - j, sy4, reduce_bits_vert, &res_add_const_1, round_bits, offset_bits); - } - } -} diff --git 
a/third_party/aom/av1/common/x86/wiener_convolve_avx2.c b/third_party/aom/av1/common/x86/wiener_convolve_avx2.c deleted file mode 100644 index 87a6e1239..000000000 --- a/third_party/aom/av1/common/x86/wiener_convolve_avx2.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include <immintrin.h> -#include <assert.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/convolve.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#include "aom_dsp/x86/synonyms.h" -#include "aom_dsp/x86/synonyms_avx2.h" - -// 128-bit xmmwords are written as [ ... ] with the MSB on the left. -// 256-bit ymmwords are written as two xmmwords, [ ... ][ ... ] with the MSB -// on the left. -// A row of, say, 8-bit pixels with values p0, p1, p2, ..., p30, p31 will be -// loaded and stored as [ p31 ... p17 p16 ][ p15 ... p1 p0 ]. 
-void av1_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, - const ConvolveParams *conv_params) { - const int bd = 8; - assert(x_step_q4 == 16 && y_step_q4 == 16); - assert(!(w & 7)); - (void)x_step_q4; - (void)y_step_q4; - - DECLARE_ALIGNED(32, uint16_t, - temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]); - int intermediate_height = h + SUBPEL_TAPS - 2; - memset(temp + (intermediate_height * MAX_SB_SIZE), 0, MAX_SB_SIZE); - const int center_tap = ((SUBPEL_TAPS - 1) / 2); - const uint8_t *const src_ptr = src - center_tap * src_stride - center_tap; - - const __m128i zero_128 = _mm_setzero_si128(); - const __m256i zero_256 = _mm256_setzero_si256(); - - // Add an offset to account for the "add_src" part of the convolve function. - const __m128i offset = _mm_insert_epi16(zero_128, 1 << FILTER_BITS, 3); - - const __m256i clamp_low = zero_256; - const __m256i clamp_high = - _mm256_set1_epi16(WIENER_CLAMP_LIMIT(conv_params->round_0, bd) - 1); - - /* Horizontal filter */ - { - // coeffs [ f7 f6 f5 f4 f3 f2 f1 f0 ] - const __m128i coeffs_x = _mm_add_epi16(xx_loadu_128(filter_x), offset); - - // coeffs [ f3 f2 f3 f2 f1 f0 f1 f0 ] - const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_x, coeffs_x); - // coeffs [ f7 f6 f7 f6 f5 f4 f5 f4 ] - const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_x, coeffs_x); - - // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ] - const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123); - // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ] - const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123); - // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ] - const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567); - // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ] - const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567); - - // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ][ 
f1 f0 f1 f0 f1 f0 f1 f0 ] - const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128); - // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ][ f3 f2 f3 f2 f3 f2 f3 f2 ] - const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128); - // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ][ f5 f4 f5 f4 f5 f4 f5 f4 ] - const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128); - // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ][ f7 f6 f7 f6 f7 f6 f7 f6 ] - const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128); - - const __m256i round_const = _mm256_set1_epi32( - (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1))); - - for (int i = 0; i < intermediate_height; ++i) { - for (int j = 0; j < w; j += 16) { - const uint8_t *data_ij = src_ptr + i * src_stride + j; - - // Load 8-bit src data - const __m128i data_0 = xx_loadu_128(data_ij + 0); - const __m128i data_1 = xx_loadu_128(data_ij + 1); - const __m128i data_2 = xx_loadu_128(data_ij + 2); - const __m128i data_3 = xx_loadu_128(data_ij + 3); - const __m128i data_4 = xx_loadu_128(data_ij + 4); - const __m128i data_5 = xx_loadu_128(data_ij + 5); - const __m128i data_6 = xx_loadu_128(data_ij + 6); - const __m128i data_7 = xx_loadu_128(data_ij + 7); - - // (Zero-)Extend 8-bit data to 16-bit data - const __m256i src_0 = _mm256_cvtepu8_epi16(data_0); - const __m256i src_1 = _mm256_cvtepu8_epi16(data_1); - const __m256i src_2 = _mm256_cvtepu8_epi16(data_2); - const __m256i src_3 = _mm256_cvtepu8_epi16(data_3); - const __m256i src_4 = _mm256_cvtepu8_epi16(data_4); - const __m256i src_5 = _mm256_cvtepu8_epi16(data_5); - const __m256i src_6 = _mm256_cvtepu8_epi16(data_6); - const __m256i src_7 = _mm256_cvtepu8_epi16(data_7); - - // Multiply src data by filter coeffs and sum pairs - const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); - const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01); - const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); - const __m256i res_3 = _mm256_madd_epi16(src_3, 
coeffs_23); - const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45); - const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45); - const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67); - const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67); - - // Calculate scalar product for even- and odd-indices separately, - // increasing to 32-bit precision - const __m256i res_even_sum = _mm256_add_epi32( - _mm256_add_epi32(res_0, res_4), _mm256_add_epi32(res_2, res_6)); - const __m256i res_odd_sum = _mm256_add_epi32( - _mm256_add_epi32(res_1, res_5), _mm256_add_epi32(res_3, res_7)); - - const __m256i res_even = _mm256_srai_epi32( - _mm256_add_epi32(res_even_sum, round_const), conv_params->round_0); - const __m256i res_odd = _mm256_srai_epi32( - _mm256_add_epi32(res_odd_sum, round_const), conv_params->round_0); - - // Reduce to 16-bit precision and pack even- and odd-index results - // back into one register. The _mm256_packs_epi32 intrinsic returns - // a register with the pixels ordered as follows: - // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ] - const __m256i res = _mm256_packs_epi32(res_even, res_odd); - const __m256i res_clamped = - _mm256_min_epi16(_mm256_max_epi16(res, clamp_low), clamp_high); - - // Store in a temporary array - yy_storeu_256(temp + i * MAX_SB_SIZE + j, res_clamped); - } - } - } - - /* Vertical filter */ - { - // coeffs [ g7 g6 g5 g4 g3 g2 g1 g0 ] - const __m128i coeffs_y = _mm_add_epi16(xx_loadu_128(filter_y), offset); - - // coeffs [ g3 g2 g3 g2 g1 g0 g1 g0 ] - const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_y, coeffs_y); - // coeffs [ g7 g6 g7 g6 g5 g4 g5 g4 ] - const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_y, coeffs_y); - - // coeffs [ g1 g0 g1 g0 g1 g0 g1 g0 ] - const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123); - // coeffs [ g3 g2 g3 g2 g3 g2 g3 g2 ] - const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123); - // coeffs [ g5 g4 g5 g4 g5 g4 g5 g4 ] - const __m128i 
coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567); - // coeffs [ g7 g6 g7 g6 g7 g6 g7 g6 ] - const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567); - - // coeffs [ g1 g0 g1 g0 g1 g0 g1 g0 ][ g1 g0 g1 g0 g1 g0 g1 g0 ] - const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128); - // coeffs [ g3 g2 g3 g2 g3 g2 g3 g2 ][ g3 g2 g3 g2 g3 g2 g3 g2 ] - const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128); - // coeffs [ g5 g4 g5 g4 g5 g4 g5 g4 ][ g5 g4 g5 g4 g5 g4 g5 g4 ] - const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128); - // coeffs [ g7 g6 g7 g6 g7 g6 g7 g6 ][ g7 g6 g7 g6 g7 g6 g7 g6 ] - const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128); - - const __m256i round_const = - _mm256_set1_epi32((1 << (conv_params->round_1 - 1)) - - (1 << (bd + conv_params->round_1 - 1))); - - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; j += 16) { - const uint16_t *data_ij = temp + i * MAX_SB_SIZE + j; - - // Load 16-bit data from the output of the horizontal filter in - // which the pixels are ordered as follows: - // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ] - const __m256i data_0 = yy_loadu_256(data_ij + 0 * MAX_SB_SIZE); - const __m256i data_1 = yy_loadu_256(data_ij + 1 * MAX_SB_SIZE); - const __m256i data_2 = yy_loadu_256(data_ij + 2 * MAX_SB_SIZE); - const __m256i data_3 = yy_loadu_256(data_ij + 3 * MAX_SB_SIZE); - const __m256i data_4 = yy_loadu_256(data_ij + 4 * MAX_SB_SIZE); - const __m256i data_5 = yy_loadu_256(data_ij + 5 * MAX_SB_SIZE); - const __m256i data_6 = yy_loadu_256(data_ij + 6 * MAX_SB_SIZE); - const __m256i data_7 = yy_loadu_256(data_ij + 7 * MAX_SB_SIZE); - - // Filter the even-indices, increasing to 32-bit precision - const __m256i src_0 = _mm256_unpacklo_epi16(data_0, data_1); - const __m256i src_2 = _mm256_unpacklo_epi16(data_2, data_3); - const __m256i src_4 = _mm256_unpacklo_epi16(data_4, data_5); - const __m256i src_6 = _mm256_unpacklo_epi16(data_6, 
data_7); - - const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); - const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); - const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45); - const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67); - - const __m256i res_even = _mm256_add_epi32( - _mm256_add_epi32(res_0, res_2), _mm256_add_epi32(res_4, res_6)); - - // Filter the odd-indices, increasing to 32-bit precision - const __m256i src_1 = _mm256_unpackhi_epi16(data_0, data_1); - const __m256i src_3 = _mm256_unpackhi_epi16(data_2, data_3); - const __m256i src_5 = _mm256_unpackhi_epi16(data_4, data_5); - const __m256i src_7 = _mm256_unpackhi_epi16(data_6, data_7); - - const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01); - const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23); - const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45); - const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67); - - const __m256i res_odd = _mm256_add_epi32( - _mm256_add_epi32(res_1, res_3), _mm256_add_epi32(res_5, res_7)); - - // Pixels are currently in the following order: - // res_even order: [ 14 12 10 8 ] [ 6 4 2 0 ] - // res_odd order: [ 15 13 11 9 ] [ 7 5 3 1 ] - // - // Rearrange the pixels into the following order: - // res_lo order: [ 11 10 9 8 ] [ 3 2 1 0 ] - // res_hi order: [ 15 14 13 12 ] [ 7 6 5 4 ] - const __m256i res_lo = _mm256_unpacklo_epi32(res_even, res_odd); - const __m256i res_hi = _mm256_unpackhi_epi32(res_even, res_odd); - - const __m256i res_lo_round = _mm256_srai_epi32( - _mm256_add_epi32(res_lo, round_const), conv_params->round_1); - const __m256i res_hi_round = _mm256_srai_epi32( - _mm256_add_epi32(res_hi, round_const), conv_params->round_1); - - // Reduce to 16-bit precision and pack into the correct order: - // [ 15 14 13 12 11 10 9 8 ][ 7 6 5 4 3 2 1 0 ] - const __m256i res_16bit = - _mm256_packs_epi32(res_lo_round, res_hi_round); - - // Reduce to 8-bit precision. 
This messes up the order: - // [ - - - - - - - - 15 14 13 12 11 10 9 8 ] - // [ - - - - - - - - 7 6 5 4 3 2 1 0 ] - const __m256i res_8bit = - _mm256_packus_epi16(res_16bit, zero_256 /* don't care value */); - - // Swap the two central 32-bit values to get the order: - // [ - - - - - - - - - - - - - - - - ] - // [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ] - const __m256i res_8bit2 = _mm256_permute4x64_epi64(res_8bit, 0xd8); - - // Store the lower 128-bit lane in the dst array - xx_storeu_128(dst + i * dst_stride + j, - _mm256_castsi256_si128(res_8bit2)); - } - } - } -} diff --git a/third_party/aom/av1/common/x86/wiener_convolve_sse2.c b/third_party/aom/av1/common/x86/wiener_convolve_sse2.c deleted file mode 100644 index f9d00b733..000000000 --- a/third_party/aom/av1/common/x86/wiener_convolve_sse2.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include <emmintrin.h> -#include <assert.h> - -#include "config/av1_rtcd.h" - -#include "av1/common/convolve.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" - -void av1_wiener_convolve_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, - const ConvolveParams *conv_params) { - const int bd = 8; - assert(x_step_q4 == 16 && y_step_q4 == 16); - assert(!(w & 7)); - (void)x_step_q4; - (void)y_step_q4; - - DECLARE_ALIGNED(16, uint16_t, - temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]); - int intermediate_height = h + SUBPEL_TAPS - 2; - memset(temp + (intermediate_height * MAX_SB_SIZE), 0, MAX_SB_SIZE); - int i, j; - const int center_tap = ((SUBPEL_TAPS - 1) / 2); - const uint8_t *const src_ptr = src - center_tap * src_stride - center_tap; - - const __m128i zero = _mm_setzero_si128(); - // Add an offset to account for the "add_src" part of the convolve function. 
- const __m128i offset = _mm_insert_epi16(zero, 1 << FILTER_BITS, 3); - - /* Horizontal filter */ - { - const __m128i coeffs_x = - _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_x), offset); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i round_const = _mm_set1_epi32( - (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1))); - - for (i = 0; i < intermediate_height; ++i) { - for (j = 0; j < w; j += 8) { - const __m128i data = - _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]); - - // Filter even-index pixels - const __m128i src_0 = _mm_unpacklo_epi8(data, zero); - const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01); - const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(data, 2), zero); - const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); - const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero); - const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); - const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(data, 6), zero); - const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67); - - __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4), - _mm_add_epi32(res_2, res_6)); - res_even = _mm_srai_epi32(_mm_add_epi32(res_even, round_const), - conv_params->round_0); - - // Filter odd-index pixels - const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero); - const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01); - const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(data, 3), zero); - const __m128i res_3 
= _mm_madd_epi16(src_3, coeff_23); - const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(data, 5), zero); - const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45); - const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(data, 7), zero); - const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67); - - __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5), - _mm_add_epi32(res_3, res_7)); - res_odd = _mm_srai_epi32(_mm_add_epi32(res_odd, round_const), - conv_params->round_0); - - // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7 - __m128i res = _mm_packs_epi32(res_even, res_odd); - res = _mm_min_epi16( - _mm_max_epi16(res, zero), - _mm_set1_epi16(WIENER_CLAMP_LIMIT(conv_params->round_0, bd) - 1)); - _mm_storeu_si128((__m128i *)&temp[i * MAX_SB_SIZE + j], res); - } - } - } - - /* Vertical filter */ - { - const __m128i coeffs_y = - _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_y), offset); - - // coeffs 0 1 0 1 2 3 2 3 - const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y); - // coeffs 4 5 4 5 6 7 6 7 - const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y); - - // coeffs 0 1 0 1 0 1 0 1 - const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0); - // coeffs 2 3 2 3 2 3 2 3 - const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0); - // coeffs 4 5 4 5 4 5 4 5 - const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1); - // coeffs 6 7 6 7 6 7 6 7 - const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1); - - const __m128i round_const = - _mm_set1_epi32((1 << (conv_params->round_1 - 1)) - - (1 << (bd + conv_params->round_1 - 1))); - - for (i = 0; i < h; ++i) { - for (j = 0; j < w; j += 8) { - // Filter even-index pixels - const uint16_t *data = &temp[i * MAX_SB_SIZE + j]; - const __m128i src_0 = - _mm_unpacklo_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE), - *(__m128i *)(data + 1 * MAX_SB_SIZE)); - const __m128i src_2 = - _mm_unpacklo_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE), - *(__m128i *)(data + 3 * MAX_SB_SIZE)); - const __m128i src_4 = - 
_mm_unpacklo_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE), - *(__m128i *)(data + 5 * MAX_SB_SIZE)); - const __m128i src_6 = - _mm_unpacklo_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE), - *(__m128i *)(data + 7 * MAX_SB_SIZE)); - - const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01); - const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); - const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); - const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67); - - const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2), - _mm_add_epi32(res_4, res_6)); - - // Filter odd-index pixels - const __m128i src_1 = - _mm_unpackhi_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE), - *(__m128i *)(data + 1 * MAX_SB_SIZE)); - const __m128i src_3 = - _mm_unpackhi_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE), - *(__m128i *)(data + 3 * MAX_SB_SIZE)); - const __m128i src_5 = - _mm_unpackhi_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE), - *(__m128i *)(data + 5 * MAX_SB_SIZE)); - const __m128i src_7 = - _mm_unpackhi_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE), - *(__m128i *)(data + 7 * MAX_SB_SIZE)); - - const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01); - const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23); - const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45); - const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67); - - const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3), - _mm_add_epi32(res_5, res_7)); - - // Rearrange pixels back into the order 0 ... 
7 - const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); - const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); - - const __m128i res_lo_round = _mm_srai_epi32( - _mm_add_epi32(res_lo, round_const), conv_params->round_1); - const __m128i res_hi_round = _mm_srai_epi32( - _mm_add_epi32(res_hi, round_const), conv_params->round_1); - - const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round); - __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit); - - __m128i *const p = (__m128i *)&dst[i * dst_stride + j]; - _mm_storel_epi64(p, res_8bit); - } - } - } -} |