diff options
Diffstat (limited to 'media/libaom/config')
32 files changed, 5061 insertions, 2503 deletions
diff --git a/media/libaom/config/generic/aom_config.asm b/media/libaom/config/generic/aom_config.asm index 483fbc80f..782a28b7f 100644 --- a/media/libaom/config/generic/aom_config.asm +++ b/media/libaom/config/generic/aom_config.asm @@ -49,8 +49,6 @@ .equ CONFIG_STATIC_MSVCRT , 0 .equ CONFIG_SPATIAL_RESAMPLING , 1 .equ CONFIG_REALTIME_ONLY , 0 -.equ CONFIG_ONTHEFLY_BITPACKING , 0 -.equ CONFIG_ERROR_CONCEALMENT , 0 .equ CONFIG_SHARED , 0 .equ CONFIG_STATIC , 1 .equ CONFIG_SMALL , 0 @@ -63,73 +61,71 @@ .equ CONFIG_INSPECTION , 0 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_ENCODE_PERF_TESTS , 0 +.equ CONFIG_BITSTREAM_DEBUG , 0 +.equ CONFIG_SYMBOLRATE , 0 .equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0 .equ CONFIG_LOWBITDEPTH , 1 .equ CONFIG_HIGHBITDEPTH , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_SIZE_LIMIT , 1 -.equ CONFIG_COLORSPACE_HEADERS , 0 .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_CDEF , 1 +.equ CONFIG_CDEF_SINGLEPASS , 1 .equ CONFIG_VAR_TX , 1 .equ CONFIG_RECT_TX , 1 .equ CONFIG_RECT_TX_EXT , 0 .equ CONFIG_TPL_MV , 0 .equ CONFIG_DUAL_FILTER , 1 -.equ CONFIG_CONVOLVE_ROUND , 0 +.equ CONFIG_CONVOLVE_ROUND , 1 .equ CONFIG_COMPOUND_ROUND , 0 .equ CONFIG_EXT_TX , 1 -.equ CONFIG_DPCM_INTRA , 0 .equ CONFIG_TX64X64 , 0 .equ CONFIG_EXT_INTRA , 1 .equ CONFIG_INTRA_INTERP , 0 .equ CONFIG_FILTER_INTRA , 0 -.equ CONFIG_INTRA_EDGE , 0 +.equ CONFIG_INTRA_EDGE , 1 .equ CONFIG_INTRABC , 0 -.equ CONFIG_EXT_INTER , 1 .equ CONFIG_INTERINTRA , 1 .equ CONFIG_WEDGE , 1 .equ CONFIG_COMPOUND_SEGMENT , 1 .equ CONFIG_EXT_REFS , 1 -.equ CONFIG_ALTREF2 , 0 -.equ CONFIG_SPEED_REFS , 0 -.equ CONFIG_GF_GROUPS , 0 -.equ CONFIG_FLEX_REFS , 0 .equ CONFIG_GLOBAL_MOTION , 1 .equ CONFIG_NEW_QUANT , 0 .equ CONFIG_SUPERTX , 0 .equ CONFIG_ANS , 0 -.equ CONFIG_LOOP_RESTORATION , 0 +.equ CONFIG_LOOP_RESTORATION , 1 +.equ CONFIG_STRIPED_LOOP_RESTORATION , 0 .equ CONFIG_EXT_PARTITION , 0 .equ CONFIG_EXT_PARTITION_TYPES , 0 +.equ CONFIG_EXT_PARTITION_TYPES_AB , 0 .equ CONFIG_UNPOISON_PARTITION_CTX , 0 .equ CONFIG_EXT_TILE , 0 .equ CONFIG_MOTION_VAR , 1 .equ CONFIG_NCOBMC , 0 .equ CONFIG_WARPED_MOTION , 1 .equ CONFIG_Q_ADAPT_PROBS , 0 -.equ CONFIG_BITSTREAM_DEBUG , 0 .equ CONFIG_INTER_STATS_ONLY , 0 -.equ CONFIG_ALT_INTRA , 1 -.equ CONFIG_PALETTE , 1 .equ CONFIG_PALETTE_DELTA_ENCODING , 0 .equ CONFIG_RAWBITS , 0 -.equ CONFIG_EC_SMALLMUL , 1 +.equ CONFIG_KF_CTX , 0 .equ CONFIG_PVQ , 0 .equ CONFIG_CFL , 0 .equ CONFIG_XIPHRC , 0 .equ CONFIG_DCT_ONLY , 0 +.equ CONFIG_DAALA_TX , 0 .equ CONFIG_DAALA_DCT4 , 0 .equ CONFIG_DAALA_DCT8 , 0 +.equ CONFIG_DAALA_DCT16 , 0 +.equ CONFIG_DAALA_DCT32 , 0 +.equ CONFIG_DAALA_DCT64 , 0 .equ CONFIG_CB4X4 , 1 .equ CONFIG_CHROMA_2X2 , 0 .equ CONFIG_CHROMA_SUB8X8 , 1 .equ CONFIG_FRAME_SIZE , 0 -.equ CONFIG_DELTA_Q , 1 .equ CONFIG_EXT_DELTA_Q , 1 .equ CONFIG_ADAPT_SCAN , 0 -.equ CONFIG_FILTER_7BIT , 1 .equ CONFIG_PARALLEL_DEBLOCKING , 1 +.equ CONFIG_DEBLOCK_13TAP , 0 .equ CONFIG_LOOPFILTERING_ACROSS_TILES , 1 .equ CONFIG_TEMPMV_SIGNALING , 1 .equ CONFIG_RD_DEBUG , 0 @@ -138,30 +134,47 @@ .equ CONFIG_ENTROPY_STATS , 0 .equ CONFIG_MASKED_TX , 0 .equ CONFIG_DEPENDENT_HORZTILES , 0 -.equ CONFIG_DIST_8X8 , 0 -.equ CONFIG_DAALA_DIST , 0 -.equ CONFIG_TRIPRED , 0 +.equ CONFIG_DIST_8X8 , 1 .equ CONFIG_PALETTE_THROUGHPUT , 1 .equ CONFIG_REF_ADAPT , 0 .equ CONFIG_LV_MAP , 0 +.equ CONFIG_CTX1D , 0 .equ CONFIG_TXK_SEL , 0 .equ CONFIG_MV_COMPRESS , 1 +.equ CONFIG_SEGMENT_ZEROMV , 0 .equ CONFIG_FRAME_SUPERRES , 0 .equ CONFIG_NEW_MULTISYMBOL , 0 .equ CONFIG_COMPOUND_SINGLEREF , 0 -.equ CONFIG_AOM_QM , 0 +.equ CONFIG_AOM_QM , 1 .equ CONFIG_ONE_SIDED_COMPOUND , 1 -.equ CONFIG_EXT_COMP_REFS , 0 +.equ CONFIG_EXT_COMP_REFS , 1 .equ CONFIG_SMOOTH_HV , 1 .equ CONFIG_VAR_REFS , 0 -.equ CONFIG_RECT_INTRA_PRED , 1 .equ CONFIG_LGT , 0 +.equ CONFIG_LGT_FROM_PRED , 0 .equ CONFIG_SBL_SYMBOL , 0 .equ CONFIG_NCOBMC_ADAPT_WEIGHT , 0 .equ CONFIG_BGSPRITE , 0 .equ CONFIG_VAR_TX_NO_TX_MODE , 0 .equ CONFIG_MRC_TX , 0 .equ CONFIG_LPF_DIRECT , 0 -.equ CONFIG_UV_LVL , 0 +.equ CONFIG_LOOPFILTER_LEVEL , 0 +.equ CONFIG_NO_FRAME_CONTEXT_SIGNALING , 0 +.equ CONFIG_TXMG , 1 +.equ CONFIG_MAX_TILE , 0 +.equ CONFIG_HASH_ME , 0 +.equ CONFIG_COLORSPACE_HEADERS , 0 +.equ CONFIG_MFMV , 0 +.equ CONFIG_FRAME_MARKER , 0 +.equ CONFIG_JNT_COMP , 0 +.equ CONFIG_FRAME_SIGN_BIAS , 0 +.equ CONFIG_EXT_SKIP , 0 +.equ CONFIG_OBU , 0 +.equ CONFIG_AMVR , 0 +.equ CONFIG_LPF_SB , 0 +.equ CONFIG_OPT_REF_MV , 0 +.equ CONFIG_TMV , 0 +.equ CONFIG_RESTRICT_COMPRESSED_HDR , 0 +.equ CONFIG_HORZONLY_FRAME_SUPERRES , 0 .equ CONFIG_ANALYZER , 0 .section .note.GNU-stack,"",%progbits diff --git a/media/libaom/config/generic/aom_config.h b/media/libaom/config/generic/aom_config.h index 27934329c..6757221e3 100644 --- a/media/libaom/config/generic/aom_config.h +++ b/media/libaom/config/generic/aom_config.h @@ -59,8 +59,6 @@ #define CONFIG_STATIC_MSVCRT 0 #define CONFIG_SPATIAL_RESAMPLING 1 #define CONFIG_REALTIME_ONLY 0 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 #define CONFIG_SHARED 0 #define CONFIG_STATIC 1 #define CONFIG_SMALL 0 @@ -73,73 +71,71 @@ #define CONFIG_INSPECTION 0 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_BITSTREAM_DEBUG 0 +#define CONFIG_SYMBOLRATE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_LOWBITDEPTH 1 #define CONFIG_HIGHBITDEPTH 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_SIZE_LIMIT 1 -#define CONFIG_COLORSPACE_HEADERS 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_CDEF 1 +#define CONFIG_CDEF_SINGLEPASS 1 #define CONFIG_VAR_TX 1 #define CONFIG_RECT_TX 1 #define CONFIG_RECT_TX_EXT 0 #define CONFIG_TPL_MV 0 #define CONFIG_DUAL_FILTER 1 -#define CONFIG_CONVOLVE_ROUND 0 +#define CONFIG_CONVOLVE_ROUND 1 #define CONFIG_COMPOUND_ROUND 0 #define CONFIG_EXT_TX 1 -#define CONFIG_DPCM_INTRA 0 #define CONFIG_TX64X64 0 #define CONFIG_EXT_INTRA 1 #define CONFIG_INTRA_INTERP 0 #define CONFIG_FILTER_INTRA 0 -#define CONFIG_INTRA_EDGE 0 +#define CONFIG_INTRA_EDGE 1 #define CONFIG_INTRABC 0 -#define CONFIG_EXT_INTER 1 #define CONFIG_INTERINTRA 1 #define CONFIG_WEDGE 1 #define CONFIG_COMPOUND_SEGMENT 1 #define CONFIG_EXT_REFS 1 -#define CONFIG_ALTREF2 0 -#define CONFIG_SPEED_REFS 0 -#define CONFIG_GF_GROUPS 0 -#define CONFIG_FLEX_REFS 0 #define CONFIG_GLOBAL_MOTION 1 #define CONFIG_NEW_QUANT 0 #define CONFIG_SUPERTX 0 #define CONFIG_ANS 0 -#define CONFIG_LOOP_RESTORATION 0 +#define CONFIG_LOOP_RESTORATION 1 +#define CONFIG_STRIPED_LOOP_RESTORATION 0 #define CONFIG_EXT_PARTITION 0 #define CONFIG_EXT_PARTITION_TYPES 0 +#define CONFIG_EXT_PARTITION_TYPES_AB 0 #define CONFIG_UNPOISON_PARTITION_CTX 0 #define CONFIG_EXT_TILE 0 #define CONFIG_MOTION_VAR 1 #define CONFIG_NCOBMC 0 #define CONFIG_WARPED_MOTION 1 #define CONFIG_Q_ADAPT_PROBS 0 -#define CONFIG_BITSTREAM_DEBUG 0 #define CONFIG_INTER_STATS_ONLY 0 -#define CONFIG_ALT_INTRA 1 -#define CONFIG_PALETTE 1 #define CONFIG_PALETTE_DELTA_ENCODING 0 #define CONFIG_RAWBITS 0 -#define CONFIG_EC_SMALLMUL 1 +#define CONFIG_KF_CTX 0 #define CONFIG_PVQ 0 #define CONFIG_CFL 0 #define CONFIG_XIPHRC 0 #define CONFIG_DCT_ONLY 0 +#define CONFIG_DAALA_TX 0 #define CONFIG_DAALA_DCT4 0 #define CONFIG_DAALA_DCT8 0 +#define CONFIG_DAALA_DCT16 0 +#define CONFIG_DAALA_DCT32 0 +#define CONFIG_DAALA_DCT64 0 #define CONFIG_CB4X4 1 #define CONFIG_CHROMA_2X2 0 #define CONFIG_CHROMA_SUB8X8 1 #define CONFIG_FRAME_SIZE 0 -#define CONFIG_DELTA_Q 1 #define CONFIG_EXT_DELTA_Q 1 #define CONFIG_ADAPT_SCAN 0 -#define CONFIG_FILTER_7BIT 1 #define CONFIG_PARALLEL_DEBLOCKING 1 +#define CONFIG_DEBLOCK_13TAP 0 #define CONFIG_LOOPFILTERING_ACROSS_TILES 1 #define CONFIG_TEMPMV_SIGNALING 1 #define CONFIG_RD_DEBUG 0 @@ -148,31 +144,48 @@ #define CONFIG_ENTROPY_STATS 0 #define CONFIG_MASKED_TX 0 #define CONFIG_DEPENDENT_HORZTILES 0 -#define CONFIG_DIST_8X8 0 -#define CONFIG_DAALA_DIST 0 -#define CONFIG_TRIPRED 0 +#define CONFIG_DIST_8X8 1 #define CONFIG_PALETTE_THROUGHPUT 1 #define CONFIG_REF_ADAPT 0 #define CONFIG_LV_MAP 0 +#define CONFIG_CTX1D 0 #define CONFIG_TXK_SEL 0 #define CONFIG_MV_COMPRESS 1 +#define CONFIG_SEGMENT_ZEROMV 0 #define CONFIG_FRAME_SUPERRES 0 #define CONFIG_NEW_MULTISYMBOL 0 #define CONFIG_COMPOUND_SINGLEREF 0 -#define CONFIG_AOM_QM 0 +#define CONFIG_AOM_QM 1 #define CONFIG_ONE_SIDED_COMPOUND 1 -#define CONFIG_EXT_COMP_REFS 0 +#define CONFIG_EXT_COMP_REFS 1 #define CONFIG_SMOOTH_HV 1 #define CONFIG_VAR_REFS 0 -#define CONFIG_RECT_INTRA_PRED 1 #define CONFIG_LGT 0 +#define CONFIG_LGT_FROM_PRED 0 #define CONFIG_SBL_SYMBOL 0 #define CONFIG_NCOBMC_ADAPT_WEIGHT 0 #define CONFIG_BGSPRITE 0 #define CONFIG_VAR_TX_NO_TX_MODE 0 #define CONFIG_MRC_TX 0 #define CONFIG_LPF_DIRECT 0 -#define CONFIG_UV_LVL 0 +#define CONFIG_LOOPFILTER_LEVEL 0 +#define CONFIG_NO_FRAME_CONTEXT_SIGNALING 0 +#define CONFIG_TXMG 1 +#define CONFIG_MAX_TILE 0 +#define CONFIG_HASH_ME 0 +#define CONFIG_COLORSPACE_HEADERS 0 +#define CONFIG_MFMV 0 +#define CONFIG_FRAME_MARKER 0 +#define CONFIG_JNT_COMP 0 +#define CONFIG_FRAME_SIGN_BIAS 0 +#define CONFIG_EXT_SKIP 0 +#define CONFIG_OBU 0 +#define CONFIG_AMVR 0 +#define CONFIG_LPF_SB 0 +#define CONFIG_OPT_REF_MV 0 +#define CONFIG_TMV 0 +#define CONFIG_RESTRICT_COMPRESSED_HDR 0 +#define CONFIG_HORZONLY_FRAME_SUPERRES 0 #define CONFIG_ANALYZER 0 #define DECODE_WIDTH_LIMIT 8192 #define DECODE_HEIGHT_LIMIT 4608 diff --git a/media/libaom/config/generic/aom_dsp_rtcd.h b/media/libaom/config/generic/aom_dsp_rtcd.h index 259eb3f1d..a4cb22cde 100644 --- a/media/libaom/config/generic/aom_dsp_rtcd.h +++ b/media/libaom/config/generic/aom_dsp_rtcd.h @@ -20,6 +20,9 @@ extern "C" { #endif +void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +#define aom_blend_a64_d32_mask aom_blend_a64_d32_mask_c + void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); #define aom_blend_a64_hmask aom_blend_a64_hmask_c @@ -44,6 +47,24 @@ void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred, int void aom_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); #define aom_convolve8 aom_convolve8_c +void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src aom_convolve8_add_src_c + +void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_hip aom_convolve8_add_src_hip_c + +void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz aom_convolve8_add_src_horiz_c + +void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz_hip aom_convolve8_add_src_horiz_hip_c + +void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert aom_convolve8_add_src_vert_c + +void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert_hip aom_convolve8_add_src_vert_hip_c + void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); #define aom_convolve8_avg aom_convolve8_avg_c @@ -1211,6 +1232,24 @@ void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred, const uint8_t *p void aom_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define aom_highbd_convolve8 aom_highbd_convolve8_c +void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src aom_highbd_convolve8_add_src_c + +void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_hip aom_highbd_convolve8_add_src_hip_c + +void aom_highbd_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz aom_highbd_convolve8_add_src_horiz_c + +void aom_highbd_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz_hip aom_highbd_convolve8_add_src_horiz_hip_c + +void aom_highbd_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert aom_highbd_convolve8_add_src_vert_c + +void aom_highbd_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert_hip aom_highbd_convolve8_add_src_vert_hip_c + void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define aom_highbd_convolve8_avg aom_highbd_convolve8_avg_c diff --git a/media/libaom/config/generic/av1_rtcd.h b/media/libaom/config/generic/av1_rtcd.h index 85c65c4d8..c6b68534e 100644 --- a/media/libaom/config/generic/av1_rtcd.h +++ b/media/libaom/config/generic/av1_rtcd.h @@ -31,39 +31,38 @@ struct search_site_config; struct mv; union int_mv; struct yv12_buffer_config; -typedef uint16_t od_dering_in; #ifdef __cplusplus extern "C" { #endif -void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -#define aom_clpf_block aom_clpf_block_c +void apply_selfguided_restoration_c(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +#define apply_selfguided_restoration apply_selfguided_restoration_c -void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -#define aom_clpf_block_hbd aom_clpf_block_hbd_c - -void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -#define aom_clpf_hblock aom_clpf_hblock_c - -void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -#define aom_clpf_hblock_hbd aom_clpf_hblock_hbd_c +void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +#define apply_selfguided_restoration_highbd apply_selfguided_restoration_highbd_c int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); #define av1_block_error av1_block_error_c +void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +#define av1_convolve_2d av1_convolve_2d_c + +void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +#define av1_convolve_2d_scale av1_convolve_2d_scale_c + void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); #define av1_convolve_horiz av1_convolve_horiz_c +void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +#define av1_convolve_rounding av1_convolve_rounding_c + void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); #define av1_convolve_vert av1_convolve_vert_c int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_diamond_search_sad av1_diamond_search_sad_c -void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_fdct8x8_quant av1_fdct8x8_quant_c - void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht16x16 av1_fht16x16_c @@ -106,46 +105,49 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, struct t void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht8x8 av1_fht8x8_c +void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength); +#define av1_filter_intra_edge av1_filter_intra_edge_c + +void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength); +#define av1_filter_intra_edge_high av1_filter_intra_edge_high_c + int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_full_range_search av1_full_range_search_c int av1_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); #define av1_full_search_sad av1_full_search_sad_c -void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); +void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type); #define av1_fwd_idtx av1_fwd_idtx_c -void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x16 av1_fwd_txfm2d_16x16_c -void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x32 av1_fwd_txfm2d_16x32_c -void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x8 av1_fwd_txfm2d_16x8_c -void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x16 av1_fwd_txfm2d_32x16_c -void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x32 av1_fwd_txfm2d_32x32_c -void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x4 av1_fwd_txfm2d_4x4_c -void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x8 av1_fwd_txfm2d_4x8_c -void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -#define av1_fwd_txfm2d_64x64 av1_fwd_txfm2d_64x64_c - -void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x16 av1_fwd_txfm2d_8x16_c -void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x4 av1_fwd_txfm2d_8x4_c -void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x8 av1_fwd_txfm2d_8x8_c void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); @@ -172,6 +174,12 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c +void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +#define av1_highbd_convolve_2d av1_highbd_convolve_2d_c + +void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +#define av1_highbd_convolve_2d_scale av1_highbd_convolve_2d_scale_c + void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c @@ -184,6 +192,9 @@ void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride, uint16_t * void av1_highbd_convolve_init_c(void); #define av1_highbd_convolve_init av1_highbd_convolve_init_c +void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +#define av1_highbd_convolve_rounding av1_highbd_convolve_rounding_c + void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); #define av1_highbd_convolve_vert av1_highbd_convolve_vert_c @@ -229,9 +240,6 @@ void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c -void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); -#define av1_highbd_quantize_b av1_highbd_quantize_b_c - void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); #define av1_highbd_quantize_fp av1_highbd_quantize_fp_c @@ -241,6 +249,12 @@ void av1_highbd_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, ui void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); #define av1_highbd_warp_affine av1_highbd_warp_affine_c +void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +#define av1_highpass_filter av1_highpass_filter_c + +void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +#define av1_highpass_filter_highbd av1_highpass_filter_highbd_c + void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); #define av1_iht16x16_256_add av1_iht16x16_256_add_c @@ -283,43 +297,40 @@ void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht8x8_64_add av1_iht8x8_64_add_c -void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x16 av1_inv_txfm2d_add_16x16_c -void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c -void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c -void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c -void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x32 av1_inv_txfm2d_add_32x32_c -void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x4 av1_inv_txfm2d_add_4x4_c -void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c -void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c - -void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c -void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c -void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x8 av1_inv_txfm2d_add_8x8_c void av1_lowbd_convolve_init_c(void); #define av1_lowbd_convolve_init av1_lowbd_convolve_init_c -void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale); #define av1_quantize_b av1_quantize_b_c void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -328,9 +339,21 @@ void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define av1_quantize_fp_32x32 av1_quantize_fp_32x32_c +void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +#define av1_selfguided_restoration av1_selfguided_restoration_c + +void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +#define av1_selfguided_restoration_highbd av1_selfguided_restoration_highbd_c + void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); #define av1_temporal_filter_apply av1_temporal_filter_apply_c +void av1_upsample_intra_edge_c(uint8_t *p, int sz); +#define av1_upsample_intra_edge av1_upsample_intra_edge_c + +void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd); +#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c + void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); #define av1_warp_affine av1_warp_affine_c @@ -343,20 +366,14 @@ int av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N, uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); #define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_c -double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); -#define compute_cross_correlation compute_cross_correlation_c - -void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -#define copy_4x4_16bit_to_16bit copy_4x4_16bit_to_16bit_c - -void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -#define copy_4x4_16bit_to_8bit copy_4x4_16bit_to_8bit_c +void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +#define cdef_filter_block cdef_filter_block_c -void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -#define copy_8x8_16bit_to_16bit copy_8x8_16bit_to_16bit_c +int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +#define cdef_find_dir cdef_find_dir_c -void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -#define copy_8x8_16bit_to_8bit copy_8x8_16bit_to_8bit_c +double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); +#define compute_cross_correlation compute_cross_correlation_c void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); #define copy_rect8_16bit_to_16bit copy_rect8_16bit_to_16bit_c @@ -364,15 +381,6 @@ void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); #define copy_rect8_8bit_to_16bit copy_rect8_8bit_to_16bit_c -int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -#define od_dir_find8 od_dir_find8_c - -void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -#define od_filter_dering_direction_4x4 od_filter_dering_direction_4x4_c - -void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -#define od_filter_dering_direction_8x8 od_filter_dering_direction_8x8_c - void aom_rtcd(void); #include "aom_config.h" diff --git a/media/libaom/config/linux/arm/aom_config.asm b/media/libaom/config/linux/arm/aom_config.asm index 70a8ca732..1da814ec1 100644 --- a/media/libaom/config/linux/arm/aom_config.asm +++ b/media/libaom/config/linux/arm/aom_config.asm @@ -49,8 +49,6 @@ .equ CONFIG_STATIC_MSVCRT , 0 .equ CONFIG_SPATIAL_RESAMPLING , 1 .equ CONFIG_REALTIME_ONLY , 1 -.equ CONFIG_ONTHEFLY_BITPACKING , 0 -.equ CONFIG_ERROR_CONCEALMENT , 0 .equ CONFIG_SHARED , 0 .equ CONFIG_STATIC , 1 .equ CONFIG_SMALL , 0 @@ -63,73 +61,71 @@ .equ CONFIG_INSPECTION , 0 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_ENCODE_PERF_TESTS , 0 +.equ CONFIG_BITSTREAM_DEBUG , 0 +.equ CONFIG_SYMBOLRATE , 0 .equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0 .equ CONFIG_LOWBITDEPTH , 1 .equ CONFIG_HIGHBITDEPTH , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_SIZE_LIMIT , 1 -.equ CONFIG_COLORSPACE_HEADERS , 0 .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_CDEF , 1 +.equ CONFIG_CDEF_SINGLEPASS , 1 .equ CONFIG_VAR_TX , 1 .equ CONFIG_RECT_TX , 1 .equ CONFIG_RECT_TX_EXT , 0 .equ CONFIG_TPL_MV , 0 .equ CONFIG_DUAL_FILTER , 1 -.equ CONFIG_CONVOLVE_ROUND , 0 +.equ CONFIG_CONVOLVE_ROUND , 1 .equ CONFIG_COMPOUND_ROUND , 0 .equ CONFIG_EXT_TX , 1 -.equ CONFIG_DPCM_INTRA , 0 .equ CONFIG_TX64X64 , 0 .equ CONFIG_EXT_INTRA , 1 .equ CONFIG_INTRA_INTERP , 0 .equ CONFIG_FILTER_INTRA , 0 -.equ CONFIG_INTRA_EDGE , 0 +.equ CONFIG_INTRA_EDGE , 1 .equ CONFIG_INTRABC , 0 -.equ CONFIG_EXT_INTER , 1 .equ CONFIG_INTERINTRA , 1 .equ CONFIG_WEDGE , 1 .equ CONFIG_COMPOUND_SEGMENT , 1 .equ CONFIG_EXT_REFS , 1 -.equ CONFIG_ALTREF2 , 0 -.equ CONFIG_SPEED_REFS , 0 -.equ CONFIG_GF_GROUPS , 0 -.equ CONFIG_FLEX_REFS , 0 .equ CONFIG_GLOBAL_MOTION , 1 .equ CONFIG_NEW_QUANT , 0 .equ CONFIG_SUPERTX , 0 .equ CONFIG_ANS , 0 -.equ CONFIG_LOOP_RESTORATION , 0 +.equ CONFIG_LOOP_RESTORATION , 1 +.equ CONFIG_STRIPED_LOOP_RESTORATION , 0 .equ CONFIG_EXT_PARTITION , 0 .equ CONFIG_EXT_PARTITION_TYPES , 0 +.equ CONFIG_EXT_PARTITION_TYPES_AB , 0 .equ CONFIG_UNPOISON_PARTITION_CTX , 0 .equ CONFIG_EXT_TILE , 0 .equ CONFIG_MOTION_VAR , 1 .equ CONFIG_NCOBMC , 0 .equ CONFIG_WARPED_MOTION , 1 .equ CONFIG_Q_ADAPT_PROBS , 0 -.equ CONFIG_BITSTREAM_DEBUG , 0 .equ CONFIG_INTER_STATS_ONLY , 0 -.equ CONFIG_ALT_INTRA , 1 -.equ CONFIG_PALETTE , 1 .equ CONFIG_PALETTE_DELTA_ENCODING , 0 .equ CONFIG_RAWBITS , 0 -.equ CONFIG_EC_SMALLMUL , 1 +.equ CONFIG_KF_CTX , 0 .equ CONFIG_PVQ , 0 .equ CONFIG_CFL , 0 .equ CONFIG_XIPHRC , 0 .equ CONFIG_DCT_ONLY , 0 +.equ CONFIG_DAALA_TX , 0 .equ CONFIG_DAALA_DCT4 , 0 .equ CONFIG_DAALA_DCT8 , 0 +.equ CONFIG_DAALA_DCT16 , 0 +.equ CONFIG_DAALA_DCT32 , 0 +.equ CONFIG_DAALA_DCT64 , 0 .equ CONFIG_CB4X4 , 1 .equ CONFIG_CHROMA_2X2 , 0 .equ CONFIG_CHROMA_SUB8X8 , 1 .equ CONFIG_FRAME_SIZE , 0 -.equ CONFIG_DELTA_Q , 1 .equ CONFIG_EXT_DELTA_Q , 1 .equ CONFIG_ADAPT_SCAN , 0 -.equ CONFIG_FILTER_7BIT , 1 .equ CONFIG_PARALLEL_DEBLOCKING , 1 +.equ CONFIG_DEBLOCK_13TAP , 0 .equ CONFIG_LOOPFILTERING_ACROSS_TILES , 1 .equ CONFIG_TEMPMV_SIGNALING , 1 .equ CONFIG_RD_DEBUG , 0 @@ -138,30 +134,47 @@ .equ CONFIG_ENTROPY_STATS , 0 .equ CONFIG_MASKED_TX , 0 .equ CONFIG_DEPENDENT_HORZTILES , 0 -.equ CONFIG_DIST_8X8 , 0 -.equ CONFIG_DAALA_DIST , 0 -.equ CONFIG_TRIPRED , 0 +.equ CONFIG_DIST_8X8 , 1 .equ CONFIG_PALETTE_THROUGHPUT , 1 .equ CONFIG_REF_ADAPT , 0 .equ CONFIG_LV_MAP , 0 +.equ CONFIG_CTX1D , 0 .equ CONFIG_TXK_SEL , 0 .equ CONFIG_MV_COMPRESS , 1 +.equ CONFIG_SEGMENT_ZEROMV , 0 .equ CONFIG_FRAME_SUPERRES , 0 .equ CONFIG_NEW_MULTISYMBOL , 0 .equ CONFIG_COMPOUND_SINGLEREF , 0 -.equ CONFIG_AOM_QM , 0 +.equ CONFIG_AOM_QM , 1 .equ CONFIG_ONE_SIDED_COMPOUND , 1 -.equ CONFIG_EXT_COMP_REFS , 0 +.equ CONFIG_EXT_COMP_REFS , 1 .equ CONFIG_SMOOTH_HV , 1 .equ CONFIG_VAR_REFS , 0 -.equ CONFIG_RECT_INTRA_PRED , 1 .equ CONFIG_LGT , 0 +.equ CONFIG_LGT_FROM_PRED , 0 .equ CONFIG_SBL_SYMBOL , 0 .equ CONFIG_NCOBMC_ADAPT_WEIGHT , 0 .equ CONFIG_BGSPRITE , 0 .equ CONFIG_VAR_TX_NO_TX_MODE , 0 .equ CONFIG_MRC_TX , 0 .equ CONFIG_LPF_DIRECT , 0 -.equ CONFIG_UV_LVL , 0 +.equ CONFIG_LOOPFILTER_LEVEL , 0 +.equ CONFIG_NO_FRAME_CONTEXT_SIGNALING , 0 +.equ CONFIG_TXMG , 1 +.equ CONFIG_MAX_TILE , 0 +.equ CONFIG_HASH_ME , 0 +.equ CONFIG_COLORSPACE_HEADERS , 0 +.equ CONFIG_MFMV , 0 +.equ CONFIG_FRAME_MARKER , 0 +.equ CONFIG_JNT_COMP , 0 +.equ CONFIG_FRAME_SIGN_BIAS , 0 +.equ CONFIG_EXT_SKIP , 0 +.equ CONFIG_OBU , 0 +.equ CONFIG_AMVR , 0 +.equ CONFIG_LPF_SB , 0 +.equ CONFIG_OPT_REF_MV , 0 +.equ CONFIG_TMV , 0 +.equ CONFIG_RESTRICT_COMPRESSED_HDR , 0 +.equ CONFIG_HORZONLY_FRAME_SUPERRES , 0 .equ CONFIG_ANALYZER , 0 .section .note.GNU-stack,"",%progbits diff --git a/media/libaom/config/linux/arm/aom_config.h b/media/libaom/config/linux/arm/aom_config.h index 8f4cba92e..07653806c 100644 --- a/media/libaom/config/linux/arm/aom_config.h +++ b/media/libaom/config/linux/arm/aom_config.h @@ -59,8 +59,6 @@ #define CONFIG_STATIC_MSVCRT 0 #define CONFIG_SPATIAL_RESAMPLING 1 #define CONFIG_REALTIME_ONLY 1 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 #define CONFIG_SHARED 0 #define CONFIG_STATIC 1 #define CONFIG_SMALL 0 @@ -73,73 +71,71 @@ #define CONFIG_INSPECTION 0 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_BITSTREAM_DEBUG 0 +#define CONFIG_SYMBOLRATE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_LOWBITDEPTH 1 #define CONFIG_HIGHBITDEPTH 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_SIZE_LIMIT 1 -#define CONFIG_COLORSPACE_HEADERS 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_CDEF 1 +#define CONFIG_CDEF_SINGLEPASS 1 #define CONFIG_VAR_TX 1 #define CONFIG_RECT_TX 1 #define CONFIG_RECT_TX_EXT 0 #define CONFIG_TPL_MV 0 #define CONFIG_DUAL_FILTER 1 -#define CONFIG_CONVOLVE_ROUND 0 +#define CONFIG_CONVOLVE_ROUND 1 #define CONFIG_COMPOUND_ROUND 0 #define CONFIG_EXT_TX 1 -#define CONFIG_DPCM_INTRA 0 #define CONFIG_TX64X64 0 #define CONFIG_EXT_INTRA 1 #define CONFIG_INTRA_INTERP 0 #define CONFIG_FILTER_INTRA 0 -#define CONFIG_INTRA_EDGE 0 +#define CONFIG_INTRA_EDGE 1 #define CONFIG_INTRABC 0 -#define CONFIG_EXT_INTER 1 #define CONFIG_INTERINTRA 1 #define CONFIG_WEDGE 1 #define CONFIG_COMPOUND_SEGMENT 1 #define CONFIG_EXT_REFS 1 -#define CONFIG_ALTREF2 0 -#define CONFIG_SPEED_REFS 0 -#define CONFIG_GF_GROUPS 0 -#define CONFIG_FLEX_REFS 0 #define CONFIG_GLOBAL_MOTION 1 #define CONFIG_NEW_QUANT 0 #define CONFIG_SUPERTX 0 #define CONFIG_ANS 0 -#define CONFIG_LOOP_RESTORATION 0 +#define CONFIG_LOOP_RESTORATION 1 +#define CONFIG_STRIPED_LOOP_RESTORATION 0 #define CONFIG_EXT_PARTITION 0 #define CONFIG_EXT_PARTITION_TYPES 0 +#define CONFIG_EXT_PARTITION_TYPES_AB 0 #define CONFIG_UNPOISON_PARTITION_CTX 0 #define CONFIG_EXT_TILE 0 #define CONFIG_MOTION_VAR 1 #define CONFIG_NCOBMC 0 #define CONFIG_WARPED_MOTION 1 #define CONFIG_Q_ADAPT_PROBS 0 -#define CONFIG_BITSTREAM_DEBUG 0 #define CONFIG_INTER_STATS_ONLY 0 -#define CONFIG_ALT_INTRA 1 -#define CONFIG_PALETTE 1 #define CONFIG_PALETTE_DELTA_ENCODING 0 #define CONFIG_RAWBITS 0 -#define CONFIG_EC_SMALLMUL 1 +#define CONFIG_KF_CTX 0 #define CONFIG_PVQ 0 #define CONFIG_CFL 0 #define CONFIG_XIPHRC 0 #define CONFIG_DCT_ONLY 0 +#define CONFIG_DAALA_TX 0 #define CONFIG_DAALA_DCT4 0 #define CONFIG_DAALA_DCT8 0 +#define CONFIG_DAALA_DCT16 0 +#define CONFIG_DAALA_DCT32 0 +#define CONFIG_DAALA_DCT64 0 #define CONFIG_CB4X4 1 #define CONFIG_CHROMA_2X2 0 #define CONFIG_CHROMA_SUB8X8 1 #define CONFIG_FRAME_SIZE 0 -#define CONFIG_DELTA_Q 1 #define CONFIG_EXT_DELTA_Q 1 #define CONFIG_ADAPT_SCAN 0 -#define CONFIG_FILTER_7BIT 1 #define CONFIG_PARALLEL_DEBLOCKING 1 +#define CONFIG_DEBLOCK_13TAP 0 #define CONFIG_LOOPFILTERING_ACROSS_TILES 1 #define CONFIG_TEMPMV_SIGNALING 1 #define CONFIG_RD_DEBUG 0 @@ -148,31 +144,48 @@ #define CONFIG_ENTROPY_STATS 0 #define CONFIG_MASKED_TX 0 #define CONFIG_DEPENDENT_HORZTILES 0 -#define CONFIG_DIST_8X8 0 -#define CONFIG_DAALA_DIST 0 -#define CONFIG_TRIPRED 0 +#define CONFIG_DIST_8X8 1 #define CONFIG_PALETTE_THROUGHPUT 1 #define CONFIG_REF_ADAPT 0 #define CONFIG_LV_MAP 0 +#define CONFIG_CTX1D 0 #define CONFIG_TXK_SEL 0 #define CONFIG_MV_COMPRESS 1 +#define CONFIG_SEGMENT_ZEROMV 0 #define CONFIG_FRAME_SUPERRES 0 #define CONFIG_NEW_MULTISYMBOL 0 #define CONFIG_COMPOUND_SINGLEREF 0 -#define CONFIG_AOM_QM 0 +#define CONFIG_AOM_QM 1 #define CONFIG_ONE_SIDED_COMPOUND 1 -#define CONFIG_EXT_COMP_REFS 0 +#define CONFIG_EXT_COMP_REFS 1 #define CONFIG_SMOOTH_HV 1 #define CONFIG_VAR_REFS 0 -#define CONFIG_RECT_INTRA_PRED 1 #define CONFIG_LGT 0 +#define CONFIG_LGT_FROM_PRED 0 #define CONFIG_SBL_SYMBOL 0 #define CONFIG_NCOBMC_ADAPT_WEIGHT 0 #define CONFIG_BGSPRITE 0 #define CONFIG_VAR_TX_NO_TX_MODE 0 #define CONFIG_MRC_TX 0 #define CONFIG_LPF_DIRECT 0 -#define CONFIG_UV_LVL 0 +#define CONFIG_LOOPFILTER_LEVEL 0 +#define CONFIG_NO_FRAME_CONTEXT_SIGNALING 0 +#define CONFIG_TXMG 1 +#define CONFIG_MAX_TILE 0 +#define CONFIG_HASH_ME 0 +#define CONFIG_COLORSPACE_HEADERS 0 +#define CONFIG_MFMV 0 +#define CONFIG_FRAME_MARKER 0 +#define CONFIG_JNT_COMP 0 +#define CONFIG_FRAME_SIGN_BIAS 0 +#define CONFIG_EXT_SKIP 0 +#define CONFIG_OBU 0 +#define CONFIG_AMVR 0 +#define CONFIG_LPF_SB 0 +#define CONFIG_OPT_REF_MV 0 +#define CONFIG_TMV 0 +#define CONFIG_RESTRICT_COMPRESSED_HDR 0 +#define CONFIG_HORZONLY_FRAME_SUPERRES 0 #define CONFIG_ANALYZER 0 #define DECODE_WIDTH_LIMIT 8192 #define DECODE_HEIGHT_LIMIT 4608 diff --git a/media/libaom/config/linux/arm/aom_dsp_rtcd.h b/media/libaom/config/linux/arm/aom_dsp_rtcd.h index e00871066..f3c4ef490 100644 --- a/media/libaom/config/linux/arm/aom_dsp_rtcd.h +++ b/media/libaom/config/linux/arm/aom_dsp_rtcd.h @@ -20,6 +20,9 @@ extern "C" { #endif +void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +#define aom_blend_a64_d32_mask aom_blend_a64_d32_mask_c + void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); #define aom_blend_a64_hmask aom_blend_a64_hmask_c @@ -45,6 +48,24 @@ void aom_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptr void aom_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*aom_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src aom_convolve8_add_src_c + +void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_hip aom_convolve8_add_src_hip_c + +void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz aom_convolve8_add_src_horiz_c + +void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz_hip aom_convolve8_add_src_horiz_hip_c + +void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert aom_convolve8_add_src_vert_c + +void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert_hip aom_convolve8_add_src_vert_hip_c + void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*aom_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -1245,6 +1266,24 @@ void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred, const uint8_t *p void aom_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define aom_highbd_convolve8 aom_highbd_convolve8_c +void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src aom_highbd_convolve8_add_src_c + +void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_hip aom_highbd_convolve8_add_src_hip_c + +void aom_highbd_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz aom_highbd_convolve8_add_src_horiz_c + +void aom_highbd_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz_hip aom_highbd_convolve8_add_src_horiz_hip_c + +void aom_highbd_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert aom_highbd_convolve8_add_src_vert_c + +void aom_highbd_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert_hip aom_highbd_convolve8_add_src_vert_hip_c + void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define aom_highbd_convolve8_avg aom_highbd_convolve8_avg_c @@ -2244,52 +2283,40 @@ void aom_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride #define aom_iwht4x4_1_add aom_iwht4x4_1_add_c void aom_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_4 aom_lpf_horizontal_4_c void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_8 aom_lpf_horizontal_8_c void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_c void aom_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_16 aom_lpf_horizontal_edge_16_c void aom_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_8 aom_lpf_horizontal_edge_8_c void aom_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_16 aom_lpf_vertical_16_c void aom_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_16_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_c void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_4 aom_lpf_vertical_4_c void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_8 aom_lpf_vertical_8_c void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_c unsigned int aom_masked_sad16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); #define aom_masked_sad16x16 aom_masked_sad16x16_c @@ -3126,30 +3153,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_NEON) aom_int_pro_col = aom_int_pro_col_neon; aom_int_pro_row = aom_int_pro_row_c; if (flags & HAS_NEON) aom_int_pro_row = aom_int_pro_row_neon; - aom_lpf_horizontal_4 = aom_lpf_horizontal_4_c; - if (flags & HAS_NEON) aom_lpf_horizontal_4 = aom_lpf_horizontal_4_neon; - aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_c; - if (flags & HAS_NEON) aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_neon; - aom_lpf_horizontal_8 = aom_lpf_horizontal_8_c; - if (flags & HAS_NEON) aom_lpf_horizontal_8 = aom_lpf_horizontal_8_neon; - aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_c; - if (flags & HAS_NEON) aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_neon; - aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_c; - if (flags & HAS_NEON) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_neon; - aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_c; - if (flags & HAS_NEON) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_neon; - aom_lpf_vertical_16 = aom_lpf_vertical_16_c; - if (flags & HAS_NEON) aom_lpf_vertical_16 = aom_lpf_vertical_16_neon; - aom_lpf_vertical_16_dual = aom_lpf_vertical_16_dual_c; - if (flags & HAS_NEON) aom_lpf_vertical_16_dual = aom_lpf_vertical_16_dual_neon; - aom_lpf_vertical_4 = aom_lpf_vertical_4_c; - if (flags & HAS_NEON) aom_lpf_vertical_4 = aom_lpf_vertical_4_neon; - aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_c; - if (flags & HAS_NEON) aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_neon; - aom_lpf_vertical_8 = aom_lpf_vertical_8_c; - if (flags & HAS_NEON) aom_lpf_vertical_8 = aom_lpf_vertical_8_neon; - aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_c; - if (flags & HAS_NEON) aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_neon; aom_minmax_8x8 = aom_minmax_8x8_c; if (flags & HAS_NEON) aom_minmax_8x8 = aom_minmax_8x8_neon; aom_mse16x16 = aom_mse16x16_c; diff --git a/media/libaom/config/linux/arm/av1_rtcd.h b/media/libaom/config/linux/arm/av1_rtcd.h index 7e0fc1f78..c415e6dd9 100644 --- a/media/libaom/config/linux/arm/av1_rtcd.h +++ b/media/libaom/config/linux/arm/av1_rtcd.h @@ -31,43 +31,38 @@ struct search_site_config; struct mv; union int_mv; struct yv12_buffer_config; -typedef uint16_t od_dering_in; #ifdef __cplusplus extern "C" { #endif -void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_neon(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void apply_selfguided_restoration_c(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +#define apply_selfguided_restoration apply_selfguided_restoration_c -void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_neon(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_neon(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_neon(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +#define apply_selfguided_restoration_highbd apply_selfguided_restoration_highbd_c int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); #define av1_block_error av1_block_error_c +void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +#define av1_convolve_2d av1_convolve_2d_c + +void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +#define av1_convolve_2d_scale av1_convolve_2d_scale_c + void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); #define av1_convolve_horiz av1_convolve_horiz_c +void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +#define av1_convolve_rounding av1_convolve_rounding_c + void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); #define av1_convolve_vert av1_convolve_vert_c int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_diamond_search_sad av1_diamond_search_sad_c -void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_fdct8x8_quant av1_fdct8x8_quant_c - void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht16x16 av1_fht16x16_c @@ -110,46 +105,49 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, struct t void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht8x8 av1_fht8x8_c +void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength); +#define av1_filter_intra_edge av1_filter_intra_edge_c + +void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength); +#define av1_filter_intra_edge_high av1_filter_intra_edge_high_c + int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_full_range_search av1_full_range_search_c int av1_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); #define av1_full_search_sad av1_full_search_sad_c -void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); +void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type); #define av1_fwd_idtx av1_fwd_idtx_c -void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x16 av1_fwd_txfm2d_16x16_c -void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x32 av1_fwd_txfm2d_16x32_c -void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x8 av1_fwd_txfm2d_16x8_c -void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x16 av1_fwd_txfm2d_32x16_c -void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x32 av1_fwd_txfm2d_32x32_c -void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x4 av1_fwd_txfm2d_4x4_c -void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x8 av1_fwd_txfm2d_4x8_c -void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -#define av1_fwd_txfm2d_64x64 av1_fwd_txfm2d_64x64_c - -void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x16 av1_fwd_txfm2d_8x16_c -void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x4 av1_fwd_txfm2d_8x4_c -void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x8 av1_fwd_txfm2d_8x8_c void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); @@ -176,6 +174,12 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c +void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +#define av1_highbd_convolve_2d av1_highbd_convolve_2d_c + +void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +#define av1_highbd_convolve_2d_scale av1_highbd_convolve_2d_scale_c + void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c @@ -188,6 +192,9 @@ void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride, uint16_t * void av1_highbd_convolve_init_c(void); #define av1_highbd_convolve_init av1_highbd_convolve_init_c +void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +#define av1_highbd_convolve_rounding av1_highbd_convolve_rounding_c + void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); #define av1_highbd_convolve_vert av1_highbd_convolve_vert_c @@ -233,9 +240,6 @@ void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c -void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); -#define av1_highbd_quantize_b av1_highbd_quantize_b_c - void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); #define av1_highbd_quantize_fp av1_highbd_quantize_fp_c @@ -245,6 +249,12 @@ void av1_highbd_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, ui void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); #define av1_highbd_warp_affine av1_highbd_warp_affine_c +void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +#define av1_highpass_filter av1_highpass_filter_c + +void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +#define av1_highpass_filter_highbd av1_highpass_filter_highbd_c + void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); #define av1_iht16x16_256_add av1_iht16x16_256_add_c @@ -287,43 +297,40 @@ void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht8x8_64_add av1_iht8x8_64_add_c -void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x16 av1_inv_txfm2d_add_16x16_c -void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c -void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c -void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c -void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x32 av1_inv_txfm2d_add_32x32_c -void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x4 av1_inv_txfm2d_add_4x4_c -void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c -void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c - -void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c -void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c -void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x8 av1_inv_txfm2d_add_8x8_c void av1_lowbd_convolve_init_c(void); #define av1_lowbd_convolve_init av1_lowbd_convolve_init_c -void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale); #define av1_quantize_b av1_quantize_b_c void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -332,9 +339,21 @@ void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define av1_quantize_fp_32x32 av1_quantize_fp_32x32_c +void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +#define av1_selfguided_restoration av1_selfguided_restoration_c + +void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +#define av1_selfguided_restoration_highbd av1_selfguided_restoration_highbd_c + void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); #define av1_temporal_filter_apply av1_temporal_filter_apply_c +void av1_upsample_intra_edge_c(uint8_t *p, int sz); +#define av1_upsample_intra_edge av1_upsample_intra_edge_c + +void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd); +#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c + void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); #define av1_warp_affine av1_warp_affine_c @@ -347,24 +366,16 @@ int av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N, uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); #define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_c -double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); -#define compute_cross_correlation compute_cross_correlation_c - -void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_neon(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_neon(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_neon(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); -void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_neon(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); +int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_neon(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift); -void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_neon(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); +#define compute_cross_correlation compute_cross_correlation_c void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_neon(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); @@ -374,18 +385,6 @@ void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, void copy_rect8_8bit_to_16bit_neon(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); -int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_neon(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -RTCD_EXTERN int (*od_dir_find8)(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); - -void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_neon(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - -void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_neon(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - void aom_rtcd(void); #include "aom_config.h" @@ -398,32 +397,14 @@ static void setup_rtcd_internal(void) (void)flags; - aom_clpf_block = aom_clpf_block_c; - if (flags & HAS_NEON) aom_clpf_block = aom_clpf_block_neon; - aom_clpf_block_hbd = aom_clpf_block_hbd_c; - if (flags & HAS_NEON) aom_clpf_block_hbd = aom_clpf_block_hbd_neon; - aom_clpf_hblock = aom_clpf_hblock_c; - if (flags & HAS_NEON) aom_clpf_hblock = aom_clpf_hblock_neon; - aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_c; - if (flags & HAS_NEON) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_neon; - copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_c; - if (flags & HAS_NEON) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_neon; - copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_c; - if (flags & HAS_NEON) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_neon; - copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_c; - if (flags & HAS_NEON) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_neon; - copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_c; - if (flags & HAS_NEON) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_neon; + cdef_filter_block = cdef_filter_block_c; + if (flags & HAS_NEON) cdef_filter_block = cdef_filter_block_neon; + cdef_find_dir = cdef_find_dir_c; + if (flags & HAS_NEON) cdef_find_dir = cdef_find_dir_neon; copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_c; if (flags & HAS_NEON) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_neon; copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_c; if (flags & HAS_NEON) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_neon; - od_dir_find8 = od_dir_find8_c; - if (flags & HAS_NEON) od_dir_find8 = od_dir_find8_neon; - od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_c; - if (flags & HAS_NEON) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_neon; - od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_c; - if (flags & HAS_NEON) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_neon; } #endif diff --git a/media/libaom/config/linux/ia32/aom_config.asm b/media/libaom/config/linux/ia32/aom_config.asm index dafa5d818..161c1fee3 100644 --- a/media/libaom/config/linux/ia32/aom_config.asm +++ b/media/libaom/config/linux/ia32/aom_config.asm @@ -46,8 +46,6 @@ CONFIG_AV1 equ 1 CONFIG_STATIC_MSVCRT equ 0 CONFIG_SPATIAL_RESAMPLING equ 1 CONFIG_REALTIME_ONLY equ 0 -CONFIG_ONTHEFLY_BITPACKING equ 0 -CONFIG_ERROR_CONCEALMENT equ 0 CONFIG_SHARED equ 0 CONFIG_STATIC equ 1 CONFIG_SMALL equ 0 @@ -60,73 +58,71 @@ CONFIG_ACCOUNTING equ 0 CONFIG_INSPECTION equ 0 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_ENCODE_PERF_TESTS equ 0 +CONFIG_BITSTREAM_DEBUG equ 0 +CONFIG_SYMBOLRATE equ 0 CONFIG_COEFFICIENT_RANGE_CHECKING equ 0 CONFIG_LOWBITDEPTH equ 1 CONFIG_HIGHBITDEPTH equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_SIZE_LIMIT equ 1 -CONFIG_COLORSPACE_HEADERS equ 0 CONFIG_FP_MB_STATS equ 0 CONFIG_CDEF equ 1 +CONFIG_CDEF_SINGLEPASS equ 1 CONFIG_VAR_TX equ 1 CONFIG_RECT_TX equ 1 CONFIG_RECT_TX_EXT equ 0 CONFIG_TPL_MV equ 0 CONFIG_DUAL_FILTER equ 1 -CONFIG_CONVOLVE_ROUND equ 0 +CONFIG_CONVOLVE_ROUND equ 1 CONFIG_COMPOUND_ROUND equ 0 CONFIG_EXT_TX equ 1 -CONFIG_DPCM_INTRA equ 0 CONFIG_TX64X64 equ 0 CONFIG_EXT_INTRA equ 1 CONFIG_INTRA_INTERP equ 0 CONFIG_FILTER_INTRA equ 0 -CONFIG_INTRA_EDGE equ 0 +CONFIG_INTRA_EDGE equ 1 CONFIG_INTRABC equ 0 -CONFIG_EXT_INTER equ 1 CONFIG_INTERINTRA equ 1 CONFIG_WEDGE equ 1 CONFIG_COMPOUND_SEGMENT equ 1 CONFIG_EXT_REFS equ 1 -CONFIG_ALTREF2 equ 0 -CONFIG_SPEED_REFS equ 0 -CONFIG_GF_GROUPS equ 0 -CONFIG_FLEX_REFS equ 0 CONFIG_GLOBAL_MOTION equ 1 CONFIG_NEW_QUANT equ 0 CONFIG_SUPERTX equ 0 CONFIG_ANS equ 0 -CONFIG_LOOP_RESTORATION equ 0 +CONFIG_LOOP_RESTORATION equ 1 +CONFIG_STRIPED_LOOP_RESTORATION equ 0 CONFIG_EXT_PARTITION equ 0 CONFIG_EXT_PARTITION_TYPES equ 0 +CONFIG_EXT_PARTITION_TYPES_AB equ 0 CONFIG_UNPOISON_PARTITION_CTX equ 0 CONFIG_EXT_TILE equ 0 CONFIG_MOTION_VAR equ 1 CONFIG_NCOBMC equ 0 CONFIG_WARPED_MOTION equ 1 CONFIG_Q_ADAPT_PROBS equ 0 -CONFIG_BITSTREAM_DEBUG equ 0 CONFIG_INTER_STATS_ONLY equ 0 -CONFIG_ALT_INTRA equ 1 -CONFIG_PALETTE equ 1 CONFIG_PALETTE_DELTA_ENCODING equ 0 CONFIG_RAWBITS equ 0 -CONFIG_EC_SMALLMUL equ 1 +CONFIG_KF_CTX equ 0 CONFIG_PVQ equ 0 CONFIG_CFL equ 0 CONFIG_XIPHRC equ 0 CONFIG_DCT_ONLY equ 0 +CONFIG_DAALA_TX equ 0 CONFIG_DAALA_DCT4 equ 0 CONFIG_DAALA_DCT8 equ 0 +CONFIG_DAALA_DCT16 equ 0 +CONFIG_DAALA_DCT32 equ 0 +CONFIG_DAALA_DCT64 equ 0 CONFIG_CB4X4 equ 1 CONFIG_CHROMA_2X2 equ 0 CONFIG_CHROMA_SUB8X8 equ 1 CONFIG_FRAME_SIZE equ 0 -CONFIG_DELTA_Q equ 1 CONFIG_EXT_DELTA_Q equ 1 CONFIG_ADAPT_SCAN equ 0 -CONFIG_FILTER_7BIT equ 1 CONFIG_PARALLEL_DEBLOCKING equ 1 +CONFIG_DEBLOCK_13TAP equ 0 CONFIG_LOOPFILTERING_ACROSS_TILES equ 1 CONFIG_TEMPMV_SIGNALING equ 1 CONFIG_RD_DEBUG equ 0 @@ -135,29 +131,46 @@ CONFIG_COEF_INTERLEAVE equ 0 CONFIG_ENTROPY_STATS equ 0 CONFIG_MASKED_TX equ 0 CONFIG_DEPENDENT_HORZTILES equ 0 -CONFIG_DIST_8X8 equ 0 -CONFIG_DAALA_DIST equ 0 -CONFIG_TRIPRED equ 0 +CONFIG_DIST_8X8 equ 1 CONFIG_PALETTE_THROUGHPUT equ 1 CONFIG_REF_ADAPT equ 0 CONFIG_LV_MAP equ 0 +CONFIG_CTX1D equ 0 CONFIG_TXK_SEL equ 0 CONFIG_MV_COMPRESS equ 1 +CONFIG_SEGMENT_ZEROMV equ 0 CONFIG_FRAME_SUPERRES equ 0 CONFIG_NEW_MULTISYMBOL equ 0 CONFIG_COMPOUND_SINGLEREF equ 0 -CONFIG_AOM_QM equ 0 +CONFIG_AOM_QM equ 1 CONFIG_ONE_SIDED_COMPOUND equ 1 -CONFIG_EXT_COMP_REFS equ 0 +CONFIG_EXT_COMP_REFS equ 1 CONFIG_SMOOTH_HV equ 1 CONFIG_VAR_REFS equ 0 -CONFIG_RECT_INTRA_PRED equ 1 CONFIG_LGT equ 0 +CONFIG_LGT_FROM_PRED equ 0 CONFIG_SBL_SYMBOL equ 0 CONFIG_NCOBMC_ADAPT_WEIGHT equ 0 CONFIG_BGSPRITE equ 0 CONFIG_VAR_TX_NO_TX_MODE equ 0 CONFIG_MRC_TX equ 0 CONFIG_LPF_DIRECT equ 0 -CONFIG_UV_LVL equ 0 +CONFIG_LOOPFILTER_LEVEL equ 0 +CONFIG_NO_FRAME_CONTEXT_SIGNALING equ 0 +CONFIG_TXMG equ 1 +CONFIG_MAX_TILE equ 0 +CONFIG_HASH_ME equ 0 +CONFIG_COLORSPACE_HEADERS equ 0 +CONFIG_MFMV equ 0 +CONFIG_FRAME_MARKER equ 0 +CONFIG_JNT_COMP equ 0 +CONFIG_FRAME_SIGN_BIAS equ 0 +CONFIG_EXT_SKIP equ 0 +CONFIG_OBU equ 0 +CONFIG_AMVR equ 0 +CONFIG_LPF_SB equ 0 +CONFIG_OPT_REF_MV equ 0 +CONFIG_TMV equ 0 +CONFIG_RESTRICT_COMPRESSED_HDR equ 0 +CONFIG_HORZONLY_FRAME_SUPERRES equ 0 CONFIG_ANALYZER equ 0 diff --git a/media/libaom/config/linux/ia32/aom_config.h b/media/libaom/config/linux/ia32/aom_config.h index ebf569adb..4d3f5e3bb 100644 --- a/media/libaom/config/linux/ia32/aom_config.h +++ b/media/libaom/config/linux/ia32/aom_config.h @@ -59,8 +59,6 @@ #define CONFIG_STATIC_MSVCRT 0 #define CONFIG_SPATIAL_RESAMPLING 1 #define CONFIG_REALTIME_ONLY 0 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 #define CONFIG_SHARED 0 #define CONFIG_STATIC 1 #define CONFIG_SMALL 0 @@ -73,73 +71,71 @@ #define CONFIG_INSPECTION 0 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_BITSTREAM_DEBUG 0 +#define CONFIG_SYMBOLRATE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_LOWBITDEPTH 1 #define CONFIG_HIGHBITDEPTH 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_SIZE_LIMIT 1 -#define CONFIG_COLORSPACE_HEADERS 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_CDEF 1 +#define CONFIG_CDEF_SINGLEPASS 1 #define CONFIG_VAR_TX 1 #define CONFIG_RECT_TX 1 #define CONFIG_RECT_TX_EXT 0 #define CONFIG_TPL_MV 0 #define CONFIG_DUAL_FILTER 1 -#define CONFIG_CONVOLVE_ROUND 0 +#define CONFIG_CONVOLVE_ROUND 1 #define CONFIG_COMPOUND_ROUND 0 #define CONFIG_EXT_TX 1 -#define CONFIG_DPCM_INTRA 0 #define CONFIG_TX64X64 0 #define CONFIG_EXT_INTRA 1 #define CONFIG_INTRA_INTERP 0 #define CONFIG_FILTER_INTRA 0 -#define CONFIG_INTRA_EDGE 0 +#define CONFIG_INTRA_EDGE 1 #define CONFIG_INTRABC 0 -#define CONFIG_EXT_INTER 1 #define CONFIG_INTERINTRA 1 #define CONFIG_WEDGE 1 #define CONFIG_COMPOUND_SEGMENT 1 #define CONFIG_EXT_REFS 1 -#define CONFIG_ALTREF2 0 -#define CONFIG_SPEED_REFS 0 -#define CONFIG_GF_GROUPS 0 -#define CONFIG_FLEX_REFS 0 #define CONFIG_GLOBAL_MOTION 1 #define CONFIG_NEW_QUANT 0 #define CONFIG_SUPERTX 0 #define CONFIG_ANS 0 -#define CONFIG_LOOP_RESTORATION 0 +#define CONFIG_LOOP_RESTORATION 1 +#define CONFIG_STRIPED_LOOP_RESTORATION 0 #define CONFIG_EXT_PARTITION 0 #define CONFIG_EXT_PARTITION_TYPES 0 +#define CONFIG_EXT_PARTITION_TYPES_AB 0 #define CONFIG_UNPOISON_PARTITION_CTX 0 #define CONFIG_EXT_TILE 0 #define CONFIG_MOTION_VAR 1 #define CONFIG_NCOBMC 0 #define CONFIG_WARPED_MOTION 1 #define CONFIG_Q_ADAPT_PROBS 0 -#define CONFIG_BITSTREAM_DEBUG 0 #define CONFIG_INTER_STATS_ONLY 0 -#define CONFIG_ALT_INTRA 1 -#define CONFIG_PALETTE 1 #define CONFIG_PALETTE_DELTA_ENCODING 0 #define CONFIG_RAWBITS 0 -#define CONFIG_EC_SMALLMUL 1 +#define CONFIG_KF_CTX 0 #define CONFIG_PVQ 0 #define CONFIG_CFL 0 #define CONFIG_XIPHRC 0 #define CONFIG_DCT_ONLY 0 +#define CONFIG_DAALA_TX 0 #define CONFIG_DAALA_DCT4 0 #define CONFIG_DAALA_DCT8 0 +#define CONFIG_DAALA_DCT16 0 +#define CONFIG_DAALA_DCT32 0 +#define CONFIG_DAALA_DCT64 0 #define CONFIG_CB4X4 1 #define CONFIG_CHROMA_2X2 0 #define CONFIG_CHROMA_SUB8X8 1 #define CONFIG_FRAME_SIZE 0 -#define CONFIG_DELTA_Q 1 #define CONFIG_EXT_DELTA_Q 1 #define CONFIG_ADAPT_SCAN 0 -#define CONFIG_FILTER_7BIT 1 #define CONFIG_PARALLEL_DEBLOCKING 1 +#define CONFIG_DEBLOCK_13TAP 0 #define CONFIG_LOOPFILTERING_ACROSS_TILES 1 #define CONFIG_TEMPMV_SIGNALING 1 #define CONFIG_RD_DEBUG 0 @@ -148,31 +144,48 @@ #define CONFIG_ENTROPY_STATS 0 #define CONFIG_MASKED_TX 0 #define CONFIG_DEPENDENT_HORZTILES 0 -#define CONFIG_DIST_8X8 0 -#define CONFIG_DAALA_DIST 0 -#define CONFIG_TRIPRED 0 +#define CONFIG_DIST_8X8 1 #define CONFIG_PALETTE_THROUGHPUT 1 #define CONFIG_REF_ADAPT 0 #define CONFIG_LV_MAP 0 +#define CONFIG_CTX1D 0 #define CONFIG_TXK_SEL 0 #define CONFIG_MV_COMPRESS 1 +#define CONFIG_SEGMENT_ZEROMV 0 #define CONFIG_FRAME_SUPERRES 0 #define CONFIG_NEW_MULTISYMBOL 0 #define CONFIG_COMPOUND_SINGLEREF 0 -#define CONFIG_AOM_QM 0 +#define CONFIG_AOM_QM 1 #define CONFIG_ONE_SIDED_COMPOUND 1 -#define CONFIG_EXT_COMP_REFS 0 +#define CONFIG_EXT_COMP_REFS 1 #define CONFIG_SMOOTH_HV 1 #define CONFIG_VAR_REFS 0 -#define CONFIG_RECT_INTRA_PRED 1 #define CONFIG_LGT 0 +#define CONFIG_LGT_FROM_PRED 0 #define CONFIG_SBL_SYMBOL 0 #define CONFIG_NCOBMC_ADAPT_WEIGHT 0 #define CONFIG_BGSPRITE 0 #define CONFIG_VAR_TX_NO_TX_MODE 0 #define CONFIG_MRC_TX 0 #define CONFIG_LPF_DIRECT 0 -#define CONFIG_UV_LVL 0 +#define CONFIG_LOOPFILTER_LEVEL 0 +#define CONFIG_NO_FRAME_CONTEXT_SIGNALING 0 +#define CONFIG_TXMG 1 +#define CONFIG_MAX_TILE 0 +#define CONFIG_HASH_ME 0 +#define CONFIG_COLORSPACE_HEADERS 0 +#define CONFIG_MFMV 0 +#define CONFIG_FRAME_MARKER 0 +#define CONFIG_JNT_COMP 0 +#define CONFIG_FRAME_SIGN_BIAS 0 +#define CONFIG_EXT_SKIP 0 +#define CONFIG_OBU 0 +#define CONFIG_AMVR 0 +#define CONFIG_LPF_SB 0 +#define CONFIG_OPT_REF_MV 0 +#define CONFIG_TMV 0 +#define CONFIG_RESTRICT_COMPRESSED_HDR 0 +#define CONFIG_HORZONLY_FRAME_SUPERRES 0 #define CONFIG_ANALYZER 0 #define DECODE_WIDTH_LIMIT 8192 #define DECODE_HEIGHT_LIMIT 4608 diff --git a/media/libaom/config/linux/ia32/aom_dsp_rtcd.h b/media/libaom/config/linux/ia32/aom_dsp_rtcd.h index 964ccb4d2..f6529fafb 100644 --- a/media/libaom/config/linux/ia32/aom_dsp_rtcd.h +++ b/media/libaom/config/linux/ia32/aom_dsp_rtcd.h @@ -20,6 +20,9 @@ extern "C" { #endif +void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +#define aom_blend_a64_d32_mask aom_blend_a64_d32_mask_c + void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); @@ -51,6 +54,28 @@ void aom_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, void aom_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*aom_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_hip_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz_hip aom_convolve8_add_src_horiz_hip_c + +void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert_hip aom_convolve8_add_src_vert_hip_c + void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -312,19 +337,24 @@ void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin RTCD_EXTERN void (*aom_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c +void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c +void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c void aom_dc_128_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x16 aom_dc_128_predictor_32x16_c +void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -332,13 +362,16 @@ void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8 RTCD_EXTERN void (*aom_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c +void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c +void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_c +void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -349,19 +382,24 @@ void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const ui RTCD_EXTERN void (*aom_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c +void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c +void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c void aom_dc_left_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x16 aom_dc_left_predictor_32x16_c +void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -369,13 +407,16 @@ void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint RTCD_EXTERN void (*aom_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c +void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c +void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_c +void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -386,19 +427,24 @@ void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*aom_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c +void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c +void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x16 aom_dc_predictor_32x16_c +void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -406,13 +452,16 @@ void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t * RTCD_EXTERN void (*aom_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c +void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c +void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_c +void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -423,19 +472,24 @@ void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin RTCD_EXTERN void (*aom_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c +void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c +void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c void aom_dc_top_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x16 aom_dc_top_predictor_32x16_c +void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -443,13 +497,16 @@ void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8 RTCD_EXTERN void (*aom_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c +void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c +void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_c +void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -502,19 +559,23 @@ void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*aom_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x32 aom_h_predictor_16x32_c +void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x8 aom_h_predictor_16x8_c +void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_2x2 aom_h_predictor_2x2_c void aom_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x16 aom_h_predictor_32x16_c +void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -522,13 +583,16 @@ void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a RTCD_EXTERN void (*aom_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_4x8 aom_h_predictor_4x8_c +void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x16 aom_h_predictor_8x16_c +void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x4 aom_h_predictor_8x4_c +void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -1452,6 +1516,25 @@ void aom_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *d void aom_highbd_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src aom_highbd_convolve8_add_src_c + +void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_hip_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz aom_highbd_convolve8_add_src_horiz_c + +void aom_highbd_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz_hip aom_highbd_convolve8_add_src_horiz_hip_c + +void aom_highbd_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert aom_highbd_convolve8_add_src_vert_c + +void aom_highbd_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert_hip aom_highbd_convolve8_add_src_vert_hip_c + void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); @@ -1483,7 +1566,8 @@ void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uin RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_d117_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_16x16 aom_highbd_d117_predictor_16x16_c +void aom_highbd_d117_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_16x32 aom_highbd_d117_predictor_16x32_c @@ -1498,10 +1582,12 @@ void aom_highbd_d117_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d117_predictor_32x16 aom_highbd_d117_predictor_32x16_c void aom_highbd_d117_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_32x32 aom_highbd_d117_predictor_32x32_c +void aom_highbd_d117_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_c +void aom_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_4x8 aom_highbd_d117_predictor_4x8_c @@ -1513,10 +1599,12 @@ void aom_highbd_d117_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d117_predictor_8x4 aom_highbd_d117_predictor_8x4_c void aom_highbd_d117_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_8x8 aom_highbd_d117_predictor_8x8_c +void aom_highbd_d117_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_16x16 aom_highbd_d135_predictor_16x16_c +void aom_highbd_d135_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_16x32 aom_highbd_d135_predictor_16x32_c @@ -1531,10 +1619,12 @@ void aom_highbd_d135_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d135_predictor_32x16 aom_highbd_d135_predictor_32x16_c void aom_highbd_d135_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_32x32 aom_highbd_d135_predictor_32x32_c +void aom_highbd_d135_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_c +void aom_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_4x8 aom_highbd_d135_predictor_4x8_c @@ -1546,10 +1636,12 @@ void aom_highbd_d135_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d135_predictor_8x4 aom_highbd_d135_predictor_8x4_c void aom_highbd_d135_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_8x8 aom_highbd_d135_predictor_8x8_c +void aom_highbd_d135_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_16x16 aom_highbd_d153_predictor_16x16_c +void aom_highbd_d153_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_16x32 aom_highbd_d153_predictor_16x32_c @@ -1564,10 +1656,12 @@ void aom_highbd_d153_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d153_predictor_32x16 aom_highbd_d153_predictor_32x16_c void aom_highbd_d153_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_32x32 aom_highbd_d153_predictor_32x32_c +void aom_highbd_d153_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_c +void aom_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_4x8 aom_highbd_d153_predictor_4x8_c @@ -1579,7 +1673,8 @@ void aom_highbd_d153_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d153_predictor_8x4 aom_highbd_d153_predictor_8x4_c void aom_highbd_d153_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_8x8 aom_highbd_d153_predictor_8x8_c +void aom_highbd_d153_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d207e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d207e_predictor_16x16 aom_highbd_d207e_predictor_16x16_c @@ -1615,37 +1710,47 @@ void aom_highbd_d207e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_d207e_predictor_8x8 aom_highbd_d207e_predictor_8x8_c void aom_highbd_d45e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x16 aom_highbd_d45e_predictor_16x16_c +void aom_highbd_d45e_predictor_16x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x32 aom_highbd_d45e_predictor_16x32_c +void aom_highbd_d45e_predictor_16x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x8 aom_highbd_d45e_predictor_16x8_c +void aom_highbd_d45e_predictor_16x8_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d45e_predictor_2x2 aom_highbd_d45e_predictor_2x2_c void aom_highbd_d45e_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x16 aom_highbd_d45e_predictor_32x16_c +void aom_highbd_d45e_predictor_32x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x32 aom_highbd_d45e_predictor_32x32_c +void aom_highbd_d45e_predictor_32x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_c +void aom_highbd_d45e_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_c +void aom_highbd_d45e_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_c +void aom_highbd_d45e_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_c +void aom_highbd_d45e_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_c +void aom_highbd_d45e_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d63e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d63e_predictor_16x16 aom_highbd_d63e_predictor_16x16_c @@ -1681,86 +1786,109 @@ void aom_highbd_d63e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d63e_predictor_8x8 aom_highbd_d63e_predictor_8x8_c void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_c +void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_c +void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_c +void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c void aom_highbd_dc_128_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_c +void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_c +void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_c +void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_c +void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_c +void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_c +void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_c +void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_c +void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_c +void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_c +void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c void aom_highbd_dc_left_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_c +void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_c +void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_c +void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_c +void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_c +void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_c +void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_c +void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); RTCD_EXTERN void (*aom_highbd_dc_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_c +void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_c +void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_c +void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -1771,50 +1899,63 @@ void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const u RTCD_EXTERN void (*aom_highbd_dc_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_c +void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_c +void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_c +void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); RTCD_EXTERN void (*aom_highbd_dc_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_c +void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_c +void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_c +void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c void aom_highbd_dc_top_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_c +void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_c +void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_c +void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_c +void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_c +void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_c +void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_c +void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -1837,37 +1978,47 @@ void aom_highbd_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int strid RTCD_EXTERN void (*aom_highbd_fdct8x8)(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_c +void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_c +void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_c +void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c void aom_highbd_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_c +void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_c +void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_c +void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_c +void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_c +void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_c +void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_c +void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); #define aom_highbd_iwht4x4_16_add aom_highbd_iwht4x4_16_add_c @@ -1881,6 +2032,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4)(uint16_t *s, int pitch, const ui void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1889,10 +2041,12 @@ RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8)(uint16_t *s, int pitch, const ui void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); RTCD_EXTERN void (*aom_highbd_lpf_horizontal_edge_16)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1905,6 +2059,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_vertical_16)(uint16_t *s, int pitch, const uin void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); RTCD_EXTERN void (*aom_highbd_lpf_vertical_16_dual)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1913,6 +2068,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_vertical_4)(uint16_t *s, int pitch, const uint void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1921,6 +2077,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_vertical_8)(uint16_t *s, int pitch, const uint void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); unsigned int aom_highbd_masked_sad16x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2502,16 +2659,19 @@ void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const RTCD_EXTERN void (*aom_highbd_v_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_c +void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_c +void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c void aom_highbd_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_c +void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2522,13 +2682,16 @@ void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const ui RTCD_EXTERN void (*aom_highbd_v_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_c +void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_c +void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_c +void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2618,25 +2781,21 @@ void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, con RTCD_EXTERN void (*aom_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_c void aom_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); @@ -2644,24 +2803,21 @@ void aom_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, cons RTCD_EXTERN void (*aom_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_c void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_c unsigned int aom_masked_sad16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_masked_sad16x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2932,37 +3088,52 @@ unsigned int aom_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, con RTCD_EXTERN unsigned int (*aom_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c +void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c +void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c +void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c void aom_paeth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c +void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c +void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c +void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c +void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c +void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c +void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c +void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -3261,37 +3432,47 @@ void aom_smooth_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_ #define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x16 aom_smooth_predictor_16x16_c +void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_c +void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_c +void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c void aom_smooth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x16 aom_smooth_predictor_32x16_c +void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x32 aom_smooth_predictor_32x32_c +void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x4 aom_smooth_predictor_4x4_c +void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_c +void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_c +void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x4 aom_smooth_predictor_8x4_c +void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x8 aom_smooth_predictor_8x8_c +void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c @@ -3481,19 +3662,24 @@ void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*aom_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x32 aom_v_predictor_16x32_c +void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x8 aom_v_predictor_16x8_c +void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_2x2 aom_v_predictor_2x2_c void aom_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x16 aom_v_predictor_32x16_c +void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3501,13 +3687,16 @@ void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a RTCD_EXTERN void (*aom_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_4x8 aom_v_predictor_4x8_c +void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x16 aom_v_predictor_8x16_c +void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x4 aom_v_predictor_8x4_c +void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3617,6 +3806,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_convolve8 = aom_convolve8_sse2; if (flags & HAS_SSSE3) aom_convolve8 = aom_convolve8_ssse3; if (flags & HAS_AVX2) aom_convolve8 = aom_convolve8_avx2; + aom_convolve8_add_src = aom_convolve8_add_src_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src = aom_convolve8_add_src_ssse3; + aom_convolve8_add_src_hip = aom_convolve8_add_src_hip_c; + if (flags & HAS_SSE2) aom_convolve8_add_src_hip = aom_convolve8_add_src_hip_sse2; + aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_ssse3; + aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_ssse3; aom_convolve8_avg = aom_convolve8_avg_c; if (flags & HAS_SSE2) aom_convolve8_avg = aom_convolve8_avg_sse2; if (flags & HAS_SSSE3) aom_convolve8_avg = aom_convolve8_avg_ssse3; @@ -3650,34 +3847,90 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_ssse3; aom_dc_128_predictor_16x16 = aom_dc_128_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_128_predictor_16x16 = aom_dc_128_predictor_16x16_sse2; + aom_dc_128_predictor_16x32 = aom_dc_128_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_16x32 = aom_dc_128_predictor_16x32_sse2; + aom_dc_128_predictor_16x8 = aom_dc_128_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_16x8 = aom_dc_128_predictor_16x8_sse2; + aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_avx2; aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_avx2; aom_dc_128_predictor_4x4 = aom_dc_128_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_128_predictor_4x4 = aom_dc_128_predictor_4x4_sse2; + aom_dc_128_predictor_4x8 = aom_dc_128_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_4x8 = aom_dc_128_predictor_4x8_sse2; + aom_dc_128_predictor_8x16 = aom_dc_128_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_8x16 = aom_dc_128_predictor_8x16_sse2; + aom_dc_128_predictor_8x4 = aom_dc_128_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_8x4 = aom_dc_128_predictor_8x4_sse2; aom_dc_128_predictor_8x8 = aom_dc_128_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_128_predictor_8x8 = aom_dc_128_predictor_8x8_sse2; aom_dc_left_predictor_16x16 = aom_dc_left_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_left_predictor_16x16 = aom_dc_left_predictor_16x16_sse2; + aom_dc_left_predictor_16x32 = aom_dc_left_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_16x32 = aom_dc_left_predictor_16x32_sse2; + aom_dc_left_predictor_16x8 = aom_dc_left_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_16x8 = aom_dc_left_predictor_16x8_sse2; + aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_avx2; aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_avx2; aom_dc_left_predictor_4x4 = aom_dc_left_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_left_predictor_4x4 = aom_dc_left_predictor_4x4_sse2; + aom_dc_left_predictor_4x8 = aom_dc_left_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_4x8 = aom_dc_left_predictor_4x8_sse2; + aom_dc_left_predictor_8x16 = aom_dc_left_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_8x16 = aom_dc_left_predictor_8x16_sse2; + aom_dc_left_predictor_8x4 = aom_dc_left_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_8x4 = aom_dc_left_predictor_8x4_sse2; aom_dc_left_predictor_8x8 = aom_dc_left_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_left_predictor_8x8 = aom_dc_left_predictor_8x8_sse2; aom_dc_predictor_16x16 = aom_dc_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_predictor_16x16 = aom_dc_predictor_16x16_sse2; + aom_dc_predictor_16x32 = aom_dc_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_predictor_16x32 = aom_dc_predictor_16x32_sse2; + aom_dc_predictor_16x8 = aom_dc_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_predictor_16x8 = aom_dc_predictor_16x8_sse2; + aom_dc_predictor_32x16 = aom_dc_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_avx2; aom_dc_predictor_32x32 = aom_dc_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_avx2; aom_dc_predictor_4x4 = aom_dc_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_predictor_4x4 = aom_dc_predictor_4x4_sse2; + aom_dc_predictor_4x8 = aom_dc_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_predictor_4x8 = aom_dc_predictor_4x8_sse2; + aom_dc_predictor_8x16 = aom_dc_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_predictor_8x16 = aom_dc_predictor_8x16_sse2; + aom_dc_predictor_8x4 = aom_dc_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_predictor_8x4 = aom_dc_predictor_8x4_sse2; aom_dc_predictor_8x8 = aom_dc_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_predictor_8x8 = aom_dc_predictor_8x8_sse2; aom_dc_top_predictor_16x16 = aom_dc_top_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_top_predictor_16x16 = aom_dc_top_predictor_16x16_sse2; + aom_dc_top_predictor_16x32 = aom_dc_top_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_16x32 = aom_dc_top_predictor_16x32_sse2; + aom_dc_top_predictor_16x8 = aom_dc_top_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_16x8 = aom_dc_top_predictor_16x8_sse2; + aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_avx2; aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_avx2; aom_dc_top_predictor_4x4 = aom_dc_top_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_top_predictor_4x4 = aom_dc_top_predictor_4x4_sse2; + aom_dc_top_predictor_4x8 = aom_dc_top_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_4x8 = aom_dc_top_predictor_4x8_sse2; + aom_dc_top_predictor_8x16 = aom_dc_top_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_8x16 = aom_dc_top_predictor_8x16_sse2; + aom_dc_top_predictor_8x4 = aom_dc_top_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_8x4 = aom_dc_top_predictor_8x4_sse2; aom_dc_top_predictor_8x8 = aom_dc_top_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_top_predictor_8x8 = aom_dc_top_predictor_8x8_sse2; aom_fdct16x16 = aom_fdct16x16_c; @@ -3703,10 +3956,23 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_get_mb_ss = aom_get_mb_ss_sse2; aom_h_predictor_16x16 = aom_h_predictor_16x16_c; if (flags & HAS_SSE2) aom_h_predictor_16x16 = aom_h_predictor_16x16_sse2; + aom_h_predictor_16x32 = aom_h_predictor_16x32_c; + if (flags & HAS_SSE2) aom_h_predictor_16x32 = aom_h_predictor_16x32_sse2; + aom_h_predictor_16x8 = aom_h_predictor_16x8_c; + if (flags & HAS_SSE2) aom_h_predictor_16x8 = aom_h_predictor_16x8_sse2; + aom_h_predictor_32x16 = aom_h_predictor_32x16_c; + if (flags & HAS_SSE2) aom_h_predictor_32x16 = aom_h_predictor_32x16_sse2; aom_h_predictor_32x32 = aom_h_predictor_32x32_c; if (flags & HAS_SSE2) aom_h_predictor_32x32 = aom_h_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_h_predictor_32x32 = aom_h_predictor_32x32_avx2; aom_h_predictor_4x4 = aom_h_predictor_4x4_c; if (flags & HAS_SSE2) aom_h_predictor_4x4 = aom_h_predictor_4x4_sse2; + aom_h_predictor_4x8 = aom_h_predictor_4x8_c; + if (flags & HAS_SSE2) aom_h_predictor_4x8 = aom_h_predictor_4x8_sse2; + aom_h_predictor_8x16 = aom_h_predictor_8x16_c; + if (flags & HAS_SSE2) aom_h_predictor_8x16 = aom_h_predictor_8x16_sse2; + aom_h_predictor_8x4 = aom_h_predictor_8x4_c; + if (flags & HAS_SSE2) aom_h_predictor_8x4 = aom_h_predictor_8x4_sse2; aom_h_predictor_8x8 = aom_h_predictor_8x8_c; if (flags & HAS_SSE2) aom_h_predictor_8x8 = aom_h_predictor_8x8_sse2; aom_hadamard_16x16 = aom_hadamard_16x16_c; @@ -4075,6 +4341,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_highbd_comp_avg_upsampled_pred = aom_highbd_comp_avg_upsampled_pred_sse2; aom_highbd_convolve8 = aom_highbd_convolve8_c; if (flags & HAS_AVX2) aom_highbd_convolve8 = aom_highbd_convolve8_avx2; + aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_c; + if (flags & HAS_SSSE3) aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_ssse3; aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_c; if (flags & HAS_AVX2) aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_avx2; aom_highbd_convolve8_avg_horiz = aom_highbd_convolve8_avg_horiz_c; @@ -4091,14 +4359,130 @@ static void setup_rtcd_internal(void) aom_highbd_convolve_copy = aom_highbd_convolve_copy_c; if (flags & HAS_SSE2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2; if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2; + aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_ssse3; + aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_ssse3; + aom_highbd_d117_predictor_4x4 = aom_highbd_d117_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d117_predictor_4x4 = aom_highbd_d117_predictor_4x4_sse2; + aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_ssse3; + aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_ssse3; + aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_ssse3; + aom_highbd_d135_predictor_4x4 = aom_highbd_d135_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d135_predictor_4x4 = aom_highbd_d135_predictor_4x4_sse2; + aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_ssse3; + aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_ssse3; + aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_ssse3; + aom_highbd_d153_predictor_4x4 = aom_highbd_d153_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d153_predictor_4x4 = aom_highbd_d153_predictor_4x4_sse2; + aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_ssse3; + aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_avx2; + aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_avx2; + aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_avx2; + aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_avx2; + aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_avx2; + aom_highbd_d45e_predictor_4x4 = aom_highbd_d45e_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_4x4 = aom_highbd_d45e_predictor_4x4_sse2; + aom_highbd_d45e_predictor_4x8 = aom_highbd_d45e_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_4x8 = aom_highbd_d45e_predictor_4x8_sse2; + aom_highbd_d45e_predictor_8x16 = aom_highbd_d45e_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_8x16 = aom_highbd_d45e_predictor_8x16_sse2; + aom_highbd_d45e_predictor_8x4 = aom_highbd_d45e_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_8x4 = aom_highbd_d45e_predictor_8x4_sse2; + aom_highbd_d45e_predictor_8x8 = aom_highbd_d45e_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_8x8 = aom_highbd_d45e_predictor_8x8_sse2; + aom_highbd_dc_128_predictor_16x16 = aom_highbd_dc_128_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x16 = aom_highbd_dc_128_predictor_16x16_sse2; + aom_highbd_dc_128_predictor_16x32 = aom_highbd_dc_128_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x32 = aom_highbd_dc_128_predictor_16x32_sse2; + aom_highbd_dc_128_predictor_16x8 = aom_highbd_dc_128_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x8 = aom_highbd_dc_128_predictor_16x8_sse2; + aom_highbd_dc_128_predictor_32x16 = aom_highbd_dc_128_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_32x16 = aom_highbd_dc_128_predictor_32x16_sse2; + aom_highbd_dc_128_predictor_32x32 = aom_highbd_dc_128_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_32x32 = aom_highbd_dc_128_predictor_32x32_sse2; + aom_highbd_dc_128_predictor_4x4 = aom_highbd_dc_128_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_4x4 = aom_highbd_dc_128_predictor_4x4_sse2; + aom_highbd_dc_128_predictor_4x8 = aom_highbd_dc_128_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_4x8 = aom_highbd_dc_128_predictor_4x8_sse2; + aom_highbd_dc_128_predictor_8x16 = aom_highbd_dc_128_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x16 = aom_highbd_dc_128_predictor_8x16_sse2; + aom_highbd_dc_128_predictor_8x4 = aom_highbd_dc_128_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x4 = aom_highbd_dc_128_predictor_8x4_sse2; + aom_highbd_dc_128_predictor_8x8 = aom_highbd_dc_128_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x8 = aom_highbd_dc_128_predictor_8x8_sse2; + aom_highbd_dc_left_predictor_16x16 = aom_highbd_dc_left_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x16 = aom_highbd_dc_left_predictor_16x16_sse2; + aom_highbd_dc_left_predictor_16x32 = aom_highbd_dc_left_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x32 = aom_highbd_dc_left_predictor_16x32_sse2; + aom_highbd_dc_left_predictor_16x8 = aom_highbd_dc_left_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x8 = aom_highbd_dc_left_predictor_16x8_sse2; + aom_highbd_dc_left_predictor_32x16 = aom_highbd_dc_left_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_32x16 = aom_highbd_dc_left_predictor_32x16_sse2; + aom_highbd_dc_left_predictor_32x32 = aom_highbd_dc_left_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_32x32 = aom_highbd_dc_left_predictor_32x32_sse2; + aom_highbd_dc_left_predictor_4x4 = aom_highbd_dc_left_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_4x4 = aom_highbd_dc_left_predictor_4x4_sse2; + aom_highbd_dc_left_predictor_4x8 = aom_highbd_dc_left_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_4x8 = aom_highbd_dc_left_predictor_4x8_sse2; + aom_highbd_dc_left_predictor_8x16 = aom_highbd_dc_left_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x16 = aom_highbd_dc_left_predictor_8x16_sse2; + aom_highbd_dc_left_predictor_8x4 = aom_highbd_dc_left_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x4 = aom_highbd_dc_left_predictor_8x4_sse2; + aom_highbd_dc_left_predictor_8x8 = aom_highbd_dc_left_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x8 = aom_highbd_dc_left_predictor_8x8_sse2; aom_highbd_dc_predictor_16x16 = aom_highbd_dc_predictor_16x16_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x16 = aom_highbd_dc_predictor_16x16_sse2; + aom_highbd_dc_predictor_16x32 = aom_highbd_dc_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x32 = aom_highbd_dc_predictor_16x32_sse2; + aom_highbd_dc_predictor_16x8 = aom_highbd_dc_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x8 = aom_highbd_dc_predictor_16x8_sse2; + aom_highbd_dc_predictor_32x16 = aom_highbd_dc_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_32x16 = aom_highbd_dc_predictor_32x16_sse2; aom_highbd_dc_predictor_32x32 = aom_highbd_dc_predictor_32x32_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_32x32 = aom_highbd_dc_predictor_32x32_sse2; aom_highbd_dc_predictor_4x4 = aom_highbd_dc_predictor_4x4_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_4x4 = aom_highbd_dc_predictor_4x4_sse2; + aom_highbd_dc_predictor_4x8 = aom_highbd_dc_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_4x8 = aom_highbd_dc_predictor_4x8_sse2; + aom_highbd_dc_predictor_8x16 = aom_highbd_dc_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x16 = aom_highbd_dc_predictor_8x16_sse2; + aom_highbd_dc_predictor_8x4 = aom_highbd_dc_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x4 = aom_highbd_dc_predictor_8x4_sse2; aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_sse2; + aom_highbd_dc_top_predictor_16x16 = aom_highbd_dc_top_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x16 = aom_highbd_dc_top_predictor_16x16_sse2; + aom_highbd_dc_top_predictor_16x32 = aom_highbd_dc_top_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x32 = aom_highbd_dc_top_predictor_16x32_sse2; + aom_highbd_dc_top_predictor_16x8 = aom_highbd_dc_top_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x8 = aom_highbd_dc_top_predictor_16x8_sse2; + aom_highbd_dc_top_predictor_32x16 = aom_highbd_dc_top_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_32x16 = aom_highbd_dc_top_predictor_32x16_sse2; + aom_highbd_dc_top_predictor_32x32 = aom_highbd_dc_top_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_32x32 = aom_highbd_dc_top_predictor_32x32_sse2; + aom_highbd_dc_top_predictor_4x4 = aom_highbd_dc_top_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_4x4 = aom_highbd_dc_top_predictor_4x4_sse2; + aom_highbd_dc_top_predictor_4x8 = aom_highbd_dc_top_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_4x8 = aom_highbd_dc_top_predictor_4x8_sse2; + aom_highbd_dc_top_predictor_8x16 = aom_highbd_dc_top_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x16 = aom_highbd_dc_top_predictor_8x16_sse2; + aom_highbd_dc_top_predictor_8x4 = aom_highbd_dc_top_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x4 = aom_highbd_dc_top_predictor_8x4_sse2; + aom_highbd_dc_top_predictor_8x8 = aom_highbd_dc_top_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x8 = aom_highbd_dc_top_predictor_8x8_sse2; aom_highbd_fdct16x16 = aom_highbd_fdct16x16_c; if (flags & HAS_SSE2) aom_highbd_fdct16x16 = aom_highbd_fdct16x16_sse2; aom_highbd_fdct32x32 = aom_highbd_fdct32x32_c; @@ -4109,30 +4493,56 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_highbd_fdct4x4 = aom_highbd_fdct4x4_sse2; aom_highbd_fdct8x8 = aom_highbd_fdct8x8_c; if (flags & HAS_SSE2) aom_highbd_fdct8x8 = aom_highbd_fdct8x8_sse2; + aom_highbd_h_predictor_16x16 = aom_highbd_h_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_16x16 = aom_highbd_h_predictor_16x16_sse2; + aom_highbd_h_predictor_16x32 = aom_highbd_h_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_16x32 = aom_highbd_h_predictor_16x32_sse2; + aom_highbd_h_predictor_16x8 = aom_highbd_h_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_16x8 = aom_highbd_h_predictor_16x8_sse2; + aom_highbd_h_predictor_32x16 = aom_highbd_h_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_32x16 = aom_highbd_h_predictor_32x16_sse2; + aom_highbd_h_predictor_32x32 = aom_highbd_h_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_32x32 = aom_highbd_h_predictor_32x32_sse2; + aom_highbd_h_predictor_4x4 = aom_highbd_h_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_4x4 = aom_highbd_h_predictor_4x4_sse2; + aom_highbd_h_predictor_4x8 = aom_highbd_h_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_4x8 = aom_highbd_h_predictor_4x8_sse2; + aom_highbd_h_predictor_8x16 = aom_highbd_h_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_8x16 = aom_highbd_h_predictor_8x16_sse2; + aom_highbd_h_predictor_8x4 = aom_highbd_h_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_8x4 = aom_highbd_h_predictor_8x4_sse2; + aom_highbd_h_predictor_8x8 = aom_highbd_h_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_8x8 = aom_highbd_h_predictor_8x8_sse2; aom_highbd_lpf_horizontal_4 = aom_highbd_lpf_horizontal_4_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_4 = aom_highbd_lpf_horizontal_4_sse2; aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_avx2; aom_highbd_lpf_horizontal_8 = aom_highbd_lpf_horizontal_8_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_8 = aom_highbd_lpf_horizontal_8_sse2; aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_avx2; aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_avx2; aom_highbd_lpf_horizontal_edge_8 = aom_highbd_lpf_horizontal_edge_8_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_edge_8 = aom_highbd_lpf_horizontal_edge_8_sse2; aom_highbd_lpf_vertical_16 = aom_highbd_lpf_vertical_16_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_16 = aom_highbd_lpf_vertical_16_sse2; aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_avx2; aom_highbd_lpf_vertical_4 = aom_highbd_lpf_vertical_4_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_4 = aom_highbd_lpf_vertical_4_sse2; aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_avx2; aom_highbd_lpf_vertical_8 = aom_highbd_lpf_vertical_8_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_8 = aom_highbd_lpf_vertical_8_sse2; aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_avx2; aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_ssse3; aom_highbd_masked_sad16x32 = aom_highbd_masked_sad16x32_c; @@ -4316,10 +4726,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_highbd_upsampled_pred = aom_highbd_upsampled_pred_sse2; aom_highbd_v_predictor_16x16 = aom_highbd_v_predictor_16x16_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_16x16 = aom_highbd_v_predictor_16x16_sse2; + aom_highbd_v_predictor_16x32 = aom_highbd_v_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_16x32 = aom_highbd_v_predictor_16x32_sse2; + aom_highbd_v_predictor_16x8 = aom_highbd_v_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_16x8 = aom_highbd_v_predictor_16x8_sse2; + aom_highbd_v_predictor_32x16 = aom_highbd_v_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_32x16 = aom_highbd_v_predictor_32x16_sse2; aom_highbd_v_predictor_32x32 = aom_highbd_v_predictor_32x32_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_32x32 = aom_highbd_v_predictor_32x32_sse2; aom_highbd_v_predictor_4x4 = aom_highbd_v_predictor_4x4_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_4x4 = aom_highbd_v_predictor_4x4_sse2; + aom_highbd_v_predictor_4x8 = aom_highbd_v_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_4x8 = aom_highbd_v_predictor_4x8_sse2; + aom_highbd_v_predictor_8x16 = aom_highbd_v_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_8x16 = aom_highbd_v_predictor_8x16_sse2; + aom_highbd_v_predictor_8x4 = aom_highbd_v_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_8x4 = aom_highbd_v_predictor_8x4_sse2; aom_highbd_v_predictor_8x8 = aom_highbd_v_predictor_8x8_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_8x8 = aom_highbd_v_predictor_8x8_sse2; aom_idct16x16_10_add = aom_idct16x16_10_add_c; @@ -4368,30 +4790,18 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_iwht4x4_16_add = aom_iwht4x4_16_add_sse2; aom_lpf_horizontal_4 = aom_lpf_horizontal_4_c; if (flags & HAS_SSE2) aom_lpf_horizontal_4 = aom_lpf_horizontal_4_sse2; - aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_c; - if (flags & HAS_SSE2) aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_sse2; aom_lpf_horizontal_8 = aom_lpf_horizontal_8_c; if (flags & HAS_SSE2) aom_lpf_horizontal_8 = aom_lpf_horizontal_8_sse2; - aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_c; - if (flags & HAS_SSE2) aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_sse2; aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_c; if (flags & HAS_SSE2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_avx2; aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_c; if (flags & HAS_SSE2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_avx2; aom_lpf_vertical_16 = aom_lpf_vertical_16_c; if (flags & HAS_SSE2) aom_lpf_vertical_16 = aom_lpf_vertical_16_sse2; - aom_lpf_vertical_16_dual = aom_lpf_vertical_16_dual_c; - if (flags & HAS_SSE2) aom_lpf_vertical_16_dual = aom_lpf_vertical_16_dual_sse2; aom_lpf_vertical_4 = aom_lpf_vertical_4_c; if (flags & HAS_SSE2) aom_lpf_vertical_4 = aom_lpf_vertical_4_sse2; - aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_c; - if (flags & HAS_SSE2) aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_sse2; aom_lpf_vertical_8 = aom_lpf_vertical_8_c; if (flags & HAS_SSE2) aom_lpf_vertical_8 = aom_lpf_vertical_8_sse2; - aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_c; - if (flags & HAS_SSE2) aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_sse2; aom_masked_sad16x16 = aom_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_masked_sad16x16 = aom_masked_sad16x16_ssse3; aom_masked_sad16x32 = aom_masked_sad16x32_c; @@ -4507,6 +4917,31 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_obmc_variance8x4 = aom_obmc_variance8x4_sse4_1; aom_obmc_variance8x8 = aom_obmc_variance8x8_c; if (flags & HAS_SSE4_1) aom_obmc_variance8x8 = aom_obmc_variance8x8_sse4_1; + aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_avx2; + aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2; + aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_avx2; + aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_avx2; + aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_avx2; + aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3; + aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3; + aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3; + aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3; + aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_ssse3; aom_quantize_b = aom_quantize_b_c; if (flags & HAS_SSE2) aom_quantize_b = aom_quantize_b_sse2; aom_sad16x16 = aom_sad16x16_c; @@ -4627,6 +5062,26 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_satd = aom_satd_sse2; aom_scaled_2d = aom_scaled_2d_c; if (flags & HAS_SSSE3) aom_scaled_2d = aom_scaled_2d_ssse3; + aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_ssse3; + aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3; + aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3; + aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_ssse3; + aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_ssse3; + aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3; + aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3; + aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3; + aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3; + aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_ssse3; aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_ssse3; @@ -4719,10 +5174,24 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_upsampled_pred = aom_upsampled_pred_sse2; aom_v_predictor_16x16 = aom_v_predictor_16x16_c; if (flags & HAS_SSE2) aom_v_predictor_16x16 = aom_v_predictor_16x16_sse2; + aom_v_predictor_16x32 = aom_v_predictor_16x32_c; + if (flags & HAS_SSE2) aom_v_predictor_16x32 = aom_v_predictor_16x32_sse2; + aom_v_predictor_16x8 = aom_v_predictor_16x8_c; + if (flags & HAS_SSE2) aom_v_predictor_16x8 = aom_v_predictor_16x8_sse2; + aom_v_predictor_32x16 = aom_v_predictor_32x16_c; + if (flags & HAS_SSE2) aom_v_predictor_32x16 = aom_v_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x16 = aom_v_predictor_32x16_avx2; aom_v_predictor_32x32 = aom_v_predictor_32x32_c; if (flags & HAS_SSE2) aom_v_predictor_32x32 = aom_v_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x32 = aom_v_predictor_32x32_avx2; aom_v_predictor_4x4 = aom_v_predictor_4x4_c; if (flags & HAS_SSE2) aom_v_predictor_4x4 = aom_v_predictor_4x4_sse2; + aom_v_predictor_4x8 = aom_v_predictor_4x8_c; + if (flags & HAS_SSE2) aom_v_predictor_4x8 = aom_v_predictor_4x8_sse2; + aom_v_predictor_8x16 = aom_v_predictor_8x16_c; + if (flags & HAS_SSE2) aom_v_predictor_8x16 = aom_v_predictor_8x16_sse2; + aom_v_predictor_8x4 = aom_v_predictor_8x4_c; + if (flags & HAS_SSE2) aom_v_predictor_8x4 = aom_v_predictor_8x4_sse2; aom_v_predictor_8x8 = aom_v_predictor_8x8_c; if (flags & HAS_SSE2) aom_v_predictor_8x8 = aom_v_predictor_8x8_sse2; aom_variance16x16 = aom_variance16x16_c; diff --git a/media/libaom/config/linux/ia32/av1_rtcd.h b/media/libaom/config/linux/ia32/av1_rtcd.h index 3bd4729ab..b5d286212 100644 --- a/media/libaom/config/linux/ia32/av1_rtcd.h +++ b/media/libaom/config/linux/ia32/av1_rtcd.h @@ -31,44 +31,39 @@ struct search_site_config; struct mv; union int_mv; struct yv12_buffer_config; -typedef uint16_t od_dering_in; #ifdef __cplusplus extern "C" { #endif -void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void apply_selfguided_restoration_c(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration)(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); + +void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_highbd_sse4_1(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration_highbd)(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_2d)(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); + +void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); + void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_horiz)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +void av1_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +RTCD_EXTERN void (*av1_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); + void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); @@ -76,9 +71,6 @@ RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_ int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_diamond_search_sad av1_diamond_search_sad_c -void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_fdct8x8_quant av1_fdct8x8_quant_c - void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); @@ -133,6 +125,14 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, struct t void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); RTCD_EXTERN void (*av1_fht8x8)(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength); +void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength); + +void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength); +void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength); + int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_full_range_search av1_full_range_search_c @@ -141,46 +141,42 @@ int av1_full_search_sadx3(const struct macroblock *x, const struct mv *ref_mv, i int av1_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); RTCD_EXTERN int (*av1_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); -void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); +void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type); #define av1_fwd_idtx av1_fwd_idtx_c -void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x32 av1_fwd_txfm2d_16x32_c -void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x8 av1_fwd_txfm2d_16x8_c -void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x16 av1_fwd_txfm2d_32x16_c -void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x8 av1_fwd_txfm2d_4x8_c -void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); - -void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x16 av1_fwd_txfm2d_8x16_c -void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x4 av1_fwd_txfm2d_8x4_c -void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define av1_fwht4x4 av1_fwht4x4_c @@ -207,6 +203,14 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c +void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); + +void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); + void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c @@ -221,6 +225,10 @@ void av1_highbd_convolve_init_c(void); void av1_highbd_convolve_init_sse4_1(void); RTCD_EXTERN void (*av1_highbd_convolve_init)(void); +void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +void av1_highbd_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); + void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); void av1_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); RTCD_EXTERN void (*av1_highbd_convolve_vert)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); @@ -267,9 +275,6 @@ void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c -void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); -#define av1_highbd_quantize_b av1_highbd_quantize_b_c - void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); @@ -282,6 +287,14 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); @@ -334,48 +347,45 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); RTCD_EXTERN void (*av1_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); -void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c -void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c -void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c -void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c -void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c - -void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c -void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c -void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); void av1_lowbd_convolve_init_c(void); void av1_lowbd_convolve_init_ssse3(void); RTCD_EXTERN void (*av1_lowbd_convolve_init)(void); -void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale); #define av1_quantize_b av1_quantize_b_c void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -387,10 +397,26 @@ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void av1_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); + void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); void av1_temporal_filter_apply_sse2(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); RTCD_EXTERN void (*av1_temporal_filter_apply)(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); +void av1_upsample_intra_edge_c(uint8_t *p, int sz); +void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz); +RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz); + +void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd); +void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd); +RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd); + void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); @@ -408,64 +434,38 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, con uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); RTCD_EXTERN uint64_t (*av1_wedge_sse_from_residuals)(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); +void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); + +int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift); + double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); RTCD_EXTERN double (*compute_cross_correlation)(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); -void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); -int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse2(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_ssse3(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse4_1(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -RTCD_EXTERN int (*od_dir_find8)(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); - -void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - -void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - void aom_rtcd(void); #ifdef RTCD_C @@ -476,26 +476,20 @@ static void setup_rtcd_internal(void) (void)flags; - aom_clpf_block = aom_clpf_block_c; - if (flags & HAS_SSE2) aom_clpf_block = aom_clpf_block_sse2; - if (flags & HAS_SSSE3) aom_clpf_block = aom_clpf_block_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block = aom_clpf_block_sse4_1; - aom_clpf_block_hbd = aom_clpf_block_hbd_c; - if (flags & HAS_SSE2) aom_clpf_block_hbd = aom_clpf_block_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_block_hbd = aom_clpf_block_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block_hbd = aom_clpf_block_hbd_sse4_1; - aom_clpf_hblock = aom_clpf_hblock_c; - if (flags & HAS_SSE2) aom_clpf_hblock = aom_clpf_hblock_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock = aom_clpf_hblock_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock = aom_clpf_hblock_sse4_1; - aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_c; - if (flags & HAS_SSE2) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse4_1; + apply_selfguided_restoration = apply_selfguided_restoration_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1; + apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_sse4_1; av1_block_error = av1_block_error_c; if (flags & HAS_AVX2) av1_block_error = av1_block_error_avx2; + av1_convolve_2d = av1_convolve_2d_c; + if (flags & HAS_SSE2) av1_convolve_2d = av1_convolve_2d_sse2; + av1_convolve_2d_scale = av1_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1; av1_convolve_horiz = av1_convolve_horiz_c; if (flags & HAS_SSSE3) av1_convolve_horiz = av1_convolve_horiz_ssse3; + av1_convolve_rounding = av1_convolve_rounding_c; + if (flags & HAS_AVX2) av1_convolve_rounding = av1_convolve_rounding_avx2; av1_convolve_vert = av1_convolve_vert_c; if (flags & HAS_SSSE3) av1_convolve_vert = av1_convolve_vert_ssse3; av1_fht16x16 = av1_fht16x16_c; @@ -520,6 +514,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) av1_fht8x4 = av1_fht8x4_sse2; av1_fht8x8 = av1_fht8x8_c; if (flags & HAS_SSE2) av1_fht8x8 = av1_fht8x8_sse2; + av1_filter_intra_edge = av1_filter_intra_edge_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1; + av1_filter_intra_edge_high = av1_filter_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1; av1_full_search_sad = av1_full_search_sad_c; if (flags & HAS_SSE3) av1_full_search_sad = av1_full_search_sadx3; if (flags & HAS_SSE4_1) av1_full_search_sad = av1_full_search_sadx8; @@ -529,16 +527,20 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_sse4_1; av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_sse4_1; - av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_c; - if (flags & HAS_SSE4_1) av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_sse4_1; av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_sse4_1; av1_highbd_block_error = av1_highbd_block_error_c; if (flags & HAS_SSE2) av1_highbd_block_error = av1_highbd_block_error_sse2; + av1_highbd_convolve_2d = av1_highbd_convolve_2d_c; + if (flags & HAS_SSSE3) av1_highbd_convolve_2d = av1_highbd_convolve_2d_ssse3; + av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1; av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_sse4_1; av1_highbd_convolve_init = av1_highbd_convolve_init_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_init = av1_highbd_convolve_init_sse4_1; + av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_c; + if (flags & HAS_AVX2) av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_avx2; av1_highbd_convolve_vert = av1_highbd_convolve_vert_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_vert = av1_highbd_convolve_vert_sse4_1; av1_highbd_quantize_fp = av1_highbd_quantize_fp_c; @@ -546,6 +548,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_highbd_quantize_fp = av1_highbd_quantize_fp_avx2; av1_highbd_warp_affine = av1_highbd_warp_affine_c; if (flags & HAS_SSSE3) av1_highbd_warp_affine = av1_highbd_warp_affine_ssse3; + av1_highpass_filter = av1_highpass_filter_c; + if (flags & HAS_SSE4_1) av1_highpass_filter = av1_highpass_filter_sse4_1; + av1_highpass_filter_highbd = av1_highpass_filter_highbd_c; + if (flags & HAS_SSE4_1) av1_highpass_filter_highbd = av1_highpass_filter_highbd_sse4_1; av1_iht16x16_256_add = av1_iht16x16_256_add_c; if (flags & HAS_SSE2) av1_iht16x16_256_add = av1_iht16x16_256_add_sse2; if (flags & HAS_AVX2) av1_iht16x16_256_add = av1_iht16x16_256_add_avx2; @@ -580,8 +586,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_quantize_fp = av1_quantize_fp_avx2; av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c; if (flags & HAS_AVX2) av1_quantize_fp_32x32 = av1_quantize_fp_32x32_avx2; + av1_selfguided_restoration = av1_selfguided_restoration_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1; + av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_sse4_1; av1_temporal_filter_apply = av1_temporal_filter_apply_c; if (flags & HAS_SSE2) av1_temporal_filter_apply = av1_temporal_filter_apply_sse2; + av1_upsample_intra_edge = av1_upsample_intra_edge_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1; + av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1; av1_warp_affine = av1_warp_affine_c; if (flags & HAS_SSE2) av1_warp_affine = av1_warp_affine_sse2; if (flags & HAS_SSSE3) av1_warp_affine = av1_warp_affine_ssse3; @@ -591,44 +605,28 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) av1_wedge_sign_from_residuals = av1_wedge_sign_from_residuals_sse2; av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_c; if (flags & HAS_SSE2) av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_sse2; + cdef_filter_block = cdef_filter_block_c; + if (flags & HAS_SSE2) cdef_filter_block = cdef_filter_block_sse2; + if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3; + if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1; + if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2; + cdef_find_dir = cdef_find_dir_c; + if (flags & HAS_SSE2) cdef_find_dir = cdef_find_dir_sse2; + if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3; + if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1; + if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2; compute_cross_correlation = compute_cross_correlation_c; if (flags & HAS_SSE4_1) compute_cross_correlation = compute_cross_correlation_sse4_1; - copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_c; - if (flags & HAS_SSE2) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse4_1; - copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_c; - if (flags & HAS_SSE2) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse4_1; - copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_c; - if (flags & HAS_SSE2) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse4_1; - copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_c; - if (flags & HAS_SSE2) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse4_1; copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_c; if (flags & HAS_SSE2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1; + if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2; copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_c; if (flags & HAS_SSE2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1; - od_dir_find8 = od_dir_find8_c; - if (flags & HAS_SSE2) od_dir_find8 = od_dir_find8_sse2; - if (flags & HAS_SSSE3) od_dir_find8 = od_dir_find8_ssse3; - if (flags & HAS_SSE4_1) od_dir_find8 = od_dir_find8_sse4_1; - od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_c; - if (flags & HAS_SSE2) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse4_1; - od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_c; - if (flags & HAS_SSE2) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse4_1; + if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2; } #endif diff --git a/media/libaom/config/linux/x64/aom_config.asm b/media/libaom/config/linux/x64/aom_config.asm index 2821f150f..455683b8f 100644 --- a/media/libaom/config/linux/x64/aom_config.asm +++ b/media/libaom/config/linux/x64/aom_config.asm @@ -46,8 +46,6 @@ CONFIG_AV1 equ 1 CONFIG_STATIC_MSVCRT equ 0 CONFIG_SPATIAL_RESAMPLING equ 1 CONFIG_REALTIME_ONLY equ 0 -CONFIG_ONTHEFLY_BITPACKING equ 0 -CONFIG_ERROR_CONCEALMENT equ 0 CONFIG_SHARED equ 0 CONFIG_STATIC equ 1 CONFIG_SMALL equ 0 @@ -60,73 +58,71 @@ CONFIG_ACCOUNTING equ 0 CONFIG_INSPECTION equ 0 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_ENCODE_PERF_TESTS equ 0 +CONFIG_BITSTREAM_DEBUG equ 0 +CONFIG_SYMBOLRATE equ 0 CONFIG_COEFFICIENT_RANGE_CHECKING equ 0 CONFIG_LOWBITDEPTH equ 1 CONFIG_HIGHBITDEPTH equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_SIZE_LIMIT equ 1 -CONFIG_COLORSPACE_HEADERS equ 0 CONFIG_FP_MB_STATS equ 0 CONFIG_CDEF equ 1 +CONFIG_CDEF_SINGLEPASS equ 1 CONFIG_VAR_TX equ 1 CONFIG_RECT_TX equ 1 CONFIG_RECT_TX_EXT equ 0 CONFIG_TPL_MV equ 0 CONFIG_DUAL_FILTER equ 1 -CONFIG_CONVOLVE_ROUND equ 0 +CONFIG_CONVOLVE_ROUND equ 1 CONFIG_COMPOUND_ROUND equ 0 CONFIG_EXT_TX equ 1 -CONFIG_DPCM_INTRA equ 0 CONFIG_TX64X64 equ 0 CONFIG_EXT_INTRA equ 1 CONFIG_INTRA_INTERP equ 0 CONFIG_FILTER_INTRA equ 0 -CONFIG_INTRA_EDGE equ 0 +CONFIG_INTRA_EDGE equ 1 CONFIG_INTRABC equ 0 -CONFIG_EXT_INTER equ 1 CONFIG_INTERINTRA equ 1 CONFIG_WEDGE equ 1 CONFIG_COMPOUND_SEGMENT equ 1 CONFIG_EXT_REFS equ 1 -CONFIG_ALTREF2 equ 0 -CONFIG_SPEED_REFS equ 0 -CONFIG_GF_GROUPS equ 0 -CONFIG_FLEX_REFS equ 0 CONFIG_GLOBAL_MOTION equ 1 CONFIG_NEW_QUANT equ 0 CONFIG_SUPERTX equ 0 CONFIG_ANS equ 0 -CONFIG_LOOP_RESTORATION equ 0 +CONFIG_LOOP_RESTORATION equ 1 +CONFIG_STRIPED_LOOP_RESTORATION equ 0 CONFIG_EXT_PARTITION equ 0 CONFIG_EXT_PARTITION_TYPES equ 0 +CONFIG_EXT_PARTITION_TYPES_AB equ 0 CONFIG_UNPOISON_PARTITION_CTX equ 0 CONFIG_EXT_TILE equ 0 CONFIG_MOTION_VAR equ 1 CONFIG_NCOBMC equ 0 CONFIG_WARPED_MOTION equ 1 CONFIG_Q_ADAPT_PROBS equ 0 -CONFIG_BITSTREAM_DEBUG equ 0 CONFIG_INTER_STATS_ONLY equ 0 -CONFIG_ALT_INTRA equ 1 -CONFIG_PALETTE equ 1 CONFIG_PALETTE_DELTA_ENCODING equ 0 CONFIG_RAWBITS equ 0 -CONFIG_EC_SMALLMUL equ 1 +CONFIG_KF_CTX equ 0 CONFIG_PVQ equ 0 CONFIG_CFL equ 0 CONFIG_XIPHRC equ 0 CONFIG_DCT_ONLY equ 0 +CONFIG_DAALA_TX equ 0 CONFIG_DAALA_DCT4 equ 0 CONFIG_DAALA_DCT8 equ 0 +CONFIG_DAALA_DCT16 equ 0 +CONFIG_DAALA_DCT32 equ 0 +CONFIG_DAALA_DCT64 equ 0 CONFIG_CB4X4 equ 1 CONFIG_CHROMA_2X2 equ 0 CONFIG_CHROMA_SUB8X8 equ 1 CONFIG_FRAME_SIZE equ 0 -CONFIG_DELTA_Q equ 1 CONFIG_EXT_DELTA_Q equ 1 CONFIG_ADAPT_SCAN equ 0 -CONFIG_FILTER_7BIT equ 1 CONFIG_PARALLEL_DEBLOCKING equ 1 +CONFIG_DEBLOCK_13TAP equ 0 CONFIG_LOOPFILTERING_ACROSS_TILES equ 1 CONFIG_TEMPMV_SIGNALING equ 1 CONFIG_RD_DEBUG equ 0 @@ -135,29 +131,46 @@ CONFIG_COEF_INTERLEAVE equ 0 CONFIG_ENTROPY_STATS equ 0 CONFIG_MASKED_TX equ 0 CONFIG_DEPENDENT_HORZTILES equ 0 -CONFIG_DIST_8X8 equ 0 -CONFIG_DAALA_DIST equ 0 -CONFIG_TRIPRED equ 0 +CONFIG_DIST_8X8 equ 1 CONFIG_PALETTE_THROUGHPUT equ 1 CONFIG_REF_ADAPT equ 0 CONFIG_LV_MAP equ 0 +CONFIG_CTX1D equ 0 CONFIG_TXK_SEL equ 0 CONFIG_MV_COMPRESS equ 1 +CONFIG_SEGMENT_ZEROMV equ 0 CONFIG_FRAME_SUPERRES equ 0 CONFIG_NEW_MULTISYMBOL equ 0 CONFIG_COMPOUND_SINGLEREF equ 0 -CONFIG_AOM_QM equ 0 +CONFIG_AOM_QM equ 1 CONFIG_ONE_SIDED_COMPOUND equ 1 -CONFIG_EXT_COMP_REFS equ 0 +CONFIG_EXT_COMP_REFS equ 1 CONFIG_SMOOTH_HV equ 1 CONFIG_VAR_REFS equ 0 -CONFIG_RECT_INTRA_PRED equ 1 CONFIG_LGT equ 0 +CONFIG_LGT_FROM_PRED equ 0 CONFIG_SBL_SYMBOL equ 0 CONFIG_NCOBMC_ADAPT_WEIGHT equ 0 CONFIG_BGSPRITE equ 0 CONFIG_VAR_TX_NO_TX_MODE equ 0 CONFIG_MRC_TX equ 0 CONFIG_LPF_DIRECT equ 0 -CONFIG_UV_LVL equ 0 +CONFIG_LOOPFILTER_LEVEL equ 0 +CONFIG_NO_FRAME_CONTEXT_SIGNALING equ 0 +CONFIG_TXMG equ 1 +CONFIG_MAX_TILE equ 0 +CONFIG_HASH_ME equ 0 +CONFIG_COLORSPACE_HEADERS equ 0 +CONFIG_MFMV equ 0 +CONFIG_FRAME_MARKER equ 0 +CONFIG_JNT_COMP equ 0 +CONFIG_FRAME_SIGN_BIAS equ 0 +CONFIG_EXT_SKIP equ 0 +CONFIG_OBU equ 0 +CONFIG_AMVR equ 0 +CONFIG_LPF_SB equ 0 +CONFIG_OPT_REF_MV equ 0 +CONFIG_TMV equ 0 +CONFIG_RESTRICT_COMPRESSED_HDR equ 0 +CONFIG_HORZONLY_FRAME_SUPERRES equ 0 CONFIG_ANALYZER equ 0 diff --git a/media/libaom/config/linux/x64/aom_config.h b/media/libaom/config/linux/x64/aom_config.h index f9f2caadd..f3416f3dd 100644 --- a/media/libaom/config/linux/x64/aom_config.h +++ b/media/libaom/config/linux/x64/aom_config.h @@ -59,8 +59,6 @@ #define CONFIG_STATIC_MSVCRT 0 #define CONFIG_SPATIAL_RESAMPLING 1 #define CONFIG_REALTIME_ONLY 0 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 #define CONFIG_SHARED 0 #define CONFIG_STATIC 1 #define CONFIG_SMALL 0 @@ -73,73 +71,71 @@ #define CONFIG_INSPECTION 0 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_BITSTREAM_DEBUG 0 +#define CONFIG_SYMBOLRATE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_LOWBITDEPTH 1 #define CONFIG_HIGHBITDEPTH 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_SIZE_LIMIT 1 -#define CONFIG_COLORSPACE_HEADERS 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_CDEF 1 +#define CONFIG_CDEF_SINGLEPASS 1 #define CONFIG_VAR_TX 1 #define CONFIG_RECT_TX 1 #define CONFIG_RECT_TX_EXT 0 #define CONFIG_TPL_MV 0 #define CONFIG_DUAL_FILTER 1 -#define CONFIG_CONVOLVE_ROUND 0 +#define CONFIG_CONVOLVE_ROUND 1 #define CONFIG_COMPOUND_ROUND 0 #define CONFIG_EXT_TX 1 -#define CONFIG_DPCM_INTRA 0 #define CONFIG_TX64X64 0 #define CONFIG_EXT_INTRA 1 #define CONFIG_INTRA_INTERP 0 #define CONFIG_FILTER_INTRA 0 -#define CONFIG_INTRA_EDGE 0 +#define CONFIG_INTRA_EDGE 1 #define CONFIG_INTRABC 0 -#define CONFIG_EXT_INTER 1 #define CONFIG_INTERINTRA 1 #define CONFIG_WEDGE 1 #define CONFIG_COMPOUND_SEGMENT 1 #define CONFIG_EXT_REFS 1 -#define CONFIG_ALTREF2 0 -#define CONFIG_SPEED_REFS 0 -#define CONFIG_GF_GROUPS 0 -#define CONFIG_FLEX_REFS 0 #define CONFIG_GLOBAL_MOTION 1 #define CONFIG_NEW_QUANT 0 #define CONFIG_SUPERTX 0 #define CONFIG_ANS 0 -#define CONFIG_LOOP_RESTORATION 0 +#define CONFIG_LOOP_RESTORATION 1 +#define CONFIG_STRIPED_LOOP_RESTORATION 0 #define CONFIG_EXT_PARTITION 0 #define CONFIG_EXT_PARTITION_TYPES 0 +#define CONFIG_EXT_PARTITION_TYPES_AB 0 #define CONFIG_UNPOISON_PARTITION_CTX 0 #define CONFIG_EXT_TILE 0 #define CONFIG_MOTION_VAR 1 #define CONFIG_NCOBMC 0 #define CONFIG_WARPED_MOTION 1 #define CONFIG_Q_ADAPT_PROBS 0 -#define CONFIG_BITSTREAM_DEBUG 0 #define CONFIG_INTER_STATS_ONLY 0 -#define CONFIG_ALT_INTRA 1 -#define CONFIG_PALETTE 1 #define CONFIG_PALETTE_DELTA_ENCODING 0 #define CONFIG_RAWBITS 0 -#define CONFIG_EC_SMALLMUL 1 +#define CONFIG_KF_CTX 0 #define CONFIG_PVQ 0 #define CONFIG_CFL 0 #define CONFIG_XIPHRC 0 #define CONFIG_DCT_ONLY 0 +#define CONFIG_DAALA_TX 0 #define CONFIG_DAALA_DCT4 0 #define CONFIG_DAALA_DCT8 0 +#define CONFIG_DAALA_DCT16 0 +#define CONFIG_DAALA_DCT32 0 +#define CONFIG_DAALA_DCT64 0 #define CONFIG_CB4X4 1 #define CONFIG_CHROMA_2X2 0 #define CONFIG_CHROMA_SUB8X8 1 #define CONFIG_FRAME_SIZE 0 -#define CONFIG_DELTA_Q 1 #define CONFIG_EXT_DELTA_Q 1 #define CONFIG_ADAPT_SCAN 0 -#define CONFIG_FILTER_7BIT 1 #define CONFIG_PARALLEL_DEBLOCKING 1 +#define CONFIG_DEBLOCK_13TAP 0 #define CONFIG_LOOPFILTERING_ACROSS_TILES 1 #define CONFIG_TEMPMV_SIGNALING 1 #define CONFIG_RD_DEBUG 0 @@ -148,31 +144,48 @@ #define CONFIG_ENTROPY_STATS 0 #define CONFIG_MASKED_TX 0 #define CONFIG_DEPENDENT_HORZTILES 0 -#define CONFIG_DIST_8X8 0 -#define CONFIG_DAALA_DIST 0 -#define CONFIG_TRIPRED 0 +#define CONFIG_DIST_8X8 1 #define CONFIG_PALETTE_THROUGHPUT 1 #define CONFIG_REF_ADAPT 0 #define CONFIG_LV_MAP 0 +#define CONFIG_CTX1D 0 #define CONFIG_TXK_SEL 0 #define CONFIG_MV_COMPRESS 1 +#define CONFIG_SEGMENT_ZEROMV 0 #define CONFIG_FRAME_SUPERRES 0 #define CONFIG_NEW_MULTISYMBOL 0 #define CONFIG_COMPOUND_SINGLEREF 0 -#define CONFIG_AOM_QM 0 +#define CONFIG_AOM_QM 1 #define CONFIG_ONE_SIDED_COMPOUND 1 -#define CONFIG_EXT_COMP_REFS 0 +#define CONFIG_EXT_COMP_REFS 1 #define CONFIG_SMOOTH_HV 1 #define CONFIG_VAR_REFS 0 -#define CONFIG_RECT_INTRA_PRED 1 #define CONFIG_LGT 0 +#define CONFIG_LGT_FROM_PRED 0 #define CONFIG_SBL_SYMBOL 0 #define CONFIG_NCOBMC_ADAPT_WEIGHT 0 #define CONFIG_BGSPRITE 0 #define CONFIG_VAR_TX_NO_TX_MODE 0 #define CONFIG_MRC_TX 0 #define CONFIG_LPF_DIRECT 0 -#define CONFIG_UV_LVL 0 +#define CONFIG_LOOPFILTER_LEVEL 0 +#define CONFIG_NO_FRAME_CONTEXT_SIGNALING 0 +#define CONFIG_TXMG 1 +#define CONFIG_MAX_TILE 0 +#define CONFIG_HASH_ME 0 +#define CONFIG_COLORSPACE_HEADERS 0 +#define CONFIG_MFMV 0 +#define CONFIG_FRAME_MARKER 0 +#define CONFIG_JNT_COMP 0 +#define CONFIG_FRAME_SIGN_BIAS 0 +#define CONFIG_EXT_SKIP 0 +#define CONFIG_OBU 0 +#define CONFIG_AMVR 0 +#define CONFIG_LPF_SB 0 +#define CONFIG_OPT_REF_MV 0 +#define CONFIG_TMV 0 +#define CONFIG_RESTRICT_COMPRESSED_HDR 0 +#define CONFIG_HORZONLY_FRAME_SUPERRES 0 #define CONFIG_ANALYZER 0 #define DECODE_WIDTH_LIMIT 8192 #define DECODE_HEIGHT_LIMIT 4608 diff --git a/media/libaom/config/linux/x64/aom_dsp_rtcd.h b/media/libaom/config/linux/x64/aom_dsp_rtcd.h index 6ee856f1d..d2bee385f 100644 --- a/media/libaom/config/linux/x64/aom_dsp_rtcd.h +++ b/media/libaom/config/linux/x64/aom_dsp_rtcd.h @@ -20,6 +20,9 @@ extern "C" { #endif +void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +#define aom_blend_a64_d32_mask aom_blend_a64_d32_mask_c + void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); @@ -51,6 +54,28 @@ void aom_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, void aom_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*aom_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_hip_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_hip aom_convolve8_add_src_hip_sse2 + +void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz_hip aom_convolve8_add_src_horiz_hip_c + +void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert_hip aom_convolve8_add_src_vert_hip_c + void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -312,33 +337,41 @@ void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin #define aom_dc_128_predictor_16x16 aom_dc_128_predictor_16x16_sse2 void aom_dc_128_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c +void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_sse2 void aom_dc_128_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c +void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_sse2 void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c void aom_dc_128_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x16 aom_dc_128_predictor_32x16_c +void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x32 aom_dc_128_predictor_32x32_sse2 +void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_4x4 aom_dc_128_predictor_4x4_sse2 void aom_dc_128_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c +void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_sse2 void aom_dc_128_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c +void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_sse2 void aom_dc_128_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_c +void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_sse2 void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -349,33 +382,41 @@ void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const ui #define aom_dc_left_predictor_16x16 aom_dc_left_predictor_16x16_sse2 void aom_dc_left_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c +void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_sse2 void aom_dc_left_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c +void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_sse2 void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c void aom_dc_left_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x16 aom_dc_left_predictor_32x16_c +void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x32 aom_dc_left_predictor_32x32_sse2 +void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_4x4 aom_dc_left_predictor_4x4_sse2 void aom_dc_left_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c +void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_sse2 void aom_dc_left_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c +void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_sse2 void aom_dc_left_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_c +void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_sse2 void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -386,33 +427,41 @@ void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_dc_predictor_16x16 aom_dc_predictor_16x16_sse2 void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c +void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_sse2 void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c +void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_sse2 void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x16 aom_dc_predictor_32x16_c +void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x32 aom_dc_predictor_32x32_sse2 +void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_4x4 aom_dc_predictor_4x4_sse2 void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c +void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_sse2 void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c +void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_sse2 void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_c +void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_sse2 void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -423,33 +472,41 @@ void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin #define aom_dc_top_predictor_16x16 aom_dc_top_predictor_16x16_sse2 void aom_dc_top_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c +void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_sse2 void aom_dc_top_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c +void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_sse2 void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c void aom_dc_top_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x16 aom_dc_top_predictor_32x16_c +void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x32 aom_dc_top_predictor_32x32_sse2 +void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_4x4 aom_dc_top_predictor_4x4_sse2 void aom_dc_top_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c +void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_sse2 void aom_dc_top_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c +void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_sse2 void aom_dc_top_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_c +void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_sse2 void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -503,33 +560,40 @@ void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_h_predictor_16x16 aom_h_predictor_16x16_sse2 void aom_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x32 aom_h_predictor_16x32_c +void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_16x32 aom_h_predictor_16x32_sse2 void aom_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x8 aom_h_predictor_16x8_c +void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_16x8 aom_h_predictor_16x8_sse2 void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_2x2 aom_h_predictor_2x2_c void aom_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x16 aom_h_predictor_32x16_c +void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_32x16 aom_h_predictor_32x16_sse2 void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x32 aom_h_predictor_32x32_sse2 +void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_4x4 aom_h_predictor_4x4_sse2 void aom_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_4x8 aom_h_predictor_4x8_c +void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_4x8 aom_h_predictor_4x8_sse2 void aom_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x16 aom_h_predictor_8x16_c +void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_8x16 aom_h_predictor_8x16_sse2 void aom_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x4 aom_h_predictor_8x4_c +void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_8x4 aom_h_predictor_8x4_sse2 void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -1455,6 +1519,26 @@ void aom_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t void aom_highbd_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src aom_highbd_convolve8_add_src_sse2 + +void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_hip_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz aom_highbd_convolve8_add_src_horiz_c + +void aom_highbd_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz_hip aom_highbd_convolve8_add_src_horiz_hip_c + +void aom_highbd_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert aom_highbd_convolve8_add_src_vert_c + +void aom_highbd_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert_hip aom_highbd_convolve8_add_src_vert_hip_c + void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); @@ -1491,7 +1575,8 @@ void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uin RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_d117_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_16x16 aom_highbd_d117_predictor_16x16_c +void aom_highbd_d117_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_16x32 aom_highbd_d117_predictor_16x32_c @@ -1506,10 +1591,12 @@ void aom_highbd_d117_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d117_predictor_32x16 aom_highbd_d117_predictor_32x16_c void aom_highbd_d117_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_32x32 aom_highbd_d117_predictor_32x32_c +void aom_highbd_d117_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_c +void aom_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_sse2 void aom_highbd_d117_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_4x8 aom_highbd_d117_predictor_4x8_c @@ -1521,10 +1608,12 @@ void aom_highbd_d117_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d117_predictor_8x4 aom_highbd_d117_predictor_8x4_c void aom_highbd_d117_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_8x8 aom_highbd_d117_predictor_8x8_c +void aom_highbd_d117_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_16x16 aom_highbd_d135_predictor_16x16_c +void aom_highbd_d135_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_16x32 aom_highbd_d135_predictor_16x32_c @@ -1539,10 +1628,12 @@ void aom_highbd_d135_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d135_predictor_32x16 aom_highbd_d135_predictor_32x16_c void aom_highbd_d135_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_32x32 aom_highbd_d135_predictor_32x32_c +void aom_highbd_d135_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_c +void aom_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_sse2 void aom_highbd_d135_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_4x8 aom_highbd_d135_predictor_4x8_c @@ -1554,10 +1645,12 @@ void aom_highbd_d135_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d135_predictor_8x4 aom_highbd_d135_predictor_8x4_c void aom_highbd_d135_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_8x8 aom_highbd_d135_predictor_8x8_c +void aom_highbd_d135_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_16x16 aom_highbd_d153_predictor_16x16_c +void aom_highbd_d153_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_16x32 aom_highbd_d153_predictor_16x32_c @@ -1572,10 +1665,12 @@ void aom_highbd_d153_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d153_predictor_32x16 aom_highbd_d153_predictor_32x16_c void aom_highbd_d153_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_32x32 aom_highbd_d153_predictor_32x32_c +void aom_highbd_d153_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_c +void aom_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_sse2 void aom_highbd_d153_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_4x8 aom_highbd_d153_predictor_4x8_c @@ -1587,7 +1682,8 @@ void aom_highbd_d153_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d153_predictor_8x4 aom_highbd_d153_predictor_8x4_c void aom_highbd_d153_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_8x8 aom_highbd_d153_predictor_8x8_c +void aom_highbd_d153_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d207e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d207e_predictor_16x16 aom_highbd_d207e_predictor_16x16_c @@ -1623,37 +1719,47 @@ void aom_highbd_d207e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_d207e_predictor_8x8 aom_highbd_d207e_predictor_8x8_c void aom_highbd_d45e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x16 aom_highbd_d45e_predictor_16x16_c +void aom_highbd_d45e_predictor_16x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x32 aom_highbd_d45e_predictor_16x32_c +void aom_highbd_d45e_predictor_16x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x8 aom_highbd_d45e_predictor_16x8_c +void aom_highbd_d45e_predictor_16x8_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d45e_predictor_2x2 aom_highbd_d45e_predictor_2x2_c void aom_highbd_d45e_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x16 aom_highbd_d45e_predictor_32x16_c +void aom_highbd_d45e_predictor_32x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x32 aom_highbd_d45e_predictor_32x32_c +void aom_highbd_d45e_predictor_32x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_c +void aom_highbd_d45e_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_sse2 void aom_highbd_d45e_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_c +void aom_highbd_d45e_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_sse2 void aom_highbd_d45e_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_c +void aom_highbd_d45e_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_sse2 void aom_highbd_d45e_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_c +void aom_highbd_d45e_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_sse2 void aom_highbd_d45e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_c +void aom_highbd_d45e_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_sse2 void aom_highbd_d63e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d63e_predictor_16x16 aom_highbd_d63e_predictor_16x16_c @@ -1689,86 +1795,109 @@ void aom_highbd_d63e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d63e_predictor_8x8 aom_highbd_d63e_predictor_8x8_c void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_c +void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_sse2 void aom_highbd_dc_128_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_c +void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_sse2 void aom_highbd_dc_128_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_c +void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_sse2 void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c void aom_highbd_dc_128_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_c +void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_sse2 void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_c +void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_sse2 void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_c +void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_sse2 void aom_highbd_dc_128_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_c +void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_sse2 void aom_highbd_dc_128_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_c +void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_sse2 void aom_highbd_dc_128_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_c +void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_sse2 void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_c +void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_sse2 void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_c +void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_sse2 void aom_highbd_dc_left_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_c +void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_sse2 void aom_highbd_dc_left_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_c +void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_sse2 void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c void aom_highbd_dc_left_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_c +void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_sse2 void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_c +void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_sse2 void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_c +void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_sse2 void aom_highbd_dc_left_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_c +void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_sse2 void aom_highbd_dc_left_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_c +void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_sse2 void aom_highbd_dc_left_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_c +void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_sse2 void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_c +void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_sse2 void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_16x16 aom_highbd_dc_predictor_16x16_sse2 void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_c +void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_sse2 void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_c +void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_sse2 void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_c +void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_sse2 void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -1779,50 +1908,63 @@ void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_dc_predictor_4x4 aom_highbd_dc_predictor_4x4_sse2 void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_c +void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_sse2 void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_c +void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_sse2 void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_c +void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_sse2 void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_8x8 aom_highbd_dc_predictor_8x8_sse2 void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_c +void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_sse2 void aom_highbd_dc_top_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_c +void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_sse2 void aom_highbd_dc_top_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_c +void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_sse2 void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c void aom_highbd_dc_top_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_c +void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_sse2 void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_c +void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_sse2 void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_c +void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_sse2 void aom_highbd_dc_top_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_c +void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_sse2 void aom_highbd_dc_top_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_c +void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_sse2 void aom_highbd_dc_top_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_c +void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_sse2 void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_c +void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_sse2 void aom_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -1845,37 +1987,47 @@ void aom_highbd_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int strid #define aom_highbd_fdct8x8 aom_highbd_fdct8x8_sse2 void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_c +void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_sse2 void aom_highbd_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_c +void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_sse2 void aom_highbd_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_c +void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_sse2 void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c void aom_highbd_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_c +void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_sse2 void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_c +void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_sse2 void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_c +void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_sse2 void aom_highbd_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_c +void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_sse2 void aom_highbd_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_c +void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_sse2 void aom_highbd_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_c +void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_sse2 void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_c +void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_sse2 void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); #define aom_highbd_iwht4x4_16_add aom_highbd_iwht4x4_16_add_c @@ -1889,7 +2041,8 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int pitch, const uint8_t *bli void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_horizontal_4_dual aom_highbd_lpf_horizontal_4_dual_sse2 +void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1897,11 +2050,13 @@ void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *bli void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_horizontal_8_dual aom_highbd_lpf_horizontal_8_dual_sse2 +void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); -#define aom_highbd_lpf_horizontal_edge_16 aom_highbd_lpf_horizontal_edge_16_sse2 +void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_edge_16)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1913,7 +2068,8 @@ void aom_highbd_lpf_vertical_16_sse2(uint16_t *s, int pitch, const uint8_t *blim void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); -#define aom_highbd_lpf_vertical_16_dual aom_highbd_lpf_vertical_16_dual_sse2 +void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_16_dual)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1921,7 +2077,8 @@ void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimi void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_vertical_4_dual aom_highbd_lpf_vertical_4_dual_sse2 +void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1929,7 +2086,8 @@ void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimi void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_vertical_8_dual aom_highbd_lpf_vertical_8_dual_sse2 +void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); unsigned int aom_highbd_masked_sad16x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_highbd_masked_sad16x16_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2510,16 +2668,19 @@ void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_v_predictor_16x16 aom_highbd_v_predictor_16x16_sse2 void aom_highbd_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_c +void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_sse2 void aom_highbd_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_c +void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_sse2 void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c void aom_highbd_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_c +void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_sse2 void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2530,13 +2691,16 @@ void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_v_predictor_4x4 aom_highbd_v_predictor_4x4_sse2 void aom_highbd_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_c +void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_sse2 void aom_highbd_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_c +void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_sse2 void aom_highbd_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_c +void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_sse2 void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2626,50 +2790,43 @@ void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, con #define aom_lpf_horizontal_4 aom_lpf_horizontal_4_sse2 void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_sse2 +#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_horizontal_8 aom_lpf_horizontal_8_sse2 void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_sse2 +#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_c void aom_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_16 aom_lpf_horizontal_edge_16_sse2 void aom_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_8 aom_lpf_horizontal_edge_8_sse2 void aom_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_16 aom_lpf_vertical_16_sse2 void aom_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_sse2 +#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_c void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_4 aom_lpf_vertical_4_sse2 void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_sse2 +#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_8 aom_lpf_vertical_8_sse2 void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_sse2 +#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_c unsigned int aom_masked_sad16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_masked_sad16x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2940,37 +3097,52 @@ unsigned int aom_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, con RTCD_EXTERN unsigned int (*aom_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c +void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c +void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c +void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c void aom_paeth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c +void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c +void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c +void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c +void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c +void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c +void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c +void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -3273,37 +3445,47 @@ void aom_smooth_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_ #define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x16 aom_smooth_predictor_16x16_c +void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_c +void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_c +void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c void aom_smooth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x16 aom_smooth_predictor_32x16_c +void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x32 aom_smooth_predictor_32x32_c +void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x4 aom_smooth_predictor_4x4_c +void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_c +void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_c +void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x4 aom_smooth_predictor_8x4_c +void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x8 aom_smooth_predictor_8x8_c +void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c @@ -3493,33 +3675,41 @@ void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_v_predictor_16x16 aom_v_predictor_16x16_sse2 void aom_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x32 aom_v_predictor_16x32_c +void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_16x32 aom_v_predictor_16x32_sse2 void aom_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x8 aom_v_predictor_16x8_c +void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_16x8 aom_v_predictor_16x8_sse2 void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_2x2 aom_v_predictor_2x2_c void aom_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x16 aom_v_predictor_32x16_c +void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x32 aom_v_predictor_32x32_sse2 +void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_4x4 aom_v_predictor_4x4_sse2 void aom_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_4x8 aom_v_predictor_4x8_c +void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_4x8 aom_v_predictor_4x8_sse2 void aom_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x16 aom_v_predictor_8x16_c +void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_8x16 aom_v_predictor_8x16_sse2 void aom_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x4 aom_v_predictor_8x4_c +void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_8x4 aom_v_predictor_8x4_sse2 void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3626,6 +3816,12 @@ static void setup_rtcd_internal(void) aom_convolve8 = aom_convolve8_sse2; if (flags & HAS_SSSE3) aom_convolve8 = aom_convolve8_ssse3; if (flags & HAS_AVX2) aom_convolve8 = aom_convolve8_avx2; + aom_convolve8_add_src = aom_convolve8_add_src_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src = aom_convolve8_add_src_ssse3; + aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_ssse3; + aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_ssse3; aom_convolve8_avg = aom_convolve8_avg_sse2; if (flags & HAS_SSSE3) aom_convolve8_avg = aom_convolve8_avg_ssse3; aom_convolve8_avg_horiz = aom_convolve8_avg_horiz_sse2; @@ -3648,6 +3844,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_d153_predictor_8x8 = aom_d153_predictor_8x8_ssse3; aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_c; if (flags & HAS_SSSE3) aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_ssse3; + aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_avx2; + aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_avx2; + aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_avx2; + aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_avx2; + aom_dc_predictor_32x16 = aom_dc_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_avx2; + aom_dc_predictor_32x32 = aom_dc_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_avx2; + aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_avx2; + aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_avx2; aom_fdct32x32 = aom_fdct32x32_sse2; if (flags & HAS_AVX2) aom_fdct32x32 = aom_fdct32x32_avx2; aom_fdct32x32_rd = aom_fdct32x32_rd_sse2; @@ -3656,6 +3868,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_fdct8x8 = aom_fdct8x8_ssse3; aom_get16x16var = aom_get16x16var_sse2; if (flags & HAS_AVX2) aom_get16x16var = aom_get16x16var_avx2; + aom_h_predictor_32x32 = aom_h_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_h_predictor_32x32 = aom_h_predictor_32x32_avx2; aom_hadamard_8x8 = aom_hadamard_8x8_sse2; if (flags & HAS_SSSE3) aom_hadamard_8x8 = aom_hadamard_8x8_ssse3; aom_highbd_10_masked_sub_pixel_variance16x16 = aom_highbd_10_masked_sub_pixel_variance16x16_c; @@ -3814,6 +4028,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_sse4_1; aom_highbd_convolve8 = aom_highbd_convolve8_sse2; if (flags & HAS_AVX2) aom_highbd_convolve8 = aom_highbd_convolve8_avx2; + aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_c; + if (flags & HAS_SSSE3) aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_ssse3; aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_sse2; if (flags & HAS_AVX2) aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_avx2; aom_highbd_convolve8_avg_horiz = aom_highbd_convolve8_avg_horiz_sse2; @@ -3828,6 +4044,46 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) aom_highbd_convolve_avg = aom_highbd_convolve_avg_avx2; aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2; if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2; + aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_ssse3; + aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_ssse3; + aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_ssse3; + aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_ssse3; + aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_ssse3; + aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_ssse3; + aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_ssse3; + aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_ssse3; + aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_ssse3; + aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_avx2; + aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_avx2; + aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_avx2; + aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_avx2; + aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_avx2; + aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_avx2; + aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_avx2; + aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_avx2; + aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_avx2; + aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_avx2; + aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_avx2; aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_ssse3; aom_highbd_masked_sad16x32 = aom_highbd_masked_sad16x32_c; @@ -3979,10 +4235,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_idct8x8_12_add = aom_idct8x8_12_add_ssse3; aom_idct8x8_64_add = aom_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) aom_idct8x8_64_add = aom_idct8x8_64_add_ssse3; - aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_avx2; - aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_avx2; aom_masked_sad16x16 = aom_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_masked_sad16x16 = aom_masked_sad16x16_ssse3; aom_masked_sad16x32 = aom_masked_sad16x32_c; @@ -4089,6 +4341,31 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_obmc_variance8x4 = aom_obmc_variance8x4_sse4_1; aom_obmc_variance8x8 = aom_obmc_variance8x8_c; if (flags & HAS_SSE4_1) aom_obmc_variance8x8 = aom_obmc_variance8x8_sse4_1; + aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_avx2; + aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2; + aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_avx2; + aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_avx2; + aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_avx2; + aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3; + aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3; + aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3; + aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3; + aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_ssse3; aom_quantize_b = aom_quantize_b_sse2; if (flags & HAS_SSSE3) aom_quantize_b = aom_quantize_b_ssse3; if (flags & HAS_AVX) aom_quantize_b = aom_quantize_b_avx; @@ -4147,6 +4424,26 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_sad8x8x8 = aom_sad8x8x8_sse4_1; aom_scaled_2d = aom_scaled_2d_c; if (flags & HAS_SSSE3) aom_scaled_2d = aom_scaled_2d_ssse3; + aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_ssse3; + aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3; + aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3; + aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_ssse3; + aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_ssse3; + aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3; + aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3; + aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3; + aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3; + aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_ssse3; aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_ssse3; aom_sub_pixel_avg_variance16x32 = aom_sub_pixel_avg_variance16x32_sse2; @@ -4203,6 +4500,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_sub_pixel_variance8x4 = aom_sub_pixel_variance8x4_ssse3; aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_ssse3; + aom_v_predictor_32x16 = aom_v_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x16 = aom_v_predictor_32x16_avx2; + aom_v_predictor_32x32 = aom_v_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x32 = aom_v_predictor_32x32_avx2; aom_variance16x16 = aom_variance16x16_sse2; if (flags & HAS_AVX2) aom_variance16x16 = aom_variance16x16_avx2; aom_variance32x16 = aom_variance32x16_sse2; diff --git a/media/libaom/config/linux/x64/av1_rtcd.h b/media/libaom/config/linux/x64/av1_rtcd.h index d4fc99c10..66c0349aa 100644 --- a/media/libaom/config/linux/x64/av1_rtcd.h +++ b/media/libaom/config/linux/x64/av1_rtcd.h @@ -31,44 +31,39 @@ struct search_site_config; struct mv; union int_mv; struct yv12_buffer_config; -typedef uint16_t od_dering_in; #ifdef __cplusplus extern "C" { #endif -void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void apply_selfguided_restoration_c(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration)(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); + +void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_highbd_sse4_1(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration_highbd)(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +#define av1_convolve_2d av1_convolve_2d_sse2 + +void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); + void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_horiz)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +void av1_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +RTCD_EXTERN void (*av1_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); + void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); @@ -76,9 +71,6 @@ RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_ int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_diamond_search_sad av1_diamond_search_sad_c -void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_fdct8x8_quant av1_fdct8x8_quant_c - void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); @@ -133,6 +125,14 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, struct t void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht8x8 av1_fht8x8_sse2 +void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength); +void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength); + +void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength); +void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength); + int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_full_range_search av1_full_range_search_c @@ -141,46 +141,42 @@ int av1_full_search_sadx3(const struct macroblock *x, const struct mv *ref_mv, i int av1_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); RTCD_EXTERN int (*av1_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); -void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); +void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type); #define av1_fwd_idtx av1_fwd_idtx_c -void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x32 av1_fwd_txfm2d_16x32_c -void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x8 av1_fwd_txfm2d_16x8_c -void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x16 av1_fwd_txfm2d_32x16_c -void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x8 av1_fwd_txfm2d_4x8_c -void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); - -void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x16 av1_fwd_txfm2d_8x16_c -void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x4 av1_fwd_txfm2d_8x4_c -void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define av1_fwht4x4 av1_fwht4x4_c @@ -213,6 +209,14 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8 void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2 +void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); + +void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); + void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c @@ -227,6 +231,10 @@ void av1_highbd_convolve_init_c(void); void av1_highbd_convolve_init_sse4_1(void); RTCD_EXTERN void (*av1_highbd_convolve_init)(void); +void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +void av1_highbd_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); + void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); void av1_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); RTCD_EXTERN void (*av1_highbd_convolve_vert)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); @@ -273,9 +281,6 @@ void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c -void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); -#define av1_highbd_quantize_b av1_highbd_quantize_b_c - void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); @@ -288,6 +293,14 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); @@ -340,48 +353,45 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht8x8_64_add av1_iht8x8_64_add_sse2 -void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c -void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c -void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c -void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c -void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c - -void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c -void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c -void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); void av1_lowbd_convolve_init_c(void); void av1_lowbd_convolve_init_ssse3(void); RTCD_EXTERN void (*av1_lowbd_convolve_init)(void); -void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale); #define av1_quantize_b av1_quantize_b_c void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -393,10 +403,26 @@ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void av1_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); + void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); void av1_temporal_filter_apply_sse2(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); #define av1_temporal_filter_apply av1_temporal_filter_apply_sse2 +void av1_upsample_intra_edge_c(uint8_t *p, int sz); +void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz); +RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz); + +void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd); +void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd); +RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd); + void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); @@ -414,64 +440,38 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, con uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); #define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_sse2 +void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); + +int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift); + double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); RTCD_EXTERN double (*compute_cross_correlation)(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); -void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); -int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse2(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_ssse3(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse4_1(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -RTCD_EXTERN int (*od_dir_find8)(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); - -void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - -void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - void aom_rtcd(void); #ifdef RTCD_C @@ -482,28 +482,28 @@ static void setup_rtcd_internal(void) (void)flags; - aom_clpf_block = aom_clpf_block_sse2; - if (flags & HAS_SSSE3) aom_clpf_block = aom_clpf_block_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block = aom_clpf_block_sse4_1; - aom_clpf_block_hbd = aom_clpf_block_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_block_hbd = aom_clpf_block_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block_hbd = aom_clpf_block_hbd_sse4_1; - aom_clpf_hblock = aom_clpf_hblock_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock = aom_clpf_hblock_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock = aom_clpf_hblock_sse4_1; - aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse4_1; + apply_selfguided_restoration = apply_selfguided_restoration_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1; + apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_sse4_1; av1_block_error = av1_block_error_c; if (flags & HAS_AVX2) av1_block_error = av1_block_error_avx2; + av1_convolve_2d_scale = av1_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1; av1_convolve_horiz = av1_convolve_horiz_c; if (flags & HAS_SSSE3) av1_convolve_horiz = av1_convolve_horiz_ssse3; + av1_convolve_rounding = av1_convolve_rounding_c; + if (flags & HAS_AVX2) av1_convolve_rounding = av1_convolve_rounding_avx2; av1_convolve_vert = av1_convolve_vert_c; if (flags & HAS_SSSE3) av1_convolve_vert = av1_convolve_vert_ssse3; av1_fht16x16 = av1_fht16x16_sse2; if (flags & HAS_AVX2) av1_fht16x16 = av1_fht16x16_avx2; av1_fht32x32 = av1_fht32x32_sse2; if (flags & HAS_AVX2) av1_fht32x32 = av1_fht32x32_avx2; + av1_filter_intra_edge = av1_filter_intra_edge_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1; + av1_filter_intra_edge_high = av1_filter_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1; av1_full_search_sad = av1_full_search_sad_c; if (flags & HAS_SSE3) av1_full_search_sad = av1_full_search_sadx3; if (flags & HAS_SSE4_1) av1_full_search_sad = av1_full_search_sadx8; @@ -513,14 +513,18 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_sse4_1; av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_sse4_1; - av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_c; - if (flags & HAS_SSE4_1) av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_sse4_1; av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_sse4_1; + av1_highbd_convolve_2d = av1_highbd_convolve_2d_c; + if (flags & HAS_SSSE3) av1_highbd_convolve_2d = av1_highbd_convolve_2d_ssse3; + av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1; av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_sse4_1; av1_highbd_convolve_init = av1_highbd_convolve_init_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_init = av1_highbd_convolve_init_sse4_1; + av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_c; + if (flags & HAS_AVX2) av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_avx2; av1_highbd_convolve_vert = av1_highbd_convolve_vert_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_vert = av1_highbd_convolve_vert_sse4_1; av1_highbd_quantize_fp = av1_highbd_quantize_fp_c; @@ -528,6 +532,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_highbd_quantize_fp = av1_highbd_quantize_fp_avx2; av1_highbd_warp_affine = av1_highbd_warp_affine_c; if (flags & HAS_SSSE3) av1_highbd_warp_affine = av1_highbd_warp_affine_ssse3; + av1_highpass_filter = av1_highpass_filter_c; + if (flags & HAS_SSE4_1) av1_highpass_filter = av1_highpass_filter_sse4_1; + av1_highpass_filter_highbd = av1_highpass_filter_highbd_c; + if (flags & HAS_SSE4_1) av1_highpass_filter_highbd = av1_highpass_filter_highbd_sse4_1; av1_iht16x16_256_add = av1_iht16x16_256_add_sse2; if (flags & HAS_AVX2) av1_iht16x16_256_add = av1_iht16x16_256_add_avx2; av1_inv_txfm2d_add_16x16 = av1_inv_txfm2d_add_16x16_c; @@ -544,37 +552,34 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_quantize_fp = av1_quantize_fp_avx2; av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c; if (flags & HAS_AVX2) av1_quantize_fp_32x32 = av1_quantize_fp_32x32_avx2; + av1_selfguided_restoration = av1_selfguided_restoration_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1; + av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_sse4_1; + av1_upsample_intra_edge = av1_upsample_intra_edge_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1; + av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1; av1_warp_affine = av1_warp_affine_sse2; if (flags & HAS_SSSE3) av1_warp_affine = av1_warp_affine_ssse3; + cdef_filter_block = cdef_filter_block_sse2; + if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3; + if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1; + if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2; + cdef_find_dir = cdef_find_dir_sse2; + if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3; + if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1; + if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2; compute_cross_correlation = compute_cross_correlation_c; if (flags & HAS_SSE4_1) compute_cross_correlation = compute_cross_correlation_sse4_1; - copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse4_1; - copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse4_1; - copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse4_1; - copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse4_1; copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1; + if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2; copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1; - od_dir_find8 = od_dir_find8_sse2; - if (flags & HAS_SSSE3) od_dir_find8 = od_dir_find8_ssse3; - if (flags & HAS_SSE4_1) od_dir_find8 = od_dir_find8_sse4_1; - od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse4_1; - od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse4_1; + if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2; } #endif diff --git a/media/libaom/config/mac/x64/aom_config.asm b/media/libaom/config/mac/x64/aom_config.asm index 2821f150f..455683b8f 100644 --- a/media/libaom/config/mac/x64/aom_config.asm +++ b/media/libaom/config/mac/x64/aom_config.asm @@ -46,8 +46,6 @@ CONFIG_AV1 equ 1 CONFIG_STATIC_MSVCRT equ 0 CONFIG_SPATIAL_RESAMPLING equ 1 CONFIG_REALTIME_ONLY equ 0 -CONFIG_ONTHEFLY_BITPACKING equ 0 -CONFIG_ERROR_CONCEALMENT equ 0 CONFIG_SHARED equ 0 CONFIG_STATIC equ 1 CONFIG_SMALL equ 0 @@ -60,73 +58,71 @@ CONFIG_ACCOUNTING equ 0 CONFIG_INSPECTION equ 0 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_ENCODE_PERF_TESTS equ 0 +CONFIG_BITSTREAM_DEBUG equ 0 +CONFIG_SYMBOLRATE equ 0 CONFIG_COEFFICIENT_RANGE_CHECKING equ 0 CONFIG_LOWBITDEPTH equ 1 CONFIG_HIGHBITDEPTH equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_SIZE_LIMIT equ 1 -CONFIG_COLORSPACE_HEADERS equ 0 CONFIG_FP_MB_STATS equ 0 CONFIG_CDEF equ 1 +CONFIG_CDEF_SINGLEPASS equ 1 CONFIG_VAR_TX equ 1 CONFIG_RECT_TX equ 1 CONFIG_RECT_TX_EXT equ 0 CONFIG_TPL_MV equ 0 CONFIG_DUAL_FILTER equ 1 -CONFIG_CONVOLVE_ROUND equ 0 +CONFIG_CONVOLVE_ROUND equ 1 CONFIG_COMPOUND_ROUND equ 0 CONFIG_EXT_TX equ 1 -CONFIG_DPCM_INTRA equ 0 CONFIG_TX64X64 equ 0 CONFIG_EXT_INTRA equ 1 CONFIG_INTRA_INTERP equ 0 CONFIG_FILTER_INTRA equ 0 -CONFIG_INTRA_EDGE equ 0 +CONFIG_INTRA_EDGE equ 1 CONFIG_INTRABC equ 0 -CONFIG_EXT_INTER equ 1 CONFIG_INTERINTRA equ 1 CONFIG_WEDGE equ 1 CONFIG_COMPOUND_SEGMENT equ 1 CONFIG_EXT_REFS equ 1 -CONFIG_ALTREF2 equ 0 -CONFIG_SPEED_REFS equ 0 -CONFIG_GF_GROUPS equ 0 -CONFIG_FLEX_REFS equ 0 CONFIG_GLOBAL_MOTION equ 1 CONFIG_NEW_QUANT equ 0 CONFIG_SUPERTX equ 0 CONFIG_ANS equ 0 -CONFIG_LOOP_RESTORATION equ 0 +CONFIG_LOOP_RESTORATION equ 1 +CONFIG_STRIPED_LOOP_RESTORATION equ 0 CONFIG_EXT_PARTITION equ 0 CONFIG_EXT_PARTITION_TYPES equ 0 +CONFIG_EXT_PARTITION_TYPES_AB equ 0 CONFIG_UNPOISON_PARTITION_CTX equ 0 CONFIG_EXT_TILE equ 0 CONFIG_MOTION_VAR equ 1 CONFIG_NCOBMC equ 0 CONFIG_WARPED_MOTION equ 1 CONFIG_Q_ADAPT_PROBS equ 0 -CONFIG_BITSTREAM_DEBUG equ 0 CONFIG_INTER_STATS_ONLY equ 0 -CONFIG_ALT_INTRA equ 1 -CONFIG_PALETTE equ 1 CONFIG_PALETTE_DELTA_ENCODING equ 0 CONFIG_RAWBITS equ 0 -CONFIG_EC_SMALLMUL equ 1 +CONFIG_KF_CTX equ 0 CONFIG_PVQ equ 0 CONFIG_CFL equ 0 CONFIG_XIPHRC equ 0 CONFIG_DCT_ONLY equ 0 +CONFIG_DAALA_TX equ 0 CONFIG_DAALA_DCT4 equ 0 CONFIG_DAALA_DCT8 equ 0 +CONFIG_DAALA_DCT16 equ 0 +CONFIG_DAALA_DCT32 equ 0 +CONFIG_DAALA_DCT64 equ 0 CONFIG_CB4X4 equ 1 CONFIG_CHROMA_2X2 equ 0 CONFIG_CHROMA_SUB8X8 equ 1 CONFIG_FRAME_SIZE equ 0 -CONFIG_DELTA_Q equ 1 CONFIG_EXT_DELTA_Q equ 1 CONFIG_ADAPT_SCAN equ 0 -CONFIG_FILTER_7BIT equ 1 CONFIG_PARALLEL_DEBLOCKING equ 1 +CONFIG_DEBLOCK_13TAP equ 0 CONFIG_LOOPFILTERING_ACROSS_TILES equ 1 CONFIG_TEMPMV_SIGNALING equ 1 CONFIG_RD_DEBUG equ 0 @@ -135,29 +131,46 @@ CONFIG_COEF_INTERLEAVE equ 0 CONFIG_ENTROPY_STATS equ 0 CONFIG_MASKED_TX equ 0 CONFIG_DEPENDENT_HORZTILES equ 0 -CONFIG_DIST_8X8 equ 0 -CONFIG_DAALA_DIST equ 0 -CONFIG_TRIPRED equ 0 +CONFIG_DIST_8X8 equ 1 CONFIG_PALETTE_THROUGHPUT equ 1 CONFIG_REF_ADAPT equ 0 CONFIG_LV_MAP equ 0 +CONFIG_CTX1D equ 0 CONFIG_TXK_SEL equ 0 CONFIG_MV_COMPRESS equ 1 +CONFIG_SEGMENT_ZEROMV equ 0 CONFIG_FRAME_SUPERRES equ 0 CONFIG_NEW_MULTISYMBOL equ 0 CONFIG_COMPOUND_SINGLEREF equ 0 -CONFIG_AOM_QM equ 0 +CONFIG_AOM_QM equ 1 CONFIG_ONE_SIDED_COMPOUND equ 1 -CONFIG_EXT_COMP_REFS equ 0 +CONFIG_EXT_COMP_REFS equ 1 CONFIG_SMOOTH_HV equ 1 CONFIG_VAR_REFS equ 0 -CONFIG_RECT_INTRA_PRED equ 1 CONFIG_LGT equ 0 +CONFIG_LGT_FROM_PRED equ 0 CONFIG_SBL_SYMBOL equ 0 CONFIG_NCOBMC_ADAPT_WEIGHT equ 0 CONFIG_BGSPRITE equ 0 CONFIG_VAR_TX_NO_TX_MODE equ 0 CONFIG_MRC_TX equ 0 CONFIG_LPF_DIRECT equ 0 -CONFIG_UV_LVL equ 0 +CONFIG_LOOPFILTER_LEVEL equ 0 +CONFIG_NO_FRAME_CONTEXT_SIGNALING equ 0 +CONFIG_TXMG equ 1 +CONFIG_MAX_TILE equ 0 +CONFIG_HASH_ME equ 0 +CONFIG_COLORSPACE_HEADERS equ 0 +CONFIG_MFMV equ 0 +CONFIG_FRAME_MARKER equ 0 +CONFIG_JNT_COMP equ 0 +CONFIG_FRAME_SIGN_BIAS equ 0 +CONFIG_EXT_SKIP equ 0 +CONFIG_OBU equ 0 +CONFIG_AMVR equ 0 +CONFIG_LPF_SB equ 0 +CONFIG_OPT_REF_MV equ 0 +CONFIG_TMV equ 0 +CONFIG_RESTRICT_COMPRESSED_HDR equ 0 +CONFIG_HORZONLY_FRAME_SUPERRES equ 0 CONFIG_ANALYZER equ 0 diff --git a/media/libaom/config/mac/x64/aom_config.h b/media/libaom/config/mac/x64/aom_config.h index f9f2caadd..f3416f3dd 100644 --- a/media/libaom/config/mac/x64/aom_config.h +++ b/media/libaom/config/mac/x64/aom_config.h @@ -59,8 +59,6 @@ #define CONFIG_STATIC_MSVCRT 0 #define CONFIG_SPATIAL_RESAMPLING 1 #define CONFIG_REALTIME_ONLY 0 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 #define CONFIG_SHARED 0 #define CONFIG_STATIC 1 #define CONFIG_SMALL 0 @@ -73,73 +71,71 @@ #define CONFIG_INSPECTION 0 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_BITSTREAM_DEBUG 0 +#define CONFIG_SYMBOLRATE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_LOWBITDEPTH 1 #define CONFIG_HIGHBITDEPTH 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_SIZE_LIMIT 1 -#define CONFIG_COLORSPACE_HEADERS 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_CDEF 1 +#define CONFIG_CDEF_SINGLEPASS 1 #define CONFIG_VAR_TX 1 #define CONFIG_RECT_TX 1 #define CONFIG_RECT_TX_EXT 0 #define CONFIG_TPL_MV 0 #define CONFIG_DUAL_FILTER 1 -#define CONFIG_CONVOLVE_ROUND 0 +#define CONFIG_CONVOLVE_ROUND 1 #define CONFIG_COMPOUND_ROUND 0 #define CONFIG_EXT_TX 1 -#define CONFIG_DPCM_INTRA 0 #define CONFIG_TX64X64 0 #define CONFIG_EXT_INTRA 1 #define CONFIG_INTRA_INTERP 0 #define CONFIG_FILTER_INTRA 0 -#define CONFIG_INTRA_EDGE 0 +#define CONFIG_INTRA_EDGE 1 #define CONFIG_INTRABC 0 -#define CONFIG_EXT_INTER 1 #define CONFIG_INTERINTRA 1 #define CONFIG_WEDGE 1 #define CONFIG_COMPOUND_SEGMENT 1 #define CONFIG_EXT_REFS 1 -#define CONFIG_ALTREF2 0 -#define CONFIG_SPEED_REFS 0 -#define CONFIG_GF_GROUPS 0 -#define CONFIG_FLEX_REFS 0 #define CONFIG_GLOBAL_MOTION 1 #define CONFIG_NEW_QUANT 0 #define CONFIG_SUPERTX 0 #define CONFIG_ANS 0 -#define CONFIG_LOOP_RESTORATION 0 +#define CONFIG_LOOP_RESTORATION 1 +#define CONFIG_STRIPED_LOOP_RESTORATION 0 #define CONFIG_EXT_PARTITION 0 #define CONFIG_EXT_PARTITION_TYPES 0 +#define CONFIG_EXT_PARTITION_TYPES_AB 0 #define CONFIG_UNPOISON_PARTITION_CTX 0 #define CONFIG_EXT_TILE 0 #define CONFIG_MOTION_VAR 1 #define CONFIG_NCOBMC 0 #define CONFIG_WARPED_MOTION 1 #define CONFIG_Q_ADAPT_PROBS 0 -#define CONFIG_BITSTREAM_DEBUG 0 #define CONFIG_INTER_STATS_ONLY 0 -#define CONFIG_ALT_INTRA 1 -#define CONFIG_PALETTE 1 #define CONFIG_PALETTE_DELTA_ENCODING 0 #define CONFIG_RAWBITS 0 -#define CONFIG_EC_SMALLMUL 1 +#define CONFIG_KF_CTX 0 #define CONFIG_PVQ 0 #define CONFIG_CFL 0 #define CONFIG_XIPHRC 0 #define CONFIG_DCT_ONLY 0 +#define CONFIG_DAALA_TX 0 #define CONFIG_DAALA_DCT4 0 #define CONFIG_DAALA_DCT8 0 +#define CONFIG_DAALA_DCT16 0 +#define CONFIG_DAALA_DCT32 0 +#define CONFIG_DAALA_DCT64 0 #define CONFIG_CB4X4 1 #define CONFIG_CHROMA_2X2 0 #define CONFIG_CHROMA_SUB8X8 1 #define CONFIG_FRAME_SIZE 0 -#define CONFIG_DELTA_Q 1 #define CONFIG_EXT_DELTA_Q 1 #define CONFIG_ADAPT_SCAN 0 -#define CONFIG_FILTER_7BIT 1 #define CONFIG_PARALLEL_DEBLOCKING 1 +#define CONFIG_DEBLOCK_13TAP 0 #define CONFIG_LOOPFILTERING_ACROSS_TILES 1 #define CONFIG_TEMPMV_SIGNALING 1 #define CONFIG_RD_DEBUG 0 @@ -148,31 +144,48 @@ #define CONFIG_ENTROPY_STATS 0 #define CONFIG_MASKED_TX 0 #define CONFIG_DEPENDENT_HORZTILES 0 -#define CONFIG_DIST_8X8 0 -#define CONFIG_DAALA_DIST 0 -#define CONFIG_TRIPRED 0 +#define CONFIG_DIST_8X8 1 #define CONFIG_PALETTE_THROUGHPUT 1 #define CONFIG_REF_ADAPT 0 #define CONFIG_LV_MAP 0 +#define CONFIG_CTX1D 0 #define CONFIG_TXK_SEL 0 #define CONFIG_MV_COMPRESS 1 +#define CONFIG_SEGMENT_ZEROMV 0 #define CONFIG_FRAME_SUPERRES 0 #define CONFIG_NEW_MULTISYMBOL 0 #define CONFIG_COMPOUND_SINGLEREF 0 -#define CONFIG_AOM_QM 0 +#define CONFIG_AOM_QM 1 #define CONFIG_ONE_SIDED_COMPOUND 1 -#define CONFIG_EXT_COMP_REFS 0 +#define CONFIG_EXT_COMP_REFS 1 #define CONFIG_SMOOTH_HV 1 #define CONFIG_VAR_REFS 0 -#define CONFIG_RECT_INTRA_PRED 1 #define CONFIG_LGT 0 +#define CONFIG_LGT_FROM_PRED 0 #define CONFIG_SBL_SYMBOL 0 #define CONFIG_NCOBMC_ADAPT_WEIGHT 0 #define CONFIG_BGSPRITE 0 #define CONFIG_VAR_TX_NO_TX_MODE 0 #define CONFIG_MRC_TX 0 #define CONFIG_LPF_DIRECT 0 -#define CONFIG_UV_LVL 0 +#define CONFIG_LOOPFILTER_LEVEL 0 +#define CONFIG_NO_FRAME_CONTEXT_SIGNALING 0 +#define CONFIG_TXMG 1 +#define CONFIG_MAX_TILE 0 +#define CONFIG_HASH_ME 0 +#define CONFIG_COLORSPACE_HEADERS 0 +#define CONFIG_MFMV 0 +#define CONFIG_FRAME_MARKER 0 +#define CONFIG_JNT_COMP 0 +#define CONFIG_FRAME_SIGN_BIAS 0 +#define CONFIG_EXT_SKIP 0 +#define CONFIG_OBU 0 +#define CONFIG_AMVR 0 +#define CONFIG_LPF_SB 0 +#define CONFIG_OPT_REF_MV 0 +#define CONFIG_TMV 0 +#define CONFIG_RESTRICT_COMPRESSED_HDR 0 +#define CONFIG_HORZONLY_FRAME_SUPERRES 0 #define CONFIG_ANALYZER 0 #define DECODE_WIDTH_LIMIT 8192 #define DECODE_HEIGHT_LIMIT 4608 diff --git a/media/libaom/config/mac/x64/aom_dsp_rtcd.h b/media/libaom/config/mac/x64/aom_dsp_rtcd.h index 6ee856f1d..d2bee385f 100644 --- a/media/libaom/config/mac/x64/aom_dsp_rtcd.h +++ b/media/libaom/config/mac/x64/aom_dsp_rtcd.h @@ -20,6 +20,9 @@ extern "C" { #endif +void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +#define aom_blend_a64_d32_mask aom_blend_a64_d32_mask_c + void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); @@ -51,6 +54,28 @@ void aom_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, void aom_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*aom_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_hip_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_hip aom_convolve8_add_src_hip_sse2 + +void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz_hip aom_convolve8_add_src_horiz_hip_c + +void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert_hip aom_convolve8_add_src_vert_hip_c + void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -312,33 +337,41 @@ void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin #define aom_dc_128_predictor_16x16 aom_dc_128_predictor_16x16_sse2 void aom_dc_128_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c +void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_sse2 void aom_dc_128_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c +void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_sse2 void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c void aom_dc_128_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x16 aom_dc_128_predictor_32x16_c +void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x32 aom_dc_128_predictor_32x32_sse2 +void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_4x4 aom_dc_128_predictor_4x4_sse2 void aom_dc_128_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c +void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_sse2 void aom_dc_128_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c +void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_sse2 void aom_dc_128_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_c +void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_sse2 void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -349,33 +382,41 @@ void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const ui #define aom_dc_left_predictor_16x16 aom_dc_left_predictor_16x16_sse2 void aom_dc_left_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c +void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_sse2 void aom_dc_left_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c +void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_sse2 void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c void aom_dc_left_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x16 aom_dc_left_predictor_32x16_c +void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x32 aom_dc_left_predictor_32x32_sse2 +void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_4x4 aom_dc_left_predictor_4x4_sse2 void aom_dc_left_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c +void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_sse2 void aom_dc_left_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c +void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_sse2 void aom_dc_left_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_c +void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_sse2 void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -386,33 +427,41 @@ void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_dc_predictor_16x16 aom_dc_predictor_16x16_sse2 void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c +void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_sse2 void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c +void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_sse2 void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x16 aom_dc_predictor_32x16_c +void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x32 aom_dc_predictor_32x32_sse2 +void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_4x4 aom_dc_predictor_4x4_sse2 void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c +void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_sse2 void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c +void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_sse2 void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_c +void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_sse2 void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -423,33 +472,41 @@ void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin #define aom_dc_top_predictor_16x16 aom_dc_top_predictor_16x16_sse2 void aom_dc_top_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c +void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_sse2 void aom_dc_top_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c +void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_sse2 void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c void aom_dc_top_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x16 aom_dc_top_predictor_32x16_c +void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x32 aom_dc_top_predictor_32x32_sse2 +void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_4x4 aom_dc_top_predictor_4x4_sse2 void aom_dc_top_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c +void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_sse2 void aom_dc_top_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c +void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_sse2 void aom_dc_top_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_c +void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_sse2 void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -503,33 +560,40 @@ void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_h_predictor_16x16 aom_h_predictor_16x16_sse2 void aom_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x32 aom_h_predictor_16x32_c +void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_16x32 aom_h_predictor_16x32_sse2 void aom_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x8 aom_h_predictor_16x8_c +void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_16x8 aom_h_predictor_16x8_sse2 void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_2x2 aom_h_predictor_2x2_c void aom_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x16 aom_h_predictor_32x16_c +void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_32x16 aom_h_predictor_32x16_sse2 void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x32 aom_h_predictor_32x32_sse2 +void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_4x4 aom_h_predictor_4x4_sse2 void aom_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_4x8 aom_h_predictor_4x8_c +void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_4x8 aom_h_predictor_4x8_sse2 void aom_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x16 aom_h_predictor_8x16_c +void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_8x16 aom_h_predictor_8x16_sse2 void aom_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x4 aom_h_predictor_8x4_c +void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_8x4 aom_h_predictor_8x4_sse2 void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -1455,6 +1519,26 @@ void aom_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t void aom_highbd_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src aom_highbd_convolve8_add_src_sse2 + +void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_hip_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz aom_highbd_convolve8_add_src_horiz_c + +void aom_highbd_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz_hip aom_highbd_convolve8_add_src_horiz_hip_c + +void aom_highbd_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert aom_highbd_convolve8_add_src_vert_c + +void aom_highbd_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert_hip aom_highbd_convolve8_add_src_vert_hip_c + void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); @@ -1491,7 +1575,8 @@ void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uin RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_d117_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_16x16 aom_highbd_d117_predictor_16x16_c +void aom_highbd_d117_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_16x32 aom_highbd_d117_predictor_16x32_c @@ -1506,10 +1591,12 @@ void aom_highbd_d117_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d117_predictor_32x16 aom_highbd_d117_predictor_32x16_c void aom_highbd_d117_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_32x32 aom_highbd_d117_predictor_32x32_c +void aom_highbd_d117_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_c +void aom_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_sse2 void aom_highbd_d117_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_4x8 aom_highbd_d117_predictor_4x8_c @@ -1521,10 +1608,12 @@ void aom_highbd_d117_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d117_predictor_8x4 aom_highbd_d117_predictor_8x4_c void aom_highbd_d117_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_8x8 aom_highbd_d117_predictor_8x8_c +void aom_highbd_d117_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_16x16 aom_highbd_d135_predictor_16x16_c +void aom_highbd_d135_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_16x32 aom_highbd_d135_predictor_16x32_c @@ -1539,10 +1628,12 @@ void aom_highbd_d135_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d135_predictor_32x16 aom_highbd_d135_predictor_32x16_c void aom_highbd_d135_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_32x32 aom_highbd_d135_predictor_32x32_c +void aom_highbd_d135_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_c +void aom_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_sse2 void aom_highbd_d135_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_4x8 aom_highbd_d135_predictor_4x8_c @@ -1554,10 +1645,12 @@ void aom_highbd_d135_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d135_predictor_8x4 aom_highbd_d135_predictor_8x4_c void aom_highbd_d135_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_8x8 aom_highbd_d135_predictor_8x8_c +void aom_highbd_d135_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_16x16 aom_highbd_d153_predictor_16x16_c +void aom_highbd_d153_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_16x32 aom_highbd_d153_predictor_16x32_c @@ -1572,10 +1665,12 @@ void aom_highbd_d153_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d153_predictor_32x16 aom_highbd_d153_predictor_32x16_c void aom_highbd_d153_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_32x32 aom_highbd_d153_predictor_32x32_c +void aom_highbd_d153_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_c +void aom_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_sse2 void aom_highbd_d153_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_4x8 aom_highbd_d153_predictor_4x8_c @@ -1587,7 +1682,8 @@ void aom_highbd_d153_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d153_predictor_8x4 aom_highbd_d153_predictor_8x4_c void aom_highbd_d153_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_8x8 aom_highbd_d153_predictor_8x8_c +void aom_highbd_d153_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d207e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d207e_predictor_16x16 aom_highbd_d207e_predictor_16x16_c @@ -1623,37 +1719,47 @@ void aom_highbd_d207e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_d207e_predictor_8x8 aom_highbd_d207e_predictor_8x8_c void aom_highbd_d45e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x16 aom_highbd_d45e_predictor_16x16_c +void aom_highbd_d45e_predictor_16x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x32 aom_highbd_d45e_predictor_16x32_c +void aom_highbd_d45e_predictor_16x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x8 aom_highbd_d45e_predictor_16x8_c +void aom_highbd_d45e_predictor_16x8_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d45e_predictor_2x2 aom_highbd_d45e_predictor_2x2_c void aom_highbd_d45e_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x16 aom_highbd_d45e_predictor_32x16_c +void aom_highbd_d45e_predictor_32x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x32 aom_highbd_d45e_predictor_32x32_c +void aom_highbd_d45e_predictor_32x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_c +void aom_highbd_d45e_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_sse2 void aom_highbd_d45e_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_c +void aom_highbd_d45e_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_sse2 void aom_highbd_d45e_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_c +void aom_highbd_d45e_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_sse2 void aom_highbd_d45e_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_c +void aom_highbd_d45e_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_sse2 void aom_highbd_d45e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_c +void aom_highbd_d45e_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_sse2 void aom_highbd_d63e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d63e_predictor_16x16 aom_highbd_d63e_predictor_16x16_c @@ -1689,86 +1795,109 @@ void aom_highbd_d63e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d63e_predictor_8x8 aom_highbd_d63e_predictor_8x8_c void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_c +void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_sse2 void aom_highbd_dc_128_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_c +void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_sse2 void aom_highbd_dc_128_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_c +void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_sse2 void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c void aom_highbd_dc_128_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_c +void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_sse2 void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_c +void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_sse2 void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_c +void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_sse2 void aom_highbd_dc_128_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_c +void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_sse2 void aom_highbd_dc_128_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_c +void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_sse2 void aom_highbd_dc_128_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_c +void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_sse2 void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_c +void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_sse2 void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_c +void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_sse2 void aom_highbd_dc_left_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_c +void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_sse2 void aom_highbd_dc_left_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_c +void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_sse2 void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c void aom_highbd_dc_left_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_c +void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_sse2 void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_c +void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_sse2 void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_c +void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_sse2 void aom_highbd_dc_left_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_c +void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_sse2 void aom_highbd_dc_left_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_c +void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_sse2 void aom_highbd_dc_left_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_c +void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_sse2 void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_c +void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_sse2 void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_16x16 aom_highbd_dc_predictor_16x16_sse2 void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_c +void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_sse2 void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_c +void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_sse2 void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_c +void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_sse2 void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -1779,50 +1908,63 @@ void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_dc_predictor_4x4 aom_highbd_dc_predictor_4x4_sse2 void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_c +void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_sse2 void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_c +void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_sse2 void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_c +void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_sse2 void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_8x8 aom_highbd_dc_predictor_8x8_sse2 void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_c +void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_sse2 void aom_highbd_dc_top_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_c +void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_sse2 void aom_highbd_dc_top_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_c +void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_sse2 void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c void aom_highbd_dc_top_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_c +void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_sse2 void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_c +void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_sse2 void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_c +void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_sse2 void aom_highbd_dc_top_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_c +void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_sse2 void aom_highbd_dc_top_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_c +void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_sse2 void aom_highbd_dc_top_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_c +void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_sse2 void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_c +void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_sse2 void aom_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -1845,37 +1987,47 @@ void aom_highbd_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int strid #define aom_highbd_fdct8x8 aom_highbd_fdct8x8_sse2 void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_c +void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_sse2 void aom_highbd_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_c +void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_sse2 void aom_highbd_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_c +void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_sse2 void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c void aom_highbd_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_c +void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_sse2 void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_c +void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_sse2 void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_c +void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_sse2 void aom_highbd_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_c +void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_sse2 void aom_highbd_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_c +void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_sse2 void aom_highbd_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_c +void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_sse2 void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_c +void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_sse2 void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); #define aom_highbd_iwht4x4_16_add aom_highbd_iwht4x4_16_add_c @@ -1889,7 +2041,8 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int pitch, const uint8_t *bli void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_horizontal_4_dual aom_highbd_lpf_horizontal_4_dual_sse2 +void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1897,11 +2050,13 @@ void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *bli void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_horizontal_8_dual aom_highbd_lpf_horizontal_8_dual_sse2 +void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); -#define aom_highbd_lpf_horizontal_edge_16 aom_highbd_lpf_horizontal_edge_16_sse2 +void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_edge_16)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1913,7 +2068,8 @@ void aom_highbd_lpf_vertical_16_sse2(uint16_t *s, int pitch, const uint8_t *blim void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); -#define aom_highbd_lpf_vertical_16_dual aom_highbd_lpf_vertical_16_dual_sse2 +void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_16_dual)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1921,7 +2077,8 @@ void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimi void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_vertical_4_dual aom_highbd_lpf_vertical_4_dual_sse2 +void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1929,7 +2086,8 @@ void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimi void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_vertical_8_dual aom_highbd_lpf_vertical_8_dual_sse2 +void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); unsigned int aom_highbd_masked_sad16x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_highbd_masked_sad16x16_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2510,16 +2668,19 @@ void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_v_predictor_16x16 aom_highbd_v_predictor_16x16_sse2 void aom_highbd_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_c +void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_sse2 void aom_highbd_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_c +void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_sse2 void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c void aom_highbd_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_c +void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_sse2 void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2530,13 +2691,16 @@ void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_v_predictor_4x4 aom_highbd_v_predictor_4x4_sse2 void aom_highbd_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_c +void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_sse2 void aom_highbd_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_c +void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_sse2 void aom_highbd_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_c +void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_sse2 void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2626,50 +2790,43 @@ void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, con #define aom_lpf_horizontal_4 aom_lpf_horizontal_4_sse2 void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_sse2 +#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_horizontal_8 aom_lpf_horizontal_8_sse2 void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_sse2 +#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_c void aom_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_16 aom_lpf_horizontal_edge_16_sse2 void aom_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_8 aom_lpf_horizontal_edge_8_sse2 void aom_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_16 aom_lpf_vertical_16_sse2 void aom_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_sse2 +#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_c void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_4 aom_lpf_vertical_4_sse2 void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_sse2 +#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_8 aom_lpf_vertical_8_sse2 void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_sse2 +#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_c unsigned int aom_masked_sad16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_masked_sad16x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2940,37 +3097,52 @@ unsigned int aom_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, con RTCD_EXTERN unsigned int (*aom_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c +void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c +void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c +void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c void aom_paeth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c +void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c +void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c +void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c +void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c +void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c +void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c +void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -3273,37 +3445,47 @@ void aom_smooth_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_ #define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x16 aom_smooth_predictor_16x16_c +void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_c +void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_c +void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c void aom_smooth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x16 aom_smooth_predictor_32x16_c +void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x32 aom_smooth_predictor_32x32_c +void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x4 aom_smooth_predictor_4x4_c +void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_c +void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_c +void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x4 aom_smooth_predictor_8x4_c +void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x8 aom_smooth_predictor_8x8_c +void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c @@ -3493,33 +3675,41 @@ void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_v_predictor_16x16 aom_v_predictor_16x16_sse2 void aom_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x32 aom_v_predictor_16x32_c +void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_16x32 aom_v_predictor_16x32_sse2 void aom_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x8 aom_v_predictor_16x8_c +void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_16x8 aom_v_predictor_16x8_sse2 void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_2x2 aom_v_predictor_2x2_c void aom_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x16 aom_v_predictor_32x16_c +void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x32 aom_v_predictor_32x32_sse2 +void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_4x4 aom_v_predictor_4x4_sse2 void aom_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_4x8 aom_v_predictor_4x8_c +void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_4x8 aom_v_predictor_4x8_sse2 void aom_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x16 aom_v_predictor_8x16_c +void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_8x16 aom_v_predictor_8x16_sse2 void aom_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x4 aom_v_predictor_8x4_c +void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_8x4 aom_v_predictor_8x4_sse2 void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3626,6 +3816,12 @@ static void setup_rtcd_internal(void) aom_convolve8 = aom_convolve8_sse2; if (flags & HAS_SSSE3) aom_convolve8 = aom_convolve8_ssse3; if (flags & HAS_AVX2) aom_convolve8 = aom_convolve8_avx2; + aom_convolve8_add_src = aom_convolve8_add_src_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src = aom_convolve8_add_src_ssse3; + aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_ssse3; + aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_ssse3; aom_convolve8_avg = aom_convolve8_avg_sse2; if (flags & HAS_SSSE3) aom_convolve8_avg = aom_convolve8_avg_ssse3; aom_convolve8_avg_horiz = aom_convolve8_avg_horiz_sse2; @@ -3648,6 +3844,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_d153_predictor_8x8 = aom_d153_predictor_8x8_ssse3; aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_c; if (flags & HAS_SSSE3) aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_ssse3; + aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_avx2; + aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_avx2; + aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_avx2; + aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_avx2; + aom_dc_predictor_32x16 = aom_dc_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_avx2; + aom_dc_predictor_32x32 = aom_dc_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_avx2; + aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_avx2; + aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_avx2; aom_fdct32x32 = aom_fdct32x32_sse2; if (flags & HAS_AVX2) aom_fdct32x32 = aom_fdct32x32_avx2; aom_fdct32x32_rd = aom_fdct32x32_rd_sse2; @@ -3656,6 +3868,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_fdct8x8 = aom_fdct8x8_ssse3; aom_get16x16var = aom_get16x16var_sse2; if (flags & HAS_AVX2) aom_get16x16var = aom_get16x16var_avx2; + aom_h_predictor_32x32 = aom_h_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_h_predictor_32x32 = aom_h_predictor_32x32_avx2; aom_hadamard_8x8 = aom_hadamard_8x8_sse2; if (flags & HAS_SSSE3) aom_hadamard_8x8 = aom_hadamard_8x8_ssse3; aom_highbd_10_masked_sub_pixel_variance16x16 = aom_highbd_10_masked_sub_pixel_variance16x16_c; @@ -3814,6 +4028,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_sse4_1; aom_highbd_convolve8 = aom_highbd_convolve8_sse2; if (flags & HAS_AVX2) aom_highbd_convolve8 = aom_highbd_convolve8_avx2; + aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_c; + if (flags & HAS_SSSE3) aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_ssse3; aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_sse2; if (flags & HAS_AVX2) aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_avx2; aom_highbd_convolve8_avg_horiz = aom_highbd_convolve8_avg_horiz_sse2; @@ -3828,6 +4044,46 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) aom_highbd_convolve_avg = aom_highbd_convolve_avg_avx2; aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2; if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2; + aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_ssse3; + aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_ssse3; + aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_ssse3; + aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_ssse3; + aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_ssse3; + aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_ssse3; + aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_ssse3; + aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_ssse3; + aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_ssse3; + aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_avx2; + aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_avx2; + aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_avx2; + aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_avx2; + aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_avx2; + aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_avx2; + aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_avx2; + aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_avx2; + aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_avx2; + aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_avx2; + aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_avx2; aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_ssse3; aom_highbd_masked_sad16x32 = aom_highbd_masked_sad16x32_c; @@ -3979,10 +4235,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_idct8x8_12_add = aom_idct8x8_12_add_ssse3; aom_idct8x8_64_add = aom_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) aom_idct8x8_64_add = aom_idct8x8_64_add_ssse3; - aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_avx2; - aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_avx2; aom_masked_sad16x16 = aom_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_masked_sad16x16 = aom_masked_sad16x16_ssse3; aom_masked_sad16x32 = aom_masked_sad16x32_c; @@ -4089,6 +4341,31 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_obmc_variance8x4 = aom_obmc_variance8x4_sse4_1; aom_obmc_variance8x8 = aom_obmc_variance8x8_c; if (flags & HAS_SSE4_1) aom_obmc_variance8x8 = aom_obmc_variance8x8_sse4_1; + aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_avx2; + aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2; + aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_avx2; + aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_avx2; + aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_avx2; + aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3; + aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3; + aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3; + aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3; + aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_ssse3; aom_quantize_b = aom_quantize_b_sse2; if (flags & HAS_SSSE3) aom_quantize_b = aom_quantize_b_ssse3; if (flags & HAS_AVX) aom_quantize_b = aom_quantize_b_avx; @@ -4147,6 +4424,26 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_sad8x8x8 = aom_sad8x8x8_sse4_1; aom_scaled_2d = aom_scaled_2d_c; if (flags & HAS_SSSE3) aom_scaled_2d = aom_scaled_2d_ssse3; + aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_ssse3; + aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3; + aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3; + aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_ssse3; + aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_ssse3; + aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3; + aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3; + aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3; + aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3; + aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_ssse3; aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_ssse3; aom_sub_pixel_avg_variance16x32 = aom_sub_pixel_avg_variance16x32_sse2; @@ -4203,6 +4500,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_sub_pixel_variance8x4 = aom_sub_pixel_variance8x4_ssse3; aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_ssse3; + aom_v_predictor_32x16 = aom_v_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x16 = aom_v_predictor_32x16_avx2; + aom_v_predictor_32x32 = aom_v_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x32 = aom_v_predictor_32x32_avx2; aom_variance16x16 = aom_variance16x16_sse2; if (flags & HAS_AVX2) aom_variance16x16 = aom_variance16x16_avx2; aom_variance32x16 = aom_variance32x16_sse2; diff --git a/media/libaom/config/mac/x64/av1_rtcd.h b/media/libaom/config/mac/x64/av1_rtcd.h index d4fc99c10..66c0349aa 100644 --- a/media/libaom/config/mac/x64/av1_rtcd.h +++ b/media/libaom/config/mac/x64/av1_rtcd.h @@ -31,44 +31,39 @@ struct search_site_config; struct mv; union int_mv; struct yv12_buffer_config; -typedef uint16_t od_dering_in; #ifdef __cplusplus extern "C" { #endif -void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void apply_selfguided_restoration_c(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration)(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); + +void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_highbd_sse4_1(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration_highbd)(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +#define av1_convolve_2d av1_convolve_2d_sse2 + +void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); + void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_horiz)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +void av1_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +RTCD_EXTERN void (*av1_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); + void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); @@ -76,9 +71,6 @@ RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_ int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_diamond_search_sad av1_diamond_search_sad_c -void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_fdct8x8_quant av1_fdct8x8_quant_c - void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); @@ -133,6 +125,14 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, struct t void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht8x8 av1_fht8x8_sse2 +void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength); +void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength); + +void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength); +void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength); + int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_full_range_search av1_full_range_search_c @@ -141,46 +141,42 @@ int av1_full_search_sadx3(const struct macroblock *x, const struct mv *ref_mv, i int av1_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); RTCD_EXTERN int (*av1_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); -void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); +void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type); #define av1_fwd_idtx av1_fwd_idtx_c -void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x32 av1_fwd_txfm2d_16x32_c -void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x8 av1_fwd_txfm2d_16x8_c -void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x16 av1_fwd_txfm2d_32x16_c -void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x8 av1_fwd_txfm2d_4x8_c -void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); - -void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x16 av1_fwd_txfm2d_8x16_c -void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x4 av1_fwd_txfm2d_8x4_c -void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define av1_fwht4x4 av1_fwht4x4_c @@ -213,6 +209,14 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8 void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2 +void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); + +void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); + void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c @@ -227,6 +231,10 @@ void av1_highbd_convolve_init_c(void); void av1_highbd_convolve_init_sse4_1(void); RTCD_EXTERN void (*av1_highbd_convolve_init)(void); +void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +void av1_highbd_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); + void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); void av1_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); RTCD_EXTERN void (*av1_highbd_convolve_vert)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); @@ -273,9 +281,6 @@ void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c -void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); -#define av1_highbd_quantize_b av1_highbd_quantize_b_c - void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); @@ -288,6 +293,14 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); @@ -340,48 +353,45 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht8x8_64_add av1_iht8x8_64_add_sse2 -void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c -void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c -void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c -void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c -void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c - -void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c -void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c -void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); void av1_lowbd_convolve_init_c(void); void av1_lowbd_convolve_init_ssse3(void); RTCD_EXTERN void (*av1_lowbd_convolve_init)(void); -void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale); #define av1_quantize_b av1_quantize_b_c void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -393,10 +403,26 @@ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void av1_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); + void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); void av1_temporal_filter_apply_sse2(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); #define av1_temporal_filter_apply av1_temporal_filter_apply_sse2 +void av1_upsample_intra_edge_c(uint8_t *p, int sz); +void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz); +RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz); + +void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd); +void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd); +RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd); + void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); @@ -414,64 +440,38 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, con uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); #define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_sse2 +void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); + +int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift); + double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); RTCD_EXTERN double (*compute_cross_correlation)(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); -void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); -int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse2(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_ssse3(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse4_1(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -RTCD_EXTERN int (*od_dir_find8)(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); - -void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - -void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - void aom_rtcd(void); #ifdef RTCD_C @@ -482,28 +482,28 @@ static void setup_rtcd_internal(void) (void)flags; - aom_clpf_block = aom_clpf_block_sse2; - if (flags & HAS_SSSE3) aom_clpf_block = aom_clpf_block_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block = aom_clpf_block_sse4_1; - aom_clpf_block_hbd = aom_clpf_block_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_block_hbd = aom_clpf_block_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block_hbd = aom_clpf_block_hbd_sse4_1; - aom_clpf_hblock = aom_clpf_hblock_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock = aom_clpf_hblock_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock = aom_clpf_hblock_sse4_1; - aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse4_1; + apply_selfguided_restoration = apply_selfguided_restoration_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1; + apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_sse4_1; av1_block_error = av1_block_error_c; if (flags & HAS_AVX2) av1_block_error = av1_block_error_avx2; + av1_convolve_2d_scale = av1_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1; av1_convolve_horiz = av1_convolve_horiz_c; if (flags & HAS_SSSE3) av1_convolve_horiz = av1_convolve_horiz_ssse3; + av1_convolve_rounding = av1_convolve_rounding_c; + if (flags & HAS_AVX2) av1_convolve_rounding = av1_convolve_rounding_avx2; av1_convolve_vert = av1_convolve_vert_c; if (flags & HAS_SSSE3) av1_convolve_vert = av1_convolve_vert_ssse3; av1_fht16x16 = av1_fht16x16_sse2; if (flags & HAS_AVX2) av1_fht16x16 = av1_fht16x16_avx2; av1_fht32x32 = av1_fht32x32_sse2; if (flags & HAS_AVX2) av1_fht32x32 = av1_fht32x32_avx2; + av1_filter_intra_edge = av1_filter_intra_edge_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1; + av1_filter_intra_edge_high = av1_filter_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1; av1_full_search_sad = av1_full_search_sad_c; if (flags & HAS_SSE3) av1_full_search_sad = av1_full_search_sadx3; if (flags & HAS_SSE4_1) av1_full_search_sad = av1_full_search_sadx8; @@ -513,14 +513,18 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_sse4_1; av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_sse4_1; - av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_c; - if (flags & HAS_SSE4_1) av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_sse4_1; av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_sse4_1; + av1_highbd_convolve_2d = av1_highbd_convolve_2d_c; + if (flags & HAS_SSSE3) av1_highbd_convolve_2d = av1_highbd_convolve_2d_ssse3; + av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1; av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_sse4_1; av1_highbd_convolve_init = av1_highbd_convolve_init_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_init = av1_highbd_convolve_init_sse4_1; + av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_c; + if (flags & HAS_AVX2) av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_avx2; av1_highbd_convolve_vert = av1_highbd_convolve_vert_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_vert = av1_highbd_convolve_vert_sse4_1; av1_highbd_quantize_fp = av1_highbd_quantize_fp_c; @@ -528,6 +532,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_highbd_quantize_fp = av1_highbd_quantize_fp_avx2; av1_highbd_warp_affine = av1_highbd_warp_affine_c; if (flags & HAS_SSSE3) av1_highbd_warp_affine = av1_highbd_warp_affine_ssse3; + av1_highpass_filter = av1_highpass_filter_c; + if (flags & HAS_SSE4_1) av1_highpass_filter = av1_highpass_filter_sse4_1; + av1_highpass_filter_highbd = av1_highpass_filter_highbd_c; + if (flags & HAS_SSE4_1) av1_highpass_filter_highbd = av1_highpass_filter_highbd_sse4_1; av1_iht16x16_256_add = av1_iht16x16_256_add_sse2; if (flags & HAS_AVX2) av1_iht16x16_256_add = av1_iht16x16_256_add_avx2; av1_inv_txfm2d_add_16x16 = av1_inv_txfm2d_add_16x16_c; @@ -544,37 +552,34 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_quantize_fp = av1_quantize_fp_avx2; av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c; if (flags & HAS_AVX2) av1_quantize_fp_32x32 = av1_quantize_fp_32x32_avx2; + av1_selfguided_restoration = av1_selfguided_restoration_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1; + av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_sse4_1; + av1_upsample_intra_edge = av1_upsample_intra_edge_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1; + av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1; av1_warp_affine = av1_warp_affine_sse2; if (flags & HAS_SSSE3) av1_warp_affine = av1_warp_affine_ssse3; + cdef_filter_block = cdef_filter_block_sse2; + if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3; + if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1; + if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2; + cdef_find_dir = cdef_find_dir_sse2; + if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3; + if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1; + if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2; compute_cross_correlation = compute_cross_correlation_c; if (flags & HAS_SSE4_1) compute_cross_correlation = compute_cross_correlation_sse4_1; - copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse4_1; - copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse4_1; - copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse4_1; - copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse4_1; copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1; + if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2; copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1; - od_dir_find8 = od_dir_find8_sse2; - if (flags & HAS_SSSE3) od_dir_find8 = od_dir_find8_ssse3; - if (flags & HAS_SSE4_1) od_dir_find8 = od_dir_find8_sse4_1; - od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse4_1; - od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse4_1; + if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2; } #endif diff --git a/media/libaom/config/win/ia32/aom_config.asm b/media/libaom/config/win/ia32/aom_config.asm index 4ee2a6d99..3ce5b4ca1 100644 --- a/media/libaom/config/win/ia32/aom_config.asm +++ b/media/libaom/config/win/ia32/aom_config.asm @@ -46,8 +46,6 @@ CONFIG_AV1 equ 1 CONFIG_STATIC_MSVCRT equ 0 CONFIG_SPATIAL_RESAMPLING equ 1 CONFIG_REALTIME_ONLY equ 0 -CONFIG_ONTHEFLY_BITPACKING equ 0 -CONFIG_ERROR_CONCEALMENT equ 0 CONFIG_SHARED equ 0 CONFIG_STATIC equ 1 CONFIG_SMALL equ 0 @@ -60,73 +58,71 @@ CONFIG_ACCOUNTING equ 0 CONFIG_INSPECTION equ 0 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_ENCODE_PERF_TESTS equ 0 +CONFIG_BITSTREAM_DEBUG equ 0 +CONFIG_SYMBOLRATE equ 0 CONFIG_COEFFICIENT_RANGE_CHECKING equ 0 CONFIG_LOWBITDEPTH equ 1 CONFIG_HIGHBITDEPTH equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_SIZE_LIMIT equ 1 -CONFIG_COLORSPACE_HEADERS equ 0 CONFIG_FP_MB_STATS equ 0 CONFIG_CDEF equ 1 +CONFIG_CDEF_SINGLEPASS equ 1 CONFIG_VAR_TX equ 1 CONFIG_RECT_TX equ 1 CONFIG_RECT_TX_EXT equ 0 CONFIG_TPL_MV equ 0 CONFIG_DUAL_FILTER equ 1 -CONFIG_CONVOLVE_ROUND equ 0 +CONFIG_CONVOLVE_ROUND equ 1 CONFIG_COMPOUND_ROUND equ 0 CONFIG_EXT_TX equ 1 -CONFIG_DPCM_INTRA equ 0 CONFIG_TX64X64 equ 0 CONFIG_EXT_INTRA equ 1 CONFIG_INTRA_INTERP equ 0 CONFIG_FILTER_INTRA equ 0 -CONFIG_INTRA_EDGE equ 0 +CONFIG_INTRA_EDGE equ 1 CONFIG_INTRABC equ 0 -CONFIG_EXT_INTER equ 1 CONFIG_INTERINTRA equ 1 CONFIG_WEDGE equ 1 CONFIG_COMPOUND_SEGMENT equ 1 CONFIG_EXT_REFS equ 1 -CONFIG_ALTREF2 equ 0 -CONFIG_SPEED_REFS equ 0 -CONFIG_GF_GROUPS equ 0 -CONFIG_FLEX_REFS equ 0 CONFIG_GLOBAL_MOTION equ 1 CONFIG_NEW_QUANT equ 0 CONFIG_SUPERTX equ 0 CONFIG_ANS equ 0 -CONFIG_LOOP_RESTORATION equ 0 +CONFIG_LOOP_RESTORATION equ 1 +CONFIG_STRIPED_LOOP_RESTORATION equ 0 CONFIG_EXT_PARTITION equ 0 CONFIG_EXT_PARTITION_TYPES equ 0 +CONFIG_EXT_PARTITION_TYPES_AB equ 0 CONFIG_UNPOISON_PARTITION_CTX equ 0 CONFIG_EXT_TILE equ 0 CONFIG_MOTION_VAR equ 1 CONFIG_NCOBMC equ 0 CONFIG_WARPED_MOTION equ 1 CONFIG_Q_ADAPT_PROBS equ 0 -CONFIG_BITSTREAM_DEBUG equ 0 CONFIG_INTER_STATS_ONLY equ 0 -CONFIG_ALT_INTRA equ 1 -CONFIG_PALETTE equ 1 CONFIG_PALETTE_DELTA_ENCODING equ 0 CONFIG_RAWBITS equ 0 -CONFIG_EC_SMALLMUL equ 1 +CONFIG_KF_CTX equ 0 CONFIG_PVQ equ 0 CONFIG_CFL equ 0 CONFIG_XIPHRC equ 0 CONFIG_DCT_ONLY equ 0 +CONFIG_DAALA_TX equ 0 CONFIG_DAALA_DCT4 equ 0 CONFIG_DAALA_DCT8 equ 0 +CONFIG_DAALA_DCT16 equ 0 +CONFIG_DAALA_DCT32 equ 0 +CONFIG_DAALA_DCT64 equ 0 CONFIG_CB4X4 equ 1 CONFIG_CHROMA_2X2 equ 0 CONFIG_CHROMA_SUB8X8 equ 1 CONFIG_FRAME_SIZE equ 0 -CONFIG_DELTA_Q equ 1 CONFIG_EXT_DELTA_Q equ 1 CONFIG_ADAPT_SCAN equ 0 -CONFIG_FILTER_7BIT equ 1 CONFIG_PARALLEL_DEBLOCKING equ 1 +CONFIG_DEBLOCK_13TAP equ 0 CONFIG_LOOPFILTERING_ACROSS_TILES equ 1 CONFIG_TEMPMV_SIGNALING equ 1 CONFIG_RD_DEBUG equ 0 @@ -135,29 +131,46 @@ CONFIG_COEF_INTERLEAVE equ 0 CONFIG_ENTROPY_STATS equ 0 CONFIG_MASKED_TX equ 0 CONFIG_DEPENDENT_HORZTILES equ 0 -CONFIG_DIST_8X8 equ 0 -CONFIG_DAALA_DIST equ 0 -CONFIG_TRIPRED equ 0 +CONFIG_DIST_8X8 equ 1 CONFIG_PALETTE_THROUGHPUT equ 1 CONFIG_REF_ADAPT equ 0 CONFIG_LV_MAP equ 0 +CONFIG_CTX1D equ 0 CONFIG_TXK_SEL equ 0 CONFIG_MV_COMPRESS equ 1 +CONFIG_SEGMENT_ZEROMV equ 0 CONFIG_FRAME_SUPERRES equ 0 CONFIG_NEW_MULTISYMBOL equ 0 CONFIG_COMPOUND_SINGLEREF equ 0 -CONFIG_AOM_QM equ 0 +CONFIG_AOM_QM equ 1 CONFIG_ONE_SIDED_COMPOUND equ 1 -CONFIG_EXT_COMP_REFS equ 0 +CONFIG_EXT_COMP_REFS equ 1 CONFIG_SMOOTH_HV equ 1 CONFIG_VAR_REFS equ 0 -CONFIG_RECT_INTRA_PRED equ 1 CONFIG_LGT equ 0 +CONFIG_LGT_FROM_PRED equ 0 CONFIG_SBL_SYMBOL equ 0 CONFIG_NCOBMC_ADAPT_WEIGHT equ 0 CONFIG_BGSPRITE equ 0 CONFIG_VAR_TX_NO_TX_MODE equ 0 CONFIG_MRC_TX equ 0 CONFIG_LPF_DIRECT equ 0 -CONFIG_UV_LVL equ 0 +CONFIG_LOOPFILTER_LEVEL equ 0 +CONFIG_NO_FRAME_CONTEXT_SIGNALING equ 0 +CONFIG_TXMG equ 1 +CONFIG_MAX_TILE equ 0 +CONFIG_HASH_ME equ 0 +CONFIG_COLORSPACE_HEADERS equ 0 +CONFIG_MFMV equ 0 +CONFIG_FRAME_MARKER equ 0 +CONFIG_JNT_COMP equ 0 +CONFIG_FRAME_SIGN_BIAS equ 0 +CONFIG_EXT_SKIP equ 0 +CONFIG_OBU equ 0 +CONFIG_AMVR equ 0 +CONFIG_LPF_SB equ 0 +CONFIG_OPT_REF_MV equ 0 +CONFIG_TMV equ 0 +CONFIG_RESTRICT_COMPRESSED_HDR equ 0 +CONFIG_HORZONLY_FRAME_SUPERRES equ 0 CONFIG_ANALYZER equ 0 diff --git a/media/libaom/config/win/ia32/aom_config.h b/media/libaom/config/win/ia32/aom_config.h index 92b2a5a67..d908e0b3f 100644 --- a/media/libaom/config/win/ia32/aom_config.h +++ b/media/libaom/config/win/ia32/aom_config.h @@ -59,8 +59,6 @@ #define CONFIG_STATIC_MSVCRT 0 #define CONFIG_SPATIAL_RESAMPLING 1 #define CONFIG_REALTIME_ONLY 0 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 #define CONFIG_SHARED 0 #define CONFIG_STATIC 1 #define CONFIG_SMALL 0 @@ -73,73 +71,71 @@ #define CONFIG_INSPECTION 0 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_BITSTREAM_DEBUG 0 +#define CONFIG_SYMBOLRATE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_LOWBITDEPTH 1 #define CONFIG_HIGHBITDEPTH 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_SIZE_LIMIT 1 -#define CONFIG_COLORSPACE_HEADERS 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_CDEF 1 +#define CONFIG_CDEF_SINGLEPASS 1 #define CONFIG_VAR_TX 1 #define CONFIG_RECT_TX 1 #define CONFIG_RECT_TX_EXT 0 #define CONFIG_TPL_MV 0 #define CONFIG_DUAL_FILTER 1 -#define CONFIG_CONVOLVE_ROUND 0 +#define CONFIG_CONVOLVE_ROUND 1 #define CONFIG_COMPOUND_ROUND 0 #define CONFIG_EXT_TX 1 -#define CONFIG_DPCM_INTRA 0 #define CONFIG_TX64X64 0 #define CONFIG_EXT_INTRA 1 #define CONFIG_INTRA_INTERP 0 #define CONFIG_FILTER_INTRA 0 -#define CONFIG_INTRA_EDGE 0 +#define CONFIG_INTRA_EDGE 1 #define CONFIG_INTRABC 0 -#define CONFIG_EXT_INTER 1 #define CONFIG_INTERINTRA 1 #define CONFIG_WEDGE 1 #define CONFIG_COMPOUND_SEGMENT 1 #define CONFIG_EXT_REFS 1 -#define CONFIG_ALTREF2 0 -#define CONFIG_SPEED_REFS 0 -#define CONFIG_GF_GROUPS 0 -#define CONFIG_FLEX_REFS 0 #define CONFIG_GLOBAL_MOTION 1 #define CONFIG_NEW_QUANT 0 #define CONFIG_SUPERTX 0 #define CONFIG_ANS 0 -#define CONFIG_LOOP_RESTORATION 0 +#define CONFIG_LOOP_RESTORATION 1 +#define CONFIG_STRIPED_LOOP_RESTORATION 0 #define CONFIG_EXT_PARTITION 0 #define CONFIG_EXT_PARTITION_TYPES 0 +#define CONFIG_EXT_PARTITION_TYPES_AB 0 #define CONFIG_UNPOISON_PARTITION_CTX 0 #define CONFIG_EXT_TILE 0 #define CONFIG_MOTION_VAR 1 #define CONFIG_NCOBMC 0 #define CONFIG_WARPED_MOTION 1 #define CONFIG_Q_ADAPT_PROBS 0 -#define CONFIG_BITSTREAM_DEBUG 0 #define CONFIG_INTER_STATS_ONLY 0 -#define CONFIG_ALT_INTRA 1 -#define CONFIG_PALETTE 1 #define CONFIG_PALETTE_DELTA_ENCODING 0 #define CONFIG_RAWBITS 0 -#define CONFIG_EC_SMALLMUL 1 +#define CONFIG_KF_CTX 0 #define CONFIG_PVQ 0 #define CONFIG_CFL 0 #define CONFIG_XIPHRC 0 #define CONFIG_DCT_ONLY 0 +#define CONFIG_DAALA_TX 0 #define CONFIG_DAALA_DCT4 0 #define CONFIG_DAALA_DCT8 0 +#define CONFIG_DAALA_DCT16 0 +#define CONFIG_DAALA_DCT32 0 +#define CONFIG_DAALA_DCT64 0 #define CONFIG_CB4X4 1 #define CONFIG_CHROMA_2X2 0 #define CONFIG_CHROMA_SUB8X8 1 #define CONFIG_FRAME_SIZE 0 -#define CONFIG_DELTA_Q 1 #define CONFIG_EXT_DELTA_Q 1 #define CONFIG_ADAPT_SCAN 0 -#define CONFIG_FILTER_7BIT 1 #define CONFIG_PARALLEL_DEBLOCKING 1 +#define CONFIG_DEBLOCK_13TAP 0 #define CONFIG_LOOPFILTERING_ACROSS_TILES 1 #define CONFIG_TEMPMV_SIGNALING 1 #define CONFIG_RD_DEBUG 0 @@ -148,31 +144,48 @@ #define CONFIG_ENTROPY_STATS 0 #define CONFIG_MASKED_TX 0 #define CONFIG_DEPENDENT_HORZTILES 0 -#define CONFIG_DIST_8X8 0 -#define CONFIG_DAALA_DIST 0 -#define CONFIG_TRIPRED 0 +#define CONFIG_DIST_8X8 1 #define CONFIG_PALETTE_THROUGHPUT 1 #define CONFIG_REF_ADAPT 0 #define CONFIG_LV_MAP 0 +#define CONFIG_CTX1D 0 #define CONFIG_TXK_SEL 0 #define CONFIG_MV_COMPRESS 1 +#define CONFIG_SEGMENT_ZEROMV 0 #define CONFIG_FRAME_SUPERRES 0 #define CONFIG_NEW_MULTISYMBOL 0 #define CONFIG_COMPOUND_SINGLEREF 0 -#define CONFIG_AOM_QM 0 +#define CONFIG_AOM_QM 1 #define CONFIG_ONE_SIDED_COMPOUND 1 -#define CONFIG_EXT_COMP_REFS 0 +#define CONFIG_EXT_COMP_REFS 1 #define CONFIG_SMOOTH_HV 1 #define CONFIG_VAR_REFS 0 -#define CONFIG_RECT_INTRA_PRED 1 #define CONFIG_LGT 0 +#define CONFIG_LGT_FROM_PRED 0 #define CONFIG_SBL_SYMBOL 0 #define CONFIG_NCOBMC_ADAPT_WEIGHT 0 #define CONFIG_BGSPRITE 0 #define CONFIG_VAR_TX_NO_TX_MODE 0 #define CONFIG_MRC_TX 0 #define CONFIG_LPF_DIRECT 0 -#define CONFIG_UV_LVL 0 +#define CONFIG_LOOPFILTER_LEVEL 0 +#define CONFIG_NO_FRAME_CONTEXT_SIGNALING 0 +#define CONFIG_TXMG 1 +#define CONFIG_MAX_TILE 0 +#define CONFIG_HASH_ME 0 +#define CONFIG_COLORSPACE_HEADERS 0 +#define CONFIG_MFMV 0 +#define CONFIG_FRAME_MARKER 0 +#define CONFIG_JNT_COMP 0 +#define CONFIG_FRAME_SIGN_BIAS 0 +#define CONFIG_EXT_SKIP 0 +#define CONFIG_OBU 0 +#define CONFIG_AMVR 0 +#define CONFIG_LPF_SB 0 +#define CONFIG_OPT_REF_MV 0 +#define CONFIG_TMV 0 +#define CONFIG_RESTRICT_COMPRESSED_HDR 0 +#define CONFIG_HORZONLY_FRAME_SUPERRES 0 #define CONFIG_ANALYZER 0 #define DECODE_WIDTH_LIMIT 8192 #define DECODE_HEIGHT_LIMIT 4608 diff --git a/media/libaom/config/win/ia32/aom_dsp_rtcd.h b/media/libaom/config/win/ia32/aom_dsp_rtcd.h index 964ccb4d2..f6529fafb 100644 --- a/media/libaom/config/win/ia32/aom_dsp_rtcd.h +++ b/media/libaom/config/win/ia32/aom_dsp_rtcd.h @@ -20,6 +20,9 @@ extern "C" { #endif +void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +#define aom_blend_a64_d32_mask aom_blend_a64_d32_mask_c + void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); @@ -51,6 +54,28 @@ void aom_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, void aom_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*aom_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_hip_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz_hip aom_convolve8_add_src_horiz_hip_c + +void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert_hip aom_convolve8_add_src_vert_hip_c + void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -312,19 +337,24 @@ void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin RTCD_EXTERN void (*aom_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c +void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c +void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c void aom_dc_128_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x16 aom_dc_128_predictor_32x16_c +void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -332,13 +362,16 @@ void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8 RTCD_EXTERN void (*aom_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c +void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c +void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_c +void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -349,19 +382,24 @@ void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const ui RTCD_EXTERN void (*aom_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c +void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c +void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c void aom_dc_left_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x16 aom_dc_left_predictor_32x16_c +void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -369,13 +407,16 @@ void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint RTCD_EXTERN void (*aom_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c +void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c +void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_c +void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -386,19 +427,24 @@ void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*aom_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c +void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c +void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x16 aom_dc_predictor_32x16_c +void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -406,13 +452,16 @@ void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t * RTCD_EXTERN void (*aom_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c +void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c +void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_c +void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -423,19 +472,24 @@ void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin RTCD_EXTERN void (*aom_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c +void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c +void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c void aom_dc_top_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x16 aom_dc_top_predictor_32x16_c +void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -443,13 +497,16 @@ void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8 RTCD_EXTERN void (*aom_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c +void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c +void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_c +void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -502,19 +559,23 @@ void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*aom_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x32 aom_h_predictor_16x32_c +void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x8 aom_h_predictor_16x8_c +void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_2x2 aom_h_predictor_2x2_c void aom_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x16 aom_h_predictor_32x16_c +void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -522,13 +583,16 @@ void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a RTCD_EXTERN void (*aom_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_4x8 aom_h_predictor_4x8_c +void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x16 aom_h_predictor_8x16_c +void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x4 aom_h_predictor_8x4_c +void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -1452,6 +1516,25 @@ void aom_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *d void aom_highbd_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src aom_highbd_convolve8_add_src_c + +void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_hip_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz aom_highbd_convolve8_add_src_horiz_c + +void aom_highbd_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz_hip aom_highbd_convolve8_add_src_horiz_hip_c + +void aom_highbd_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert aom_highbd_convolve8_add_src_vert_c + +void aom_highbd_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert_hip aom_highbd_convolve8_add_src_vert_hip_c + void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); @@ -1483,7 +1566,8 @@ void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uin RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_d117_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_16x16 aom_highbd_d117_predictor_16x16_c +void aom_highbd_d117_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_16x32 aom_highbd_d117_predictor_16x32_c @@ -1498,10 +1582,12 @@ void aom_highbd_d117_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d117_predictor_32x16 aom_highbd_d117_predictor_32x16_c void aom_highbd_d117_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_32x32 aom_highbd_d117_predictor_32x32_c +void aom_highbd_d117_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_c +void aom_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_4x8 aom_highbd_d117_predictor_4x8_c @@ -1513,10 +1599,12 @@ void aom_highbd_d117_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d117_predictor_8x4 aom_highbd_d117_predictor_8x4_c void aom_highbd_d117_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_8x8 aom_highbd_d117_predictor_8x8_c +void aom_highbd_d117_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_16x16 aom_highbd_d135_predictor_16x16_c +void aom_highbd_d135_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_16x32 aom_highbd_d135_predictor_16x32_c @@ -1531,10 +1619,12 @@ void aom_highbd_d135_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d135_predictor_32x16 aom_highbd_d135_predictor_32x16_c void aom_highbd_d135_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_32x32 aom_highbd_d135_predictor_32x32_c +void aom_highbd_d135_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_c +void aom_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_4x8 aom_highbd_d135_predictor_4x8_c @@ -1546,10 +1636,12 @@ void aom_highbd_d135_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d135_predictor_8x4 aom_highbd_d135_predictor_8x4_c void aom_highbd_d135_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_8x8 aom_highbd_d135_predictor_8x8_c +void aom_highbd_d135_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_16x16 aom_highbd_d153_predictor_16x16_c +void aom_highbd_d153_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_16x32 aom_highbd_d153_predictor_16x32_c @@ -1564,10 +1656,12 @@ void aom_highbd_d153_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d153_predictor_32x16 aom_highbd_d153_predictor_32x16_c void aom_highbd_d153_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_32x32 aom_highbd_d153_predictor_32x32_c +void aom_highbd_d153_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_c +void aom_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_4x8 aom_highbd_d153_predictor_4x8_c @@ -1579,7 +1673,8 @@ void aom_highbd_d153_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d153_predictor_8x4 aom_highbd_d153_predictor_8x4_c void aom_highbd_d153_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_8x8 aom_highbd_d153_predictor_8x8_c +void aom_highbd_d153_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d207e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d207e_predictor_16x16 aom_highbd_d207e_predictor_16x16_c @@ -1615,37 +1710,47 @@ void aom_highbd_d207e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_d207e_predictor_8x8 aom_highbd_d207e_predictor_8x8_c void aom_highbd_d45e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x16 aom_highbd_d45e_predictor_16x16_c +void aom_highbd_d45e_predictor_16x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x32 aom_highbd_d45e_predictor_16x32_c +void aom_highbd_d45e_predictor_16x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x8 aom_highbd_d45e_predictor_16x8_c +void aom_highbd_d45e_predictor_16x8_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d45e_predictor_2x2 aom_highbd_d45e_predictor_2x2_c void aom_highbd_d45e_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x16 aom_highbd_d45e_predictor_32x16_c +void aom_highbd_d45e_predictor_32x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x32 aom_highbd_d45e_predictor_32x32_c +void aom_highbd_d45e_predictor_32x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_c +void aom_highbd_d45e_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_c +void aom_highbd_d45e_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_c +void aom_highbd_d45e_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_c +void aom_highbd_d45e_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_c +void aom_highbd_d45e_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d63e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d63e_predictor_16x16 aom_highbd_d63e_predictor_16x16_c @@ -1681,86 +1786,109 @@ void aom_highbd_d63e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d63e_predictor_8x8 aom_highbd_d63e_predictor_8x8_c void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_c +void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_c +void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_c +void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c void aom_highbd_dc_128_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_c +void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_c +void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_c +void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_c +void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_c +void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_c +void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_c +void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_c +void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_c +void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_c +void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c void aom_highbd_dc_left_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_c +void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_c +void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_c +void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_c +void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_c +void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_c +void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_c +void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); RTCD_EXTERN void (*aom_highbd_dc_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_c +void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_c +void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_c +void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -1771,50 +1899,63 @@ void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const u RTCD_EXTERN void (*aom_highbd_dc_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_c +void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_c +void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_c +void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); RTCD_EXTERN void (*aom_highbd_dc_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_c +void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_c +void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_c +void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c void aom_highbd_dc_top_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_c +void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_c +void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_c +void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_c +void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_c +void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_c +void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_c +void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -1837,37 +1978,47 @@ void aom_highbd_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int strid RTCD_EXTERN void (*aom_highbd_fdct8x8)(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_c +void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_c +void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_c +void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c void aom_highbd_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_c +void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_c +void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_c +void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_c +void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_c +void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_c +void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_c +void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); #define aom_highbd_iwht4x4_16_add aom_highbd_iwht4x4_16_add_c @@ -1881,6 +2032,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4)(uint16_t *s, int pitch, const ui void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1889,10 +2041,12 @@ RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8)(uint16_t *s, int pitch, const ui void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); RTCD_EXTERN void (*aom_highbd_lpf_horizontal_edge_16)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1905,6 +2059,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_vertical_16)(uint16_t *s, int pitch, const uin void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); RTCD_EXTERN void (*aom_highbd_lpf_vertical_16_dual)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1913,6 +2068,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_vertical_4)(uint16_t *s, int pitch, const uint void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1921,6 +2077,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_vertical_8)(uint16_t *s, int pitch, const uint void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); unsigned int aom_highbd_masked_sad16x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2502,16 +2659,19 @@ void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const RTCD_EXTERN void (*aom_highbd_v_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_c +void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_c +void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c void aom_highbd_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_c +void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2522,13 +2682,16 @@ void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const ui RTCD_EXTERN void (*aom_highbd_v_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_c +void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_c +void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_c +void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2618,25 +2781,21 @@ void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, con RTCD_EXTERN void (*aom_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_c void aom_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); @@ -2644,24 +2803,21 @@ void aom_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, cons RTCD_EXTERN void (*aom_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_c void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_c unsigned int aom_masked_sad16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_masked_sad16x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2932,37 +3088,52 @@ unsigned int aom_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, con RTCD_EXTERN unsigned int (*aom_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c +void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c +void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c +void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c void aom_paeth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c +void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c +void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c +void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c +void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c +void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c +void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c +void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -3261,37 +3432,47 @@ void aom_smooth_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_ #define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x16 aom_smooth_predictor_16x16_c +void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_c +void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_c +void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c void aom_smooth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x16 aom_smooth_predictor_32x16_c +void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x32 aom_smooth_predictor_32x32_c +void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x4 aom_smooth_predictor_4x4_c +void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_c +void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_c +void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x4 aom_smooth_predictor_8x4_c +void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x8 aom_smooth_predictor_8x8_c +void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c @@ -3481,19 +3662,24 @@ void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*aom_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x32 aom_v_predictor_16x32_c +void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x8 aom_v_predictor_16x8_c +void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_2x2 aom_v_predictor_2x2_c void aom_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x16 aom_v_predictor_32x16_c +void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3501,13 +3687,16 @@ void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a RTCD_EXTERN void (*aom_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_4x8 aom_v_predictor_4x8_c +void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x16 aom_v_predictor_8x16_c +void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x4 aom_v_predictor_8x4_c +void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3617,6 +3806,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_convolve8 = aom_convolve8_sse2; if (flags & HAS_SSSE3) aom_convolve8 = aom_convolve8_ssse3; if (flags & HAS_AVX2) aom_convolve8 = aom_convolve8_avx2; + aom_convolve8_add_src = aom_convolve8_add_src_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src = aom_convolve8_add_src_ssse3; + aom_convolve8_add_src_hip = aom_convolve8_add_src_hip_c; + if (flags & HAS_SSE2) aom_convolve8_add_src_hip = aom_convolve8_add_src_hip_sse2; + aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_ssse3; + aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_ssse3; aom_convolve8_avg = aom_convolve8_avg_c; if (flags & HAS_SSE2) aom_convolve8_avg = aom_convolve8_avg_sse2; if (flags & HAS_SSSE3) aom_convolve8_avg = aom_convolve8_avg_ssse3; @@ -3650,34 +3847,90 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_ssse3; aom_dc_128_predictor_16x16 = aom_dc_128_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_128_predictor_16x16 = aom_dc_128_predictor_16x16_sse2; + aom_dc_128_predictor_16x32 = aom_dc_128_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_16x32 = aom_dc_128_predictor_16x32_sse2; + aom_dc_128_predictor_16x8 = aom_dc_128_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_16x8 = aom_dc_128_predictor_16x8_sse2; + aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_avx2; aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_avx2; aom_dc_128_predictor_4x4 = aom_dc_128_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_128_predictor_4x4 = aom_dc_128_predictor_4x4_sse2; + aom_dc_128_predictor_4x8 = aom_dc_128_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_4x8 = aom_dc_128_predictor_4x8_sse2; + aom_dc_128_predictor_8x16 = aom_dc_128_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_8x16 = aom_dc_128_predictor_8x16_sse2; + aom_dc_128_predictor_8x4 = aom_dc_128_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_8x4 = aom_dc_128_predictor_8x4_sse2; aom_dc_128_predictor_8x8 = aom_dc_128_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_128_predictor_8x8 = aom_dc_128_predictor_8x8_sse2; aom_dc_left_predictor_16x16 = aom_dc_left_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_left_predictor_16x16 = aom_dc_left_predictor_16x16_sse2; + aom_dc_left_predictor_16x32 = aom_dc_left_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_16x32 = aom_dc_left_predictor_16x32_sse2; + aom_dc_left_predictor_16x8 = aom_dc_left_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_16x8 = aom_dc_left_predictor_16x8_sse2; + aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_avx2; aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_avx2; aom_dc_left_predictor_4x4 = aom_dc_left_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_left_predictor_4x4 = aom_dc_left_predictor_4x4_sse2; + aom_dc_left_predictor_4x8 = aom_dc_left_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_4x8 = aom_dc_left_predictor_4x8_sse2; + aom_dc_left_predictor_8x16 = aom_dc_left_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_8x16 = aom_dc_left_predictor_8x16_sse2; + aom_dc_left_predictor_8x4 = aom_dc_left_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_8x4 = aom_dc_left_predictor_8x4_sse2; aom_dc_left_predictor_8x8 = aom_dc_left_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_left_predictor_8x8 = aom_dc_left_predictor_8x8_sse2; aom_dc_predictor_16x16 = aom_dc_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_predictor_16x16 = aom_dc_predictor_16x16_sse2; + aom_dc_predictor_16x32 = aom_dc_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_predictor_16x32 = aom_dc_predictor_16x32_sse2; + aom_dc_predictor_16x8 = aom_dc_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_predictor_16x8 = aom_dc_predictor_16x8_sse2; + aom_dc_predictor_32x16 = aom_dc_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_avx2; aom_dc_predictor_32x32 = aom_dc_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_avx2; aom_dc_predictor_4x4 = aom_dc_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_predictor_4x4 = aom_dc_predictor_4x4_sse2; + aom_dc_predictor_4x8 = aom_dc_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_predictor_4x8 = aom_dc_predictor_4x8_sse2; + aom_dc_predictor_8x16 = aom_dc_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_predictor_8x16 = aom_dc_predictor_8x16_sse2; + aom_dc_predictor_8x4 = aom_dc_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_predictor_8x4 = aom_dc_predictor_8x4_sse2; aom_dc_predictor_8x8 = aom_dc_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_predictor_8x8 = aom_dc_predictor_8x8_sse2; aom_dc_top_predictor_16x16 = aom_dc_top_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_top_predictor_16x16 = aom_dc_top_predictor_16x16_sse2; + aom_dc_top_predictor_16x32 = aom_dc_top_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_16x32 = aom_dc_top_predictor_16x32_sse2; + aom_dc_top_predictor_16x8 = aom_dc_top_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_16x8 = aom_dc_top_predictor_16x8_sse2; + aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_avx2; aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_avx2; aom_dc_top_predictor_4x4 = aom_dc_top_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_top_predictor_4x4 = aom_dc_top_predictor_4x4_sse2; + aom_dc_top_predictor_4x8 = aom_dc_top_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_4x8 = aom_dc_top_predictor_4x8_sse2; + aom_dc_top_predictor_8x16 = aom_dc_top_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_8x16 = aom_dc_top_predictor_8x16_sse2; + aom_dc_top_predictor_8x4 = aom_dc_top_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_8x4 = aom_dc_top_predictor_8x4_sse2; aom_dc_top_predictor_8x8 = aom_dc_top_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_top_predictor_8x8 = aom_dc_top_predictor_8x8_sse2; aom_fdct16x16 = aom_fdct16x16_c; @@ -3703,10 +3956,23 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_get_mb_ss = aom_get_mb_ss_sse2; aom_h_predictor_16x16 = aom_h_predictor_16x16_c; if (flags & HAS_SSE2) aom_h_predictor_16x16 = aom_h_predictor_16x16_sse2; + aom_h_predictor_16x32 = aom_h_predictor_16x32_c; + if (flags & HAS_SSE2) aom_h_predictor_16x32 = aom_h_predictor_16x32_sse2; + aom_h_predictor_16x8 = aom_h_predictor_16x8_c; + if (flags & HAS_SSE2) aom_h_predictor_16x8 = aom_h_predictor_16x8_sse2; + aom_h_predictor_32x16 = aom_h_predictor_32x16_c; + if (flags & HAS_SSE2) aom_h_predictor_32x16 = aom_h_predictor_32x16_sse2; aom_h_predictor_32x32 = aom_h_predictor_32x32_c; if (flags & HAS_SSE2) aom_h_predictor_32x32 = aom_h_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_h_predictor_32x32 = aom_h_predictor_32x32_avx2; aom_h_predictor_4x4 = aom_h_predictor_4x4_c; if (flags & HAS_SSE2) aom_h_predictor_4x4 = aom_h_predictor_4x4_sse2; + aom_h_predictor_4x8 = aom_h_predictor_4x8_c; + if (flags & HAS_SSE2) aom_h_predictor_4x8 = aom_h_predictor_4x8_sse2; + aom_h_predictor_8x16 = aom_h_predictor_8x16_c; + if (flags & HAS_SSE2) aom_h_predictor_8x16 = aom_h_predictor_8x16_sse2; + aom_h_predictor_8x4 = aom_h_predictor_8x4_c; + if (flags & HAS_SSE2) aom_h_predictor_8x4 = aom_h_predictor_8x4_sse2; aom_h_predictor_8x8 = aom_h_predictor_8x8_c; if (flags & HAS_SSE2) aom_h_predictor_8x8 = aom_h_predictor_8x8_sse2; aom_hadamard_16x16 = aom_hadamard_16x16_c; @@ -4075,6 +4341,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_highbd_comp_avg_upsampled_pred = aom_highbd_comp_avg_upsampled_pred_sse2; aom_highbd_convolve8 = aom_highbd_convolve8_c; if (flags & HAS_AVX2) aom_highbd_convolve8 = aom_highbd_convolve8_avx2; + aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_c; + if (flags & HAS_SSSE3) aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_ssse3; aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_c; if (flags & HAS_AVX2) aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_avx2; aom_highbd_convolve8_avg_horiz = aom_highbd_convolve8_avg_horiz_c; @@ -4091,14 +4359,130 @@ static void setup_rtcd_internal(void) aom_highbd_convolve_copy = aom_highbd_convolve_copy_c; if (flags & HAS_SSE2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2; if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2; + aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_ssse3; + aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_ssse3; + aom_highbd_d117_predictor_4x4 = aom_highbd_d117_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d117_predictor_4x4 = aom_highbd_d117_predictor_4x4_sse2; + aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_ssse3; + aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_ssse3; + aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_ssse3; + aom_highbd_d135_predictor_4x4 = aom_highbd_d135_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d135_predictor_4x4 = aom_highbd_d135_predictor_4x4_sse2; + aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_ssse3; + aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_ssse3; + aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_ssse3; + aom_highbd_d153_predictor_4x4 = aom_highbd_d153_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d153_predictor_4x4 = aom_highbd_d153_predictor_4x4_sse2; + aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_ssse3; + aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_avx2; + aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_avx2; + aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_avx2; + aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_avx2; + aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_avx2; + aom_highbd_d45e_predictor_4x4 = aom_highbd_d45e_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_4x4 = aom_highbd_d45e_predictor_4x4_sse2; + aom_highbd_d45e_predictor_4x8 = aom_highbd_d45e_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_4x8 = aom_highbd_d45e_predictor_4x8_sse2; + aom_highbd_d45e_predictor_8x16 = aom_highbd_d45e_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_8x16 = aom_highbd_d45e_predictor_8x16_sse2; + aom_highbd_d45e_predictor_8x4 = aom_highbd_d45e_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_8x4 = aom_highbd_d45e_predictor_8x4_sse2; + aom_highbd_d45e_predictor_8x8 = aom_highbd_d45e_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_8x8 = aom_highbd_d45e_predictor_8x8_sse2; + aom_highbd_dc_128_predictor_16x16 = aom_highbd_dc_128_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x16 = aom_highbd_dc_128_predictor_16x16_sse2; + aom_highbd_dc_128_predictor_16x32 = aom_highbd_dc_128_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x32 = aom_highbd_dc_128_predictor_16x32_sse2; + aom_highbd_dc_128_predictor_16x8 = aom_highbd_dc_128_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x8 = aom_highbd_dc_128_predictor_16x8_sse2; + aom_highbd_dc_128_predictor_32x16 = aom_highbd_dc_128_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_32x16 = aom_highbd_dc_128_predictor_32x16_sse2; + aom_highbd_dc_128_predictor_32x32 = aom_highbd_dc_128_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_32x32 = aom_highbd_dc_128_predictor_32x32_sse2; + aom_highbd_dc_128_predictor_4x4 = aom_highbd_dc_128_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_4x4 = aom_highbd_dc_128_predictor_4x4_sse2; + aom_highbd_dc_128_predictor_4x8 = aom_highbd_dc_128_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_4x8 = aom_highbd_dc_128_predictor_4x8_sse2; + aom_highbd_dc_128_predictor_8x16 = aom_highbd_dc_128_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x16 = aom_highbd_dc_128_predictor_8x16_sse2; + aom_highbd_dc_128_predictor_8x4 = aom_highbd_dc_128_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x4 = aom_highbd_dc_128_predictor_8x4_sse2; + aom_highbd_dc_128_predictor_8x8 = aom_highbd_dc_128_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x8 = aom_highbd_dc_128_predictor_8x8_sse2; + aom_highbd_dc_left_predictor_16x16 = aom_highbd_dc_left_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x16 = aom_highbd_dc_left_predictor_16x16_sse2; + aom_highbd_dc_left_predictor_16x32 = aom_highbd_dc_left_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x32 = aom_highbd_dc_left_predictor_16x32_sse2; + aom_highbd_dc_left_predictor_16x8 = aom_highbd_dc_left_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x8 = aom_highbd_dc_left_predictor_16x8_sse2; + aom_highbd_dc_left_predictor_32x16 = aom_highbd_dc_left_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_32x16 = aom_highbd_dc_left_predictor_32x16_sse2; + aom_highbd_dc_left_predictor_32x32 = aom_highbd_dc_left_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_32x32 = aom_highbd_dc_left_predictor_32x32_sse2; + aom_highbd_dc_left_predictor_4x4 = aom_highbd_dc_left_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_4x4 = aom_highbd_dc_left_predictor_4x4_sse2; + aom_highbd_dc_left_predictor_4x8 = aom_highbd_dc_left_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_4x8 = aom_highbd_dc_left_predictor_4x8_sse2; + aom_highbd_dc_left_predictor_8x16 = aom_highbd_dc_left_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x16 = aom_highbd_dc_left_predictor_8x16_sse2; + aom_highbd_dc_left_predictor_8x4 = aom_highbd_dc_left_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x4 = aom_highbd_dc_left_predictor_8x4_sse2; + aom_highbd_dc_left_predictor_8x8 = aom_highbd_dc_left_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x8 = aom_highbd_dc_left_predictor_8x8_sse2; aom_highbd_dc_predictor_16x16 = aom_highbd_dc_predictor_16x16_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x16 = aom_highbd_dc_predictor_16x16_sse2; + aom_highbd_dc_predictor_16x32 = aom_highbd_dc_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x32 = aom_highbd_dc_predictor_16x32_sse2; + aom_highbd_dc_predictor_16x8 = aom_highbd_dc_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x8 = aom_highbd_dc_predictor_16x8_sse2; + aom_highbd_dc_predictor_32x16 = aom_highbd_dc_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_32x16 = aom_highbd_dc_predictor_32x16_sse2; aom_highbd_dc_predictor_32x32 = aom_highbd_dc_predictor_32x32_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_32x32 = aom_highbd_dc_predictor_32x32_sse2; aom_highbd_dc_predictor_4x4 = aom_highbd_dc_predictor_4x4_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_4x4 = aom_highbd_dc_predictor_4x4_sse2; + aom_highbd_dc_predictor_4x8 = aom_highbd_dc_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_4x8 = aom_highbd_dc_predictor_4x8_sse2; + aom_highbd_dc_predictor_8x16 = aom_highbd_dc_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x16 = aom_highbd_dc_predictor_8x16_sse2; + aom_highbd_dc_predictor_8x4 = aom_highbd_dc_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x4 = aom_highbd_dc_predictor_8x4_sse2; aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_sse2; + aom_highbd_dc_top_predictor_16x16 = aom_highbd_dc_top_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x16 = aom_highbd_dc_top_predictor_16x16_sse2; + aom_highbd_dc_top_predictor_16x32 = aom_highbd_dc_top_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x32 = aom_highbd_dc_top_predictor_16x32_sse2; + aom_highbd_dc_top_predictor_16x8 = aom_highbd_dc_top_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x8 = aom_highbd_dc_top_predictor_16x8_sse2; + aom_highbd_dc_top_predictor_32x16 = aom_highbd_dc_top_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_32x16 = aom_highbd_dc_top_predictor_32x16_sse2; + aom_highbd_dc_top_predictor_32x32 = aom_highbd_dc_top_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_32x32 = aom_highbd_dc_top_predictor_32x32_sse2; + aom_highbd_dc_top_predictor_4x4 = aom_highbd_dc_top_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_4x4 = aom_highbd_dc_top_predictor_4x4_sse2; + aom_highbd_dc_top_predictor_4x8 = aom_highbd_dc_top_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_4x8 = aom_highbd_dc_top_predictor_4x8_sse2; + aom_highbd_dc_top_predictor_8x16 = aom_highbd_dc_top_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x16 = aom_highbd_dc_top_predictor_8x16_sse2; + aom_highbd_dc_top_predictor_8x4 = aom_highbd_dc_top_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x4 = aom_highbd_dc_top_predictor_8x4_sse2; + aom_highbd_dc_top_predictor_8x8 = aom_highbd_dc_top_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x8 = aom_highbd_dc_top_predictor_8x8_sse2; aom_highbd_fdct16x16 = aom_highbd_fdct16x16_c; if (flags & HAS_SSE2) aom_highbd_fdct16x16 = aom_highbd_fdct16x16_sse2; aom_highbd_fdct32x32 = aom_highbd_fdct32x32_c; @@ -4109,30 +4493,56 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_highbd_fdct4x4 = aom_highbd_fdct4x4_sse2; aom_highbd_fdct8x8 = aom_highbd_fdct8x8_c; if (flags & HAS_SSE2) aom_highbd_fdct8x8 = aom_highbd_fdct8x8_sse2; + aom_highbd_h_predictor_16x16 = aom_highbd_h_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_16x16 = aom_highbd_h_predictor_16x16_sse2; + aom_highbd_h_predictor_16x32 = aom_highbd_h_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_16x32 = aom_highbd_h_predictor_16x32_sse2; + aom_highbd_h_predictor_16x8 = aom_highbd_h_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_16x8 = aom_highbd_h_predictor_16x8_sse2; + aom_highbd_h_predictor_32x16 = aom_highbd_h_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_32x16 = aom_highbd_h_predictor_32x16_sse2; + aom_highbd_h_predictor_32x32 = aom_highbd_h_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_32x32 = aom_highbd_h_predictor_32x32_sse2; + aom_highbd_h_predictor_4x4 = aom_highbd_h_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_4x4 = aom_highbd_h_predictor_4x4_sse2; + aom_highbd_h_predictor_4x8 = aom_highbd_h_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_4x8 = aom_highbd_h_predictor_4x8_sse2; + aom_highbd_h_predictor_8x16 = aom_highbd_h_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_8x16 = aom_highbd_h_predictor_8x16_sse2; + aom_highbd_h_predictor_8x4 = aom_highbd_h_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_8x4 = aom_highbd_h_predictor_8x4_sse2; + aom_highbd_h_predictor_8x8 = aom_highbd_h_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_8x8 = aom_highbd_h_predictor_8x8_sse2; aom_highbd_lpf_horizontal_4 = aom_highbd_lpf_horizontal_4_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_4 = aom_highbd_lpf_horizontal_4_sse2; aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_avx2; aom_highbd_lpf_horizontal_8 = aom_highbd_lpf_horizontal_8_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_8 = aom_highbd_lpf_horizontal_8_sse2; aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_avx2; aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_avx2; aom_highbd_lpf_horizontal_edge_8 = aom_highbd_lpf_horizontal_edge_8_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_edge_8 = aom_highbd_lpf_horizontal_edge_8_sse2; aom_highbd_lpf_vertical_16 = aom_highbd_lpf_vertical_16_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_16 = aom_highbd_lpf_vertical_16_sse2; aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_avx2; aom_highbd_lpf_vertical_4 = aom_highbd_lpf_vertical_4_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_4 = aom_highbd_lpf_vertical_4_sse2; aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_avx2; aom_highbd_lpf_vertical_8 = aom_highbd_lpf_vertical_8_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_8 = aom_highbd_lpf_vertical_8_sse2; aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_avx2; aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_ssse3; aom_highbd_masked_sad16x32 = aom_highbd_masked_sad16x32_c; @@ -4316,10 +4726,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_highbd_upsampled_pred = aom_highbd_upsampled_pred_sse2; aom_highbd_v_predictor_16x16 = aom_highbd_v_predictor_16x16_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_16x16 = aom_highbd_v_predictor_16x16_sse2; + aom_highbd_v_predictor_16x32 = aom_highbd_v_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_16x32 = aom_highbd_v_predictor_16x32_sse2; + aom_highbd_v_predictor_16x8 = aom_highbd_v_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_16x8 = aom_highbd_v_predictor_16x8_sse2; + aom_highbd_v_predictor_32x16 = aom_highbd_v_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_32x16 = aom_highbd_v_predictor_32x16_sse2; aom_highbd_v_predictor_32x32 = aom_highbd_v_predictor_32x32_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_32x32 = aom_highbd_v_predictor_32x32_sse2; aom_highbd_v_predictor_4x4 = aom_highbd_v_predictor_4x4_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_4x4 = aom_highbd_v_predictor_4x4_sse2; + aom_highbd_v_predictor_4x8 = aom_highbd_v_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_4x8 = aom_highbd_v_predictor_4x8_sse2; + aom_highbd_v_predictor_8x16 = aom_highbd_v_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_8x16 = aom_highbd_v_predictor_8x16_sse2; + aom_highbd_v_predictor_8x4 = aom_highbd_v_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_8x4 = aom_highbd_v_predictor_8x4_sse2; aom_highbd_v_predictor_8x8 = aom_highbd_v_predictor_8x8_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_8x8 = aom_highbd_v_predictor_8x8_sse2; aom_idct16x16_10_add = aom_idct16x16_10_add_c; @@ -4368,30 +4790,18 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_iwht4x4_16_add = aom_iwht4x4_16_add_sse2; aom_lpf_horizontal_4 = aom_lpf_horizontal_4_c; if (flags & HAS_SSE2) aom_lpf_horizontal_4 = aom_lpf_horizontal_4_sse2; - aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_c; - if (flags & HAS_SSE2) aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_sse2; aom_lpf_horizontal_8 = aom_lpf_horizontal_8_c; if (flags & HAS_SSE2) aom_lpf_horizontal_8 = aom_lpf_horizontal_8_sse2; - aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_c; - if (flags & HAS_SSE2) aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_sse2; aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_c; if (flags & HAS_SSE2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_avx2; aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_c; if (flags & HAS_SSE2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_avx2; aom_lpf_vertical_16 = aom_lpf_vertical_16_c; if (flags & HAS_SSE2) aom_lpf_vertical_16 = aom_lpf_vertical_16_sse2; - aom_lpf_vertical_16_dual = aom_lpf_vertical_16_dual_c; - if (flags & HAS_SSE2) aom_lpf_vertical_16_dual = aom_lpf_vertical_16_dual_sse2; aom_lpf_vertical_4 = aom_lpf_vertical_4_c; if (flags & HAS_SSE2) aom_lpf_vertical_4 = aom_lpf_vertical_4_sse2; - aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_c; - if (flags & HAS_SSE2) aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_sse2; aom_lpf_vertical_8 = aom_lpf_vertical_8_c; if (flags & HAS_SSE2) aom_lpf_vertical_8 = aom_lpf_vertical_8_sse2; - aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_c; - if (flags & HAS_SSE2) aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_sse2; aom_masked_sad16x16 = aom_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_masked_sad16x16 = aom_masked_sad16x16_ssse3; aom_masked_sad16x32 = aom_masked_sad16x32_c; @@ -4507,6 +4917,31 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_obmc_variance8x4 = aom_obmc_variance8x4_sse4_1; aom_obmc_variance8x8 = aom_obmc_variance8x8_c; if (flags & HAS_SSE4_1) aom_obmc_variance8x8 = aom_obmc_variance8x8_sse4_1; + aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_avx2; + aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2; + aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_avx2; + aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_avx2; + aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_avx2; + aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3; + aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3; + aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3; + aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3; + aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_ssse3; aom_quantize_b = aom_quantize_b_c; if (flags & HAS_SSE2) aom_quantize_b = aom_quantize_b_sse2; aom_sad16x16 = aom_sad16x16_c; @@ -4627,6 +5062,26 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_satd = aom_satd_sse2; aom_scaled_2d = aom_scaled_2d_c; if (flags & HAS_SSSE3) aom_scaled_2d = aom_scaled_2d_ssse3; + aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_ssse3; + aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3; + aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3; + aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_ssse3; + aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_ssse3; + aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3; + aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3; + aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3; + aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3; + aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_ssse3; aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_ssse3; @@ -4719,10 +5174,24 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_upsampled_pred = aom_upsampled_pred_sse2; aom_v_predictor_16x16 = aom_v_predictor_16x16_c; if (flags & HAS_SSE2) aom_v_predictor_16x16 = aom_v_predictor_16x16_sse2; + aom_v_predictor_16x32 = aom_v_predictor_16x32_c; + if (flags & HAS_SSE2) aom_v_predictor_16x32 = aom_v_predictor_16x32_sse2; + aom_v_predictor_16x8 = aom_v_predictor_16x8_c; + if (flags & HAS_SSE2) aom_v_predictor_16x8 = aom_v_predictor_16x8_sse2; + aom_v_predictor_32x16 = aom_v_predictor_32x16_c; + if (flags & HAS_SSE2) aom_v_predictor_32x16 = aom_v_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x16 = aom_v_predictor_32x16_avx2; aom_v_predictor_32x32 = aom_v_predictor_32x32_c; if (flags & HAS_SSE2) aom_v_predictor_32x32 = aom_v_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x32 = aom_v_predictor_32x32_avx2; aom_v_predictor_4x4 = aom_v_predictor_4x4_c; if (flags & HAS_SSE2) aom_v_predictor_4x4 = aom_v_predictor_4x4_sse2; + aom_v_predictor_4x8 = aom_v_predictor_4x8_c; + if (flags & HAS_SSE2) aom_v_predictor_4x8 = aom_v_predictor_4x8_sse2; + aom_v_predictor_8x16 = aom_v_predictor_8x16_c; + if (flags & HAS_SSE2) aom_v_predictor_8x16 = aom_v_predictor_8x16_sse2; + aom_v_predictor_8x4 = aom_v_predictor_8x4_c; + if (flags & HAS_SSE2) aom_v_predictor_8x4 = aom_v_predictor_8x4_sse2; aom_v_predictor_8x8 = aom_v_predictor_8x8_c; if (flags & HAS_SSE2) aom_v_predictor_8x8 = aom_v_predictor_8x8_sse2; aom_variance16x16 = aom_variance16x16_c; diff --git a/media/libaom/config/win/ia32/av1_rtcd.h b/media/libaom/config/win/ia32/av1_rtcd.h index 3bd4729ab..b5d286212 100644 --- a/media/libaom/config/win/ia32/av1_rtcd.h +++ b/media/libaom/config/win/ia32/av1_rtcd.h @@ -31,44 +31,39 @@ struct search_site_config; struct mv; union int_mv; struct yv12_buffer_config; -typedef uint16_t od_dering_in; #ifdef __cplusplus extern "C" { #endif -void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void apply_selfguided_restoration_c(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration)(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); + +void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_highbd_sse4_1(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration_highbd)(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_2d)(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); + +void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); + void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_horiz)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +void av1_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +RTCD_EXTERN void (*av1_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); + void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); @@ -76,9 +71,6 @@ RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_ int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_diamond_search_sad av1_diamond_search_sad_c -void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_fdct8x8_quant av1_fdct8x8_quant_c - void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); @@ -133,6 +125,14 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, struct t void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); RTCD_EXTERN void (*av1_fht8x8)(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength); +void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength); + +void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength); +void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength); + int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_full_range_search av1_full_range_search_c @@ -141,46 +141,42 @@ int av1_full_search_sadx3(const struct macroblock *x, const struct mv *ref_mv, i int av1_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); RTCD_EXTERN int (*av1_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); -void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); +void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type); #define av1_fwd_idtx av1_fwd_idtx_c -void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x32 av1_fwd_txfm2d_16x32_c -void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x8 av1_fwd_txfm2d_16x8_c -void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x16 av1_fwd_txfm2d_32x16_c -void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x8 av1_fwd_txfm2d_4x8_c -void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); - -void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x16 av1_fwd_txfm2d_8x16_c -void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x4 av1_fwd_txfm2d_8x4_c -void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define av1_fwht4x4 av1_fwht4x4_c @@ -207,6 +203,14 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c +void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); + +void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); + void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c @@ -221,6 +225,10 @@ void av1_highbd_convolve_init_c(void); void av1_highbd_convolve_init_sse4_1(void); RTCD_EXTERN void (*av1_highbd_convolve_init)(void); +void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +void av1_highbd_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); + void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); void av1_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); RTCD_EXTERN void (*av1_highbd_convolve_vert)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); @@ -267,9 +275,6 @@ void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c -void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); -#define av1_highbd_quantize_b av1_highbd_quantize_b_c - void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); @@ -282,6 +287,14 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); @@ -334,48 +347,45 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); RTCD_EXTERN void (*av1_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); -void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c -void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c -void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c -void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c -void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c - -void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c -void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c -void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); void av1_lowbd_convolve_init_c(void); void av1_lowbd_convolve_init_ssse3(void); RTCD_EXTERN void (*av1_lowbd_convolve_init)(void); -void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale); #define av1_quantize_b av1_quantize_b_c void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -387,10 +397,26 @@ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void av1_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); + void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); void av1_temporal_filter_apply_sse2(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); RTCD_EXTERN void (*av1_temporal_filter_apply)(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); +void av1_upsample_intra_edge_c(uint8_t *p, int sz); +void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz); +RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz); + +void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd); +void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd); +RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd); + void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); @@ -408,64 +434,38 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, con uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); RTCD_EXTERN uint64_t (*av1_wedge_sse_from_residuals)(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); +void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); + +int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift); + double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); RTCD_EXTERN double (*compute_cross_correlation)(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); -void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); -int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse2(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_ssse3(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse4_1(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -RTCD_EXTERN int (*od_dir_find8)(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); - -void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - -void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - void aom_rtcd(void); #ifdef RTCD_C @@ -476,26 +476,20 @@ static void setup_rtcd_internal(void) (void)flags; - aom_clpf_block = aom_clpf_block_c; - if (flags & HAS_SSE2) aom_clpf_block = aom_clpf_block_sse2; - if (flags & HAS_SSSE3) aom_clpf_block = aom_clpf_block_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block = aom_clpf_block_sse4_1; - aom_clpf_block_hbd = aom_clpf_block_hbd_c; - if (flags & HAS_SSE2) aom_clpf_block_hbd = aom_clpf_block_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_block_hbd = aom_clpf_block_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block_hbd = aom_clpf_block_hbd_sse4_1; - aom_clpf_hblock = aom_clpf_hblock_c; - if (flags & HAS_SSE2) aom_clpf_hblock = aom_clpf_hblock_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock = aom_clpf_hblock_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock = aom_clpf_hblock_sse4_1; - aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_c; - if (flags & HAS_SSE2) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse4_1; + apply_selfguided_restoration = apply_selfguided_restoration_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1; + apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_sse4_1; av1_block_error = av1_block_error_c; if (flags & HAS_AVX2) av1_block_error = av1_block_error_avx2; + av1_convolve_2d = av1_convolve_2d_c; + if (flags & HAS_SSE2) av1_convolve_2d = av1_convolve_2d_sse2; + av1_convolve_2d_scale = av1_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1; av1_convolve_horiz = av1_convolve_horiz_c; if (flags & HAS_SSSE3) av1_convolve_horiz = av1_convolve_horiz_ssse3; + av1_convolve_rounding = av1_convolve_rounding_c; + if (flags & HAS_AVX2) av1_convolve_rounding = av1_convolve_rounding_avx2; av1_convolve_vert = av1_convolve_vert_c; if (flags & HAS_SSSE3) av1_convolve_vert = av1_convolve_vert_ssse3; av1_fht16x16 = av1_fht16x16_c; @@ -520,6 +514,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) av1_fht8x4 = av1_fht8x4_sse2; av1_fht8x8 = av1_fht8x8_c; if (flags & HAS_SSE2) av1_fht8x8 = av1_fht8x8_sse2; + av1_filter_intra_edge = av1_filter_intra_edge_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1; + av1_filter_intra_edge_high = av1_filter_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1; av1_full_search_sad = av1_full_search_sad_c; if (flags & HAS_SSE3) av1_full_search_sad = av1_full_search_sadx3; if (flags & HAS_SSE4_1) av1_full_search_sad = av1_full_search_sadx8; @@ -529,16 +527,20 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_sse4_1; av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_sse4_1; - av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_c; - if (flags & HAS_SSE4_1) av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_sse4_1; av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_sse4_1; av1_highbd_block_error = av1_highbd_block_error_c; if (flags & HAS_SSE2) av1_highbd_block_error = av1_highbd_block_error_sse2; + av1_highbd_convolve_2d = av1_highbd_convolve_2d_c; + if (flags & HAS_SSSE3) av1_highbd_convolve_2d = av1_highbd_convolve_2d_ssse3; + av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1; av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_sse4_1; av1_highbd_convolve_init = av1_highbd_convolve_init_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_init = av1_highbd_convolve_init_sse4_1; + av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_c; + if (flags & HAS_AVX2) av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_avx2; av1_highbd_convolve_vert = av1_highbd_convolve_vert_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_vert = av1_highbd_convolve_vert_sse4_1; av1_highbd_quantize_fp = av1_highbd_quantize_fp_c; @@ -546,6 +548,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_highbd_quantize_fp = av1_highbd_quantize_fp_avx2; av1_highbd_warp_affine = av1_highbd_warp_affine_c; if (flags & HAS_SSSE3) av1_highbd_warp_affine = av1_highbd_warp_affine_ssse3; + av1_highpass_filter = av1_highpass_filter_c; + if (flags & HAS_SSE4_1) av1_highpass_filter = av1_highpass_filter_sse4_1; + av1_highpass_filter_highbd = av1_highpass_filter_highbd_c; + if (flags & HAS_SSE4_1) av1_highpass_filter_highbd = av1_highpass_filter_highbd_sse4_1; av1_iht16x16_256_add = av1_iht16x16_256_add_c; if (flags & HAS_SSE2) av1_iht16x16_256_add = av1_iht16x16_256_add_sse2; if (flags & HAS_AVX2) av1_iht16x16_256_add = av1_iht16x16_256_add_avx2; @@ -580,8 +586,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_quantize_fp = av1_quantize_fp_avx2; av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c; if (flags & HAS_AVX2) av1_quantize_fp_32x32 = av1_quantize_fp_32x32_avx2; + av1_selfguided_restoration = av1_selfguided_restoration_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1; + av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_sse4_1; av1_temporal_filter_apply = av1_temporal_filter_apply_c; if (flags & HAS_SSE2) av1_temporal_filter_apply = av1_temporal_filter_apply_sse2; + av1_upsample_intra_edge = av1_upsample_intra_edge_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1; + av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1; av1_warp_affine = av1_warp_affine_c; if (flags & HAS_SSE2) av1_warp_affine = av1_warp_affine_sse2; if (flags & HAS_SSSE3) av1_warp_affine = av1_warp_affine_ssse3; @@ -591,44 +605,28 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) av1_wedge_sign_from_residuals = av1_wedge_sign_from_residuals_sse2; av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_c; if (flags & HAS_SSE2) av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_sse2; + cdef_filter_block = cdef_filter_block_c; + if (flags & HAS_SSE2) cdef_filter_block = cdef_filter_block_sse2; + if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3; + if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1; + if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2; + cdef_find_dir = cdef_find_dir_c; + if (flags & HAS_SSE2) cdef_find_dir = cdef_find_dir_sse2; + if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3; + if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1; + if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2; compute_cross_correlation = compute_cross_correlation_c; if (flags & HAS_SSE4_1) compute_cross_correlation = compute_cross_correlation_sse4_1; - copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_c; - if (flags & HAS_SSE2) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse4_1; - copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_c; - if (flags & HAS_SSE2) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse4_1; - copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_c; - if (flags & HAS_SSE2) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse4_1; - copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_c; - if (flags & HAS_SSE2) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse4_1; copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_c; if (flags & HAS_SSE2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1; + if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2; copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_c; if (flags & HAS_SSE2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1; - od_dir_find8 = od_dir_find8_c; - if (flags & HAS_SSE2) od_dir_find8 = od_dir_find8_sse2; - if (flags & HAS_SSSE3) od_dir_find8 = od_dir_find8_ssse3; - if (flags & HAS_SSE4_1) od_dir_find8 = od_dir_find8_sse4_1; - od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_c; - if (flags & HAS_SSE2) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse4_1; - od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_c; - if (flags & HAS_SSE2) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse4_1; + if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2; } #endif diff --git a/media/libaom/config/win/mingw32/aom_config.asm b/media/libaom/config/win/mingw32/aom_config.asm index 2f2cddaf8..161c1fee3 100644..100755 --- a/media/libaom/config/win/mingw32/aom_config.asm +++ b/media/libaom/config/win/mingw32/aom_config.asm @@ -18,7 +18,7 @@ HAVE_AVX equ 1 HAVE_AVX2 equ 1 HAVE_AOM_PORTS equ 1 HAVE_FEXCEPT equ 1 -HAVE_PTHREAD_H equ 0 +HAVE_PTHREAD_H equ 1 HAVE_WXWIDGETS equ 0 CONFIG_DEPENDENCY_TRACKING equ 1 CONFIG_EXTERNAL_BUILD equ 1 @@ -46,8 +46,6 @@ CONFIG_AV1 equ 1 CONFIG_STATIC_MSVCRT equ 0 CONFIG_SPATIAL_RESAMPLING equ 1 CONFIG_REALTIME_ONLY equ 0 -CONFIG_ONTHEFLY_BITPACKING equ 0 -CONFIG_ERROR_CONCEALMENT equ 0 CONFIG_SHARED equ 0 CONFIG_STATIC equ 1 CONFIG_SMALL equ 0 @@ -60,73 +58,71 @@ CONFIG_ACCOUNTING equ 0 CONFIG_INSPECTION equ 0 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_ENCODE_PERF_TESTS equ 0 +CONFIG_BITSTREAM_DEBUG equ 0 +CONFIG_SYMBOLRATE equ 0 CONFIG_COEFFICIENT_RANGE_CHECKING equ 0 CONFIG_LOWBITDEPTH equ 1 CONFIG_HIGHBITDEPTH equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_SIZE_LIMIT equ 1 -CONFIG_COLORSPACE_HEADERS equ 0 CONFIG_FP_MB_STATS equ 0 CONFIG_CDEF equ 1 +CONFIG_CDEF_SINGLEPASS equ 1 CONFIG_VAR_TX equ 1 CONFIG_RECT_TX equ 1 CONFIG_RECT_TX_EXT equ 0 CONFIG_TPL_MV equ 0 CONFIG_DUAL_FILTER equ 1 -CONFIG_CONVOLVE_ROUND equ 0 +CONFIG_CONVOLVE_ROUND equ 1 CONFIG_COMPOUND_ROUND equ 0 CONFIG_EXT_TX equ 1 -CONFIG_DPCM_INTRA equ 0 CONFIG_TX64X64 equ 0 CONFIG_EXT_INTRA equ 1 CONFIG_INTRA_INTERP equ 0 CONFIG_FILTER_INTRA equ 0 -CONFIG_INTRA_EDGE equ 0 +CONFIG_INTRA_EDGE equ 1 CONFIG_INTRABC equ 0 -CONFIG_EXT_INTER equ 1 CONFIG_INTERINTRA equ 1 CONFIG_WEDGE equ 1 CONFIG_COMPOUND_SEGMENT equ 1 CONFIG_EXT_REFS equ 1 -CONFIG_ALTREF2 equ 0 -CONFIG_SPEED_REFS equ 0 -CONFIG_GF_GROUPS equ 0 -CONFIG_FLEX_REFS equ 0 CONFIG_GLOBAL_MOTION equ 1 CONFIG_NEW_QUANT equ 0 CONFIG_SUPERTX equ 0 CONFIG_ANS equ 0 -CONFIG_LOOP_RESTORATION equ 0 +CONFIG_LOOP_RESTORATION equ 1 +CONFIG_STRIPED_LOOP_RESTORATION equ 0 CONFIG_EXT_PARTITION equ 0 CONFIG_EXT_PARTITION_TYPES equ 0 +CONFIG_EXT_PARTITION_TYPES_AB equ 0 CONFIG_UNPOISON_PARTITION_CTX equ 0 CONFIG_EXT_TILE equ 0 CONFIG_MOTION_VAR equ 1 CONFIG_NCOBMC equ 0 CONFIG_WARPED_MOTION equ 1 CONFIG_Q_ADAPT_PROBS equ 0 -CONFIG_BITSTREAM_DEBUG equ 0 CONFIG_INTER_STATS_ONLY equ 0 -CONFIG_ALT_INTRA equ 1 -CONFIG_PALETTE equ 1 CONFIG_PALETTE_DELTA_ENCODING equ 0 CONFIG_RAWBITS equ 0 -CONFIG_EC_SMALLMUL equ 1 +CONFIG_KF_CTX equ 0 CONFIG_PVQ equ 0 CONFIG_CFL equ 0 CONFIG_XIPHRC equ 0 CONFIG_DCT_ONLY equ 0 +CONFIG_DAALA_TX equ 0 CONFIG_DAALA_DCT4 equ 0 CONFIG_DAALA_DCT8 equ 0 +CONFIG_DAALA_DCT16 equ 0 +CONFIG_DAALA_DCT32 equ 0 +CONFIG_DAALA_DCT64 equ 0 CONFIG_CB4X4 equ 1 CONFIG_CHROMA_2X2 equ 0 CONFIG_CHROMA_SUB8X8 equ 1 CONFIG_FRAME_SIZE equ 0 -CONFIG_DELTA_Q equ 1 CONFIG_EXT_DELTA_Q equ 1 CONFIG_ADAPT_SCAN equ 0 -CONFIG_FILTER_7BIT equ 1 CONFIG_PARALLEL_DEBLOCKING equ 1 +CONFIG_DEBLOCK_13TAP equ 0 CONFIG_LOOPFILTERING_ACROSS_TILES equ 1 CONFIG_TEMPMV_SIGNALING equ 1 CONFIG_RD_DEBUG equ 0 @@ -135,29 +131,46 @@ CONFIG_COEF_INTERLEAVE equ 0 CONFIG_ENTROPY_STATS equ 0 CONFIG_MASKED_TX equ 0 CONFIG_DEPENDENT_HORZTILES equ 0 -CONFIG_DIST_8X8 equ 0 -CONFIG_DAALA_DIST equ 0 -CONFIG_TRIPRED equ 0 +CONFIG_DIST_8X8 equ 1 CONFIG_PALETTE_THROUGHPUT equ 1 CONFIG_REF_ADAPT equ 0 CONFIG_LV_MAP equ 0 +CONFIG_CTX1D equ 0 CONFIG_TXK_SEL equ 0 CONFIG_MV_COMPRESS equ 1 +CONFIG_SEGMENT_ZEROMV equ 0 CONFIG_FRAME_SUPERRES equ 0 CONFIG_NEW_MULTISYMBOL equ 0 CONFIG_COMPOUND_SINGLEREF equ 0 -CONFIG_AOM_QM equ 0 +CONFIG_AOM_QM equ 1 CONFIG_ONE_SIDED_COMPOUND equ 1 -CONFIG_EXT_COMP_REFS equ 0 +CONFIG_EXT_COMP_REFS equ 1 CONFIG_SMOOTH_HV equ 1 CONFIG_VAR_REFS equ 0 -CONFIG_RECT_INTRA_PRED equ 1 CONFIG_LGT equ 0 +CONFIG_LGT_FROM_PRED equ 0 CONFIG_SBL_SYMBOL equ 0 CONFIG_NCOBMC_ADAPT_WEIGHT equ 0 CONFIG_BGSPRITE equ 0 CONFIG_VAR_TX_NO_TX_MODE equ 0 CONFIG_MRC_TX equ 0 CONFIG_LPF_DIRECT equ 0 -CONFIG_UV_LVL equ 0 +CONFIG_LOOPFILTER_LEVEL equ 0 +CONFIG_NO_FRAME_CONTEXT_SIGNALING equ 0 +CONFIG_TXMG equ 1 +CONFIG_MAX_TILE equ 0 +CONFIG_HASH_ME equ 0 +CONFIG_COLORSPACE_HEADERS equ 0 +CONFIG_MFMV equ 0 +CONFIG_FRAME_MARKER equ 0 +CONFIG_JNT_COMP equ 0 +CONFIG_FRAME_SIGN_BIAS equ 0 +CONFIG_EXT_SKIP equ 0 +CONFIG_OBU equ 0 +CONFIG_AMVR equ 0 +CONFIG_LPF_SB equ 0 +CONFIG_OPT_REF_MV equ 0 +CONFIG_TMV equ 0 +CONFIG_RESTRICT_COMPRESSED_HDR equ 0 +CONFIG_HORZONLY_FRAME_SUPERRES equ 0 CONFIG_ANALYZER equ 0 diff --git a/media/libaom/config/win/mingw32/aom_config.h b/media/libaom/config/win/mingw32/aom_config.h index f03f3e822..78a0ba3bd 100644..100755 --- a/media/libaom/config/win/mingw32/aom_config.h +++ b/media/libaom/config/win/mingw32/aom_config.h @@ -9,7 +9,7 @@ /* This file automatically generated by configure. Do not edit! */ #ifndef AOM_CONFIG_H #define AOM_CONFIG_H -#define RESTRICT +#define RESTRICT #define INLINE inline #define ARCH_ARM 0 #define ARCH_MIPS 0 @@ -31,7 +31,7 @@ #define HAVE_AVX2 1 #define HAVE_AOM_PORTS 1 #define HAVE_FEXCEPT 1 -#define HAVE_PTHREAD_H 0 +#define HAVE_PTHREAD_H 1 #define HAVE_WXWIDGETS 0 #define CONFIG_DEPENDENCY_TRACKING 1 #define CONFIG_EXTERNAL_BUILD 1 @@ -59,8 +59,6 @@ #define CONFIG_STATIC_MSVCRT 0 #define CONFIG_SPATIAL_RESAMPLING 1 #define CONFIG_REALTIME_ONLY 0 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 #define CONFIG_SHARED 0 #define CONFIG_STATIC 1 #define CONFIG_SMALL 0 @@ -73,73 +71,71 @@ #define CONFIG_INSPECTION 0 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_BITSTREAM_DEBUG 0 +#define CONFIG_SYMBOLRATE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_LOWBITDEPTH 1 #define CONFIG_HIGHBITDEPTH 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_SIZE_LIMIT 1 -#define CONFIG_COLORSPACE_HEADERS 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_CDEF 1 +#define CONFIG_CDEF_SINGLEPASS 1 #define CONFIG_VAR_TX 1 #define CONFIG_RECT_TX 1 #define CONFIG_RECT_TX_EXT 0 #define CONFIG_TPL_MV 0 #define CONFIG_DUAL_FILTER 1 -#define CONFIG_CONVOLVE_ROUND 0 +#define CONFIG_CONVOLVE_ROUND 1 #define CONFIG_COMPOUND_ROUND 0 #define CONFIG_EXT_TX 1 -#define CONFIG_DPCM_INTRA 0 #define CONFIG_TX64X64 0 #define CONFIG_EXT_INTRA 1 #define CONFIG_INTRA_INTERP 0 #define CONFIG_FILTER_INTRA 0 -#define CONFIG_INTRA_EDGE 0 +#define CONFIG_INTRA_EDGE 1 #define CONFIG_INTRABC 0 -#define CONFIG_EXT_INTER 1 #define CONFIG_INTERINTRA 1 #define CONFIG_WEDGE 1 #define CONFIG_COMPOUND_SEGMENT 1 #define CONFIG_EXT_REFS 1 -#define CONFIG_ALTREF2 0 -#define CONFIG_SPEED_REFS 0 -#define CONFIG_GF_GROUPS 0 -#define CONFIG_FLEX_REFS 0 #define CONFIG_GLOBAL_MOTION 1 #define CONFIG_NEW_QUANT 0 #define CONFIG_SUPERTX 0 #define CONFIG_ANS 0 -#define CONFIG_LOOP_RESTORATION 0 +#define CONFIG_LOOP_RESTORATION 1 +#define CONFIG_STRIPED_LOOP_RESTORATION 0 #define CONFIG_EXT_PARTITION 0 #define CONFIG_EXT_PARTITION_TYPES 0 +#define CONFIG_EXT_PARTITION_TYPES_AB 0 #define CONFIG_UNPOISON_PARTITION_CTX 0 #define CONFIG_EXT_TILE 0 #define CONFIG_MOTION_VAR 1 #define CONFIG_NCOBMC 0 #define CONFIG_WARPED_MOTION 1 #define CONFIG_Q_ADAPT_PROBS 0 -#define CONFIG_BITSTREAM_DEBUG 0 #define CONFIG_INTER_STATS_ONLY 0 -#define CONFIG_ALT_INTRA 1 -#define CONFIG_PALETTE 1 #define CONFIG_PALETTE_DELTA_ENCODING 0 #define CONFIG_RAWBITS 0 -#define CONFIG_EC_SMALLMUL 1 +#define CONFIG_KF_CTX 0 #define CONFIG_PVQ 0 #define CONFIG_CFL 0 #define CONFIG_XIPHRC 0 #define CONFIG_DCT_ONLY 0 +#define CONFIG_DAALA_TX 0 #define CONFIG_DAALA_DCT4 0 #define CONFIG_DAALA_DCT8 0 +#define CONFIG_DAALA_DCT16 0 +#define CONFIG_DAALA_DCT32 0 +#define CONFIG_DAALA_DCT64 0 #define CONFIG_CB4X4 1 #define CONFIG_CHROMA_2X2 0 #define CONFIG_CHROMA_SUB8X8 1 #define CONFIG_FRAME_SIZE 0 -#define CONFIG_DELTA_Q 1 #define CONFIG_EXT_DELTA_Q 1 #define CONFIG_ADAPT_SCAN 0 -#define CONFIG_FILTER_7BIT 1 #define CONFIG_PARALLEL_DEBLOCKING 1 +#define CONFIG_DEBLOCK_13TAP 0 #define CONFIG_LOOPFILTERING_ACROSS_TILES 1 #define CONFIG_TEMPMV_SIGNALING 1 #define CONFIG_RD_DEBUG 0 @@ -148,31 +144,48 @@ #define CONFIG_ENTROPY_STATS 0 #define CONFIG_MASKED_TX 0 #define CONFIG_DEPENDENT_HORZTILES 0 -#define CONFIG_DIST_8X8 0 -#define CONFIG_DAALA_DIST 0 -#define CONFIG_TRIPRED 0 +#define CONFIG_DIST_8X8 1 #define CONFIG_PALETTE_THROUGHPUT 1 #define CONFIG_REF_ADAPT 0 #define CONFIG_LV_MAP 0 +#define CONFIG_CTX1D 0 #define CONFIG_TXK_SEL 0 #define CONFIG_MV_COMPRESS 1 +#define CONFIG_SEGMENT_ZEROMV 0 #define CONFIG_FRAME_SUPERRES 0 #define CONFIG_NEW_MULTISYMBOL 0 #define CONFIG_COMPOUND_SINGLEREF 0 -#define CONFIG_AOM_QM 0 +#define CONFIG_AOM_QM 1 #define CONFIG_ONE_SIDED_COMPOUND 1 -#define CONFIG_EXT_COMP_REFS 0 +#define CONFIG_EXT_COMP_REFS 1 #define CONFIG_SMOOTH_HV 1 #define CONFIG_VAR_REFS 0 -#define CONFIG_RECT_INTRA_PRED 1 #define CONFIG_LGT 0 +#define CONFIG_LGT_FROM_PRED 0 #define CONFIG_SBL_SYMBOL 0 #define CONFIG_NCOBMC_ADAPT_WEIGHT 0 #define CONFIG_BGSPRITE 0 #define CONFIG_VAR_TX_NO_TX_MODE 0 #define CONFIG_MRC_TX 0 #define CONFIG_LPF_DIRECT 0 -#define CONFIG_UV_LVL 0 +#define CONFIG_LOOPFILTER_LEVEL 0 +#define CONFIG_NO_FRAME_CONTEXT_SIGNALING 0 +#define CONFIG_TXMG 1 +#define CONFIG_MAX_TILE 0 +#define CONFIG_HASH_ME 0 +#define CONFIG_COLORSPACE_HEADERS 0 +#define CONFIG_MFMV 0 +#define CONFIG_FRAME_MARKER 0 +#define CONFIG_JNT_COMP 0 +#define CONFIG_FRAME_SIGN_BIAS 0 +#define CONFIG_EXT_SKIP 0 +#define CONFIG_OBU 0 +#define CONFIG_AMVR 0 +#define CONFIG_LPF_SB 0 +#define CONFIG_OPT_REF_MV 0 +#define CONFIG_TMV 0 +#define CONFIG_RESTRICT_COMPRESSED_HDR 0 +#define CONFIG_HORZONLY_FRAME_SUPERRES 0 #define CONFIG_ANALYZER 0 #define DECODE_WIDTH_LIMIT 8192 #define DECODE_HEIGHT_LIMIT 4608 diff --git a/media/libaom/config/win/mingw32/aom_dsp_rtcd.h b/media/libaom/config/win/mingw32/aom_dsp_rtcd.h index 964ccb4d2..f6529fafb 100644 --- a/media/libaom/config/win/mingw32/aom_dsp_rtcd.h +++ b/media/libaom/config/win/mingw32/aom_dsp_rtcd.h @@ -20,6 +20,9 @@ extern "C" { #endif +void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +#define aom_blend_a64_d32_mask aom_blend_a64_d32_mask_c + void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); @@ -51,6 +54,28 @@ void aom_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, void aom_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*aom_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_hip_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz_hip aom_convolve8_add_src_horiz_hip_c + +void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert_hip aom_convolve8_add_src_vert_hip_c + void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -312,19 +337,24 @@ void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin RTCD_EXTERN void (*aom_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c +void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c +void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c void aom_dc_128_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x16 aom_dc_128_predictor_32x16_c +void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -332,13 +362,16 @@ void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8 RTCD_EXTERN void (*aom_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c +void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c +void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_c +void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -349,19 +382,24 @@ void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const ui RTCD_EXTERN void (*aom_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c +void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c +void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c void aom_dc_left_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x16 aom_dc_left_predictor_32x16_c +void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -369,13 +407,16 @@ void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint RTCD_EXTERN void (*aom_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c +void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c +void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_c +void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -386,19 +427,24 @@ void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*aom_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c +void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c +void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x16 aom_dc_predictor_32x16_c +void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -406,13 +452,16 @@ void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t * RTCD_EXTERN void (*aom_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c +void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c +void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_c +void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -423,19 +472,24 @@ void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin RTCD_EXTERN void (*aom_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c +void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c +void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c void aom_dc_top_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x16 aom_dc_top_predictor_32x16_c +void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -443,13 +497,16 @@ void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8 RTCD_EXTERN void (*aom_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c +void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c +void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_c +void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -502,19 +559,23 @@ void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*aom_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x32 aom_h_predictor_16x32_c +void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x8 aom_h_predictor_16x8_c +void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_2x2 aom_h_predictor_2x2_c void aom_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x16 aom_h_predictor_32x16_c +void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -522,13 +583,16 @@ void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a RTCD_EXTERN void (*aom_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_4x8 aom_h_predictor_4x8_c +void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x16 aom_h_predictor_8x16_c +void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x4 aom_h_predictor_8x4_c +void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -1452,6 +1516,25 @@ void aom_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *d void aom_highbd_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src aom_highbd_convolve8_add_src_c + +void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_hip_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz aom_highbd_convolve8_add_src_horiz_c + +void aom_highbd_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz_hip aom_highbd_convolve8_add_src_horiz_hip_c + +void aom_highbd_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert aom_highbd_convolve8_add_src_vert_c + +void aom_highbd_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert_hip aom_highbd_convolve8_add_src_vert_hip_c + void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); @@ -1483,7 +1566,8 @@ void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uin RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_d117_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_16x16 aom_highbd_d117_predictor_16x16_c +void aom_highbd_d117_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_16x32 aom_highbd_d117_predictor_16x32_c @@ -1498,10 +1582,12 @@ void aom_highbd_d117_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d117_predictor_32x16 aom_highbd_d117_predictor_32x16_c void aom_highbd_d117_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_32x32 aom_highbd_d117_predictor_32x32_c +void aom_highbd_d117_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_c +void aom_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_4x8 aom_highbd_d117_predictor_4x8_c @@ -1513,10 +1599,12 @@ void aom_highbd_d117_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d117_predictor_8x4 aom_highbd_d117_predictor_8x4_c void aom_highbd_d117_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_8x8 aom_highbd_d117_predictor_8x8_c +void aom_highbd_d117_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_16x16 aom_highbd_d135_predictor_16x16_c +void aom_highbd_d135_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_16x32 aom_highbd_d135_predictor_16x32_c @@ -1531,10 +1619,12 @@ void aom_highbd_d135_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d135_predictor_32x16 aom_highbd_d135_predictor_32x16_c void aom_highbd_d135_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_32x32 aom_highbd_d135_predictor_32x32_c +void aom_highbd_d135_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_c +void aom_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_4x8 aom_highbd_d135_predictor_4x8_c @@ -1546,10 +1636,12 @@ void aom_highbd_d135_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d135_predictor_8x4 aom_highbd_d135_predictor_8x4_c void aom_highbd_d135_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_8x8 aom_highbd_d135_predictor_8x8_c +void aom_highbd_d135_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_16x16 aom_highbd_d153_predictor_16x16_c +void aom_highbd_d153_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_16x32 aom_highbd_d153_predictor_16x32_c @@ -1564,10 +1656,12 @@ void aom_highbd_d153_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d153_predictor_32x16 aom_highbd_d153_predictor_32x16_c void aom_highbd_d153_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_32x32 aom_highbd_d153_predictor_32x32_c +void aom_highbd_d153_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_c +void aom_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_4x8 aom_highbd_d153_predictor_4x8_c @@ -1579,7 +1673,8 @@ void aom_highbd_d153_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d153_predictor_8x4 aom_highbd_d153_predictor_8x4_c void aom_highbd_d153_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_8x8 aom_highbd_d153_predictor_8x8_c +void aom_highbd_d153_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d207e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d207e_predictor_16x16 aom_highbd_d207e_predictor_16x16_c @@ -1615,37 +1710,47 @@ void aom_highbd_d207e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_d207e_predictor_8x8 aom_highbd_d207e_predictor_8x8_c void aom_highbd_d45e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x16 aom_highbd_d45e_predictor_16x16_c +void aom_highbd_d45e_predictor_16x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x32 aom_highbd_d45e_predictor_16x32_c +void aom_highbd_d45e_predictor_16x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x8 aom_highbd_d45e_predictor_16x8_c +void aom_highbd_d45e_predictor_16x8_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d45e_predictor_2x2 aom_highbd_d45e_predictor_2x2_c void aom_highbd_d45e_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x16 aom_highbd_d45e_predictor_32x16_c +void aom_highbd_d45e_predictor_32x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x32 aom_highbd_d45e_predictor_32x32_c +void aom_highbd_d45e_predictor_32x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_c +void aom_highbd_d45e_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_c +void aom_highbd_d45e_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_c +void aom_highbd_d45e_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_c +void aom_highbd_d45e_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_c +void aom_highbd_d45e_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d63e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d63e_predictor_16x16 aom_highbd_d63e_predictor_16x16_c @@ -1681,86 +1786,109 @@ void aom_highbd_d63e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d63e_predictor_8x8 aom_highbd_d63e_predictor_8x8_c void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_c +void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_c +void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_c +void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c void aom_highbd_dc_128_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_c +void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_c +void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_c +void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_c +void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_c +void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_c +void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_c +void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_c +void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_c +void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_c +void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c void aom_highbd_dc_left_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_c +void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_c +void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_c +void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_c +void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_c +void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_c +void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_c +void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); RTCD_EXTERN void (*aom_highbd_dc_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_c +void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_c +void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_c +void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -1771,50 +1899,63 @@ void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const u RTCD_EXTERN void (*aom_highbd_dc_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_c +void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_c +void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_c +void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); RTCD_EXTERN void (*aom_highbd_dc_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_c +void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_c +void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_c +void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c void aom_highbd_dc_top_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_c +void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_c +void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_c +void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_c +void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_c +void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_c +void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_c +void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -1837,37 +1978,47 @@ void aom_highbd_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int strid RTCD_EXTERN void (*aom_highbd_fdct8x8)(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_c +void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_c +void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_c +void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c void aom_highbd_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_c +void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_c +void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_c +void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_c +void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_c +void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_c +void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_c +void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_h_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); #define aom_highbd_iwht4x4_16_add aom_highbd_iwht4x4_16_add_c @@ -1881,6 +2032,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4)(uint16_t *s, int pitch, const ui void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1889,10 +2041,12 @@ RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8)(uint16_t *s, int pitch, const ui void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); RTCD_EXTERN void (*aom_highbd_lpf_horizontal_edge_16)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1905,6 +2059,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_vertical_16)(uint16_t *s, int pitch, const uin void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); RTCD_EXTERN void (*aom_highbd_lpf_vertical_16_dual)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1913,6 +2068,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_vertical_4)(uint16_t *s, int pitch, const uint void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1921,6 +2077,7 @@ RTCD_EXTERN void (*aom_highbd_lpf_vertical_8)(uint16_t *s, int pitch, const uint void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); RTCD_EXTERN void (*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); unsigned int aom_highbd_masked_sad16x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2502,16 +2659,19 @@ void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const RTCD_EXTERN void (*aom_highbd_v_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_c +void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_c +void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c void aom_highbd_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_c +void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2522,13 +2682,16 @@ void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const ui RTCD_EXTERN void (*aom_highbd_v_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_c +void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_c +void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_c +void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_v_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2618,25 +2781,21 @@ void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, con RTCD_EXTERN void (*aom_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_c void aom_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); @@ -2644,24 +2803,21 @@ void aom_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, cons RTCD_EXTERN void (*aom_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_c void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); RTCD_EXTERN void (*aom_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*aom_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_c unsigned int aom_masked_sad16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_masked_sad16x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2932,37 +3088,52 @@ unsigned int aom_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, con RTCD_EXTERN unsigned int (*aom_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c +void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c +void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c +void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c void aom_paeth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c +void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c +void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c +void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c +void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c +void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c +void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c +void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -3261,37 +3432,47 @@ void aom_smooth_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_ #define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x16 aom_smooth_predictor_16x16_c +void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_c +void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_c +void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c void aom_smooth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x16 aom_smooth_predictor_32x16_c +void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x32 aom_smooth_predictor_32x32_c +void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x4 aom_smooth_predictor_4x4_c +void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_c +void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_c +void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x4 aom_smooth_predictor_8x4_c +void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x8 aom_smooth_predictor_8x8_c +void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c @@ -3481,19 +3662,24 @@ void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t RTCD_EXTERN void (*aom_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x32 aom_v_predictor_16x32_c +void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x8 aom_v_predictor_16x8_c +void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_2x2 aom_v_predictor_2x2_c void aom_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x16 aom_v_predictor_32x16_c +void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); RTCD_EXTERN void (*aom_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3501,13 +3687,16 @@ void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a RTCD_EXTERN void (*aom_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_4x8 aom_v_predictor_4x8_c +void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x16 aom_v_predictor_8x16_c +void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x4 aom_v_predictor_8x4_c +void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3617,6 +3806,14 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_convolve8 = aom_convolve8_sse2; if (flags & HAS_SSSE3) aom_convolve8 = aom_convolve8_ssse3; if (flags & HAS_AVX2) aom_convolve8 = aom_convolve8_avx2; + aom_convolve8_add_src = aom_convolve8_add_src_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src = aom_convolve8_add_src_ssse3; + aom_convolve8_add_src_hip = aom_convolve8_add_src_hip_c; + if (flags & HAS_SSE2) aom_convolve8_add_src_hip = aom_convolve8_add_src_hip_sse2; + aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_ssse3; + aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_ssse3; aom_convolve8_avg = aom_convolve8_avg_c; if (flags & HAS_SSE2) aom_convolve8_avg = aom_convolve8_avg_sse2; if (flags & HAS_SSSE3) aom_convolve8_avg = aom_convolve8_avg_ssse3; @@ -3650,34 +3847,90 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_ssse3; aom_dc_128_predictor_16x16 = aom_dc_128_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_128_predictor_16x16 = aom_dc_128_predictor_16x16_sse2; + aom_dc_128_predictor_16x32 = aom_dc_128_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_16x32 = aom_dc_128_predictor_16x32_sse2; + aom_dc_128_predictor_16x8 = aom_dc_128_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_16x8 = aom_dc_128_predictor_16x8_sse2; + aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_avx2; aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_avx2; aom_dc_128_predictor_4x4 = aom_dc_128_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_128_predictor_4x4 = aom_dc_128_predictor_4x4_sse2; + aom_dc_128_predictor_4x8 = aom_dc_128_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_4x8 = aom_dc_128_predictor_4x8_sse2; + aom_dc_128_predictor_8x16 = aom_dc_128_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_8x16 = aom_dc_128_predictor_8x16_sse2; + aom_dc_128_predictor_8x4 = aom_dc_128_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_128_predictor_8x4 = aom_dc_128_predictor_8x4_sse2; aom_dc_128_predictor_8x8 = aom_dc_128_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_128_predictor_8x8 = aom_dc_128_predictor_8x8_sse2; aom_dc_left_predictor_16x16 = aom_dc_left_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_left_predictor_16x16 = aom_dc_left_predictor_16x16_sse2; + aom_dc_left_predictor_16x32 = aom_dc_left_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_16x32 = aom_dc_left_predictor_16x32_sse2; + aom_dc_left_predictor_16x8 = aom_dc_left_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_16x8 = aom_dc_left_predictor_16x8_sse2; + aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_avx2; aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_avx2; aom_dc_left_predictor_4x4 = aom_dc_left_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_left_predictor_4x4 = aom_dc_left_predictor_4x4_sse2; + aom_dc_left_predictor_4x8 = aom_dc_left_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_4x8 = aom_dc_left_predictor_4x8_sse2; + aom_dc_left_predictor_8x16 = aom_dc_left_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_8x16 = aom_dc_left_predictor_8x16_sse2; + aom_dc_left_predictor_8x4 = aom_dc_left_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_left_predictor_8x4 = aom_dc_left_predictor_8x4_sse2; aom_dc_left_predictor_8x8 = aom_dc_left_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_left_predictor_8x8 = aom_dc_left_predictor_8x8_sse2; aom_dc_predictor_16x16 = aom_dc_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_predictor_16x16 = aom_dc_predictor_16x16_sse2; + aom_dc_predictor_16x32 = aom_dc_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_predictor_16x32 = aom_dc_predictor_16x32_sse2; + aom_dc_predictor_16x8 = aom_dc_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_predictor_16x8 = aom_dc_predictor_16x8_sse2; + aom_dc_predictor_32x16 = aom_dc_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_avx2; aom_dc_predictor_32x32 = aom_dc_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_avx2; aom_dc_predictor_4x4 = aom_dc_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_predictor_4x4 = aom_dc_predictor_4x4_sse2; + aom_dc_predictor_4x8 = aom_dc_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_predictor_4x8 = aom_dc_predictor_4x8_sse2; + aom_dc_predictor_8x16 = aom_dc_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_predictor_8x16 = aom_dc_predictor_8x16_sse2; + aom_dc_predictor_8x4 = aom_dc_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_predictor_8x4 = aom_dc_predictor_8x4_sse2; aom_dc_predictor_8x8 = aom_dc_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_predictor_8x8 = aom_dc_predictor_8x8_sse2; aom_dc_top_predictor_16x16 = aom_dc_top_predictor_16x16_c; if (flags & HAS_SSE2) aom_dc_top_predictor_16x16 = aom_dc_top_predictor_16x16_sse2; + aom_dc_top_predictor_16x32 = aom_dc_top_predictor_16x32_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_16x32 = aom_dc_top_predictor_16x32_sse2; + aom_dc_top_predictor_16x8 = aom_dc_top_predictor_16x8_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_16x8 = aom_dc_top_predictor_16x8_sse2; + aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_avx2; aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_c; if (flags & HAS_SSE2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_avx2; aom_dc_top_predictor_4x4 = aom_dc_top_predictor_4x4_c; if (flags & HAS_SSE2) aom_dc_top_predictor_4x4 = aom_dc_top_predictor_4x4_sse2; + aom_dc_top_predictor_4x8 = aom_dc_top_predictor_4x8_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_4x8 = aom_dc_top_predictor_4x8_sse2; + aom_dc_top_predictor_8x16 = aom_dc_top_predictor_8x16_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_8x16 = aom_dc_top_predictor_8x16_sse2; + aom_dc_top_predictor_8x4 = aom_dc_top_predictor_8x4_c; + if (flags & HAS_SSE2) aom_dc_top_predictor_8x4 = aom_dc_top_predictor_8x4_sse2; aom_dc_top_predictor_8x8 = aom_dc_top_predictor_8x8_c; if (flags & HAS_SSE2) aom_dc_top_predictor_8x8 = aom_dc_top_predictor_8x8_sse2; aom_fdct16x16 = aom_fdct16x16_c; @@ -3703,10 +3956,23 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_get_mb_ss = aom_get_mb_ss_sse2; aom_h_predictor_16x16 = aom_h_predictor_16x16_c; if (flags & HAS_SSE2) aom_h_predictor_16x16 = aom_h_predictor_16x16_sse2; + aom_h_predictor_16x32 = aom_h_predictor_16x32_c; + if (flags & HAS_SSE2) aom_h_predictor_16x32 = aom_h_predictor_16x32_sse2; + aom_h_predictor_16x8 = aom_h_predictor_16x8_c; + if (flags & HAS_SSE2) aom_h_predictor_16x8 = aom_h_predictor_16x8_sse2; + aom_h_predictor_32x16 = aom_h_predictor_32x16_c; + if (flags & HAS_SSE2) aom_h_predictor_32x16 = aom_h_predictor_32x16_sse2; aom_h_predictor_32x32 = aom_h_predictor_32x32_c; if (flags & HAS_SSE2) aom_h_predictor_32x32 = aom_h_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_h_predictor_32x32 = aom_h_predictor_32x32_avx2; aom_h_predictor_4x4 = aom_h_predictor_4x4_c; if (flags & HAS_SSE2) aom_h_predictor_4x4 = aom_h_predictor_4x4_sse2; + aom_h_predictor_4x8 = aom_h_predictor_4x8_c; + if (flags & HAS_SSE2) aom_h_predictor_4x8 = aom_h_predictor_4x8_sse2; + aom_h_predictor_8x16 = aom_h_predictor_8x16_c; + if (flags & HAS_SSE2) aom_h_predictor_8x16 = aom_h_predictor_8x16_sse2; + aom_h_predictor_8x4 = aom_h_predictor_8x4_c; + if (flags & HAS_SSE2) aom_h_predictor_8x4 = aom_h_predictor_8x4_sse2; aom_h_predictor_8x8 = aom_h_predictor_8x8_c; if (flags & HAS_SSE2) aom_h_predictor_8x8 = aom_h_predictor_8x8_sse2; aom_hadamard_16x16 = aom_hadamard_16x16_c; @@ -4075,6 +4341,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_highbd_comp_avg_upsampled_pred = aom_highbd_comp_avg_upsampled_pred_sse2; aom_highbd_convolve8 = aom_highbd_convolve8_c; if (flags & HAS_AVX2) aom_highbd_convolve8 = aom_highbd_convolve8_avx2; + aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_c; + if (flags & HAS_SSSE3) aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_ssse3; aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_c; if (flags & HAS_AVX2) aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_avx2; aom_highbd_convolve8_avg_horiz = aom_highbd_convolve8_avg_horiz_c; @@ -4091,14 +4359,130 @@ static void setup_rtcd_internal(void) aom_highbd_convolve_copy = aom_highbd_convolve_copy_c; if (flags & HAS_SSE2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2; if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2; + aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_ssse3; + aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_ssse3; + aom_highbd_d117_predictor_4x4 = aom_highbd_d117_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d117_predictor_4x4 = aom_highbd_d117_predictor_4x4_sse2; + aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_ssse3; + aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_ssse3; + aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_ssse3; + aom_highbd_d135_predictor_4x4 = aom_highbd_d135_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d135_predictor_4x4 = aom_highbd_d135_predictor_4x4_sse2; + aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_ssse3; + aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_ssse3; + aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_ssse3; + aom_highbd_d153_predictor_4x4 = aom_highbd_d153_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d153_predictor_4x4 = aom_highbd_d153_predictor_4x4_sse2; + aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_ssse3; + aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_avx2; + aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_avx2; + aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_avx2; + aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_avx2; + aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_avx2; + aom_highbd_d45e_predictor_4x4 = aom_highbd_d45e_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_4x4 = aom_highbd_d45e_predictor_4x4_sse2; + aom_highbd_d45e_predictor_4x8 = aom_highbd_d45e_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_4x8 = aom_highbd_d45e_predictor_4x8_sse2; + aom_highbd_d45e_predictor_8x16 = aom_highbd_d45e_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_8x16 = aom_highbd_d45e_predictor_8x16_sse2; + aom_highbd_d45e_predictor_8x4 = aom_highbd_d45e_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_8x4 = aom_highbd_d45e_predictor_8x4_sse2; + aom_highbd_d45e_predictor_8x8 = aom_highbd_d45e_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_d45e_predictor_8x8 = aom_highbd_d45e_predictor_8x8_sse2; + aom_highbd_dc_128_predictor_16x16 = aom_highbd_dc_128_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x16 = aom_highbd_dc_128_predictor_16x16_sse2; + aom_highbd_dc_128_predictor_16x32 = aom_highbd_dc_128_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x32 = aom_highbd_dc_128_predictor_16x32_sse2; + aom_highbd_dc_128_predictor_16x8 = aom_highbd_dc_128_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x8 = aom_highbd_dc_128_predictor_16x8_sse2; + aom_highbd_dc_128_predictor_32x16 = aom_highbd_dc_128_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_32x16 = aom_highbd_dc_128_predictor_32x16_sse2; + aom_highbd_dc_128_predictor_32x32 = aom_highbd_dc_128_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_32x32 = aom_highbd_dc_128_predictor_32x32_sse2; + aom_highbd_dc_128_predictor_4x4 = aom_highbd_dc_128_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_4x4 = aom_highbd_dc_128_predictor_4x4_sse2; + aom_highbd_dc_128_predictor_4x8 = aom_highbd_dc_128_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_4x8 = aom_highbd_dc_128_predictor_4x8_sse2; + aom_highbd_dc_128_predictor_8x16 = aom_highbd_dc_128_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x16 = aom_highbd_dc_128_predictor_8x16_sse2; + aom_highbd_dc_128_predictor_8x4 = aom_highbd_dc_128_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x4 = aom_highbd_dc_128_predictor_8x4_sse2; + aom_highbd_dc_128_predictor_8x8 = aom_highbd_dc_128_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x8 = aom_highbd_dc_128_predictor_8x8_sse2; + aom_highbd_dc_left_predictor_16x16 = aom_highbd_dc_left_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x16 = aom_highbd_dc_left_predictor_16x16_sse2; + aom_highbd_dc_left_predictor_16x32 = aom_highbd_dc_left_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x32 = aom_highbd_dc_left_predictor_16x32_sse2; + aom_highbd_dc_left_predictor_16x8 = aom_highbd_dc_left_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x8 = aom_highbd_dc_left_predictor_16x8_sse2; + aom_highbd_dc_left_predictor_32x16 = aom_highbd_dc_left_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_32x16 = aom_highbd_dc_left_predictor_32x16_sse2; + aom_highbd_dc_left_predictor_32x32 = aom_highbd_dc_left_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_32x32 = aom_highbd_dc_left_predictor_32x32_sse2; + aom_highbd_dc_left_predictor_4x4 = aom_highbd_dc_left_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_4x4 = aom_highbd_dc_left_predictor_4x4_sse2; + aom_highbd_dc_left_predictor_4x8 = aom_highbd_dc_left_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_4x8 = aom_highbd_dc_left_predictor_4x8_sse2; + aom_highbd_dc_left_predictor_8x16 = aom_highbd_dc_left_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x16 = aom_highbd_dc_left_predictor_8x16_sse2; + aom_highbd_dc_left_predictor_8x4 = aom_highbd_dc_left_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x4 = aom_highbd_dc_left_predictor_8x4_sse2; + aom_highbd_dc_left_predictor_8x8 = aom_highbd_dc_left_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x8 = aom_highbd_dc_left_predictor_8x8_sse2; aom_highbd_dc_predictor_16x16 = aom_highbd_dc_predictor_16x16_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x16 = aom_highbd_dc_predictor_16x16_sse2; + aom_highbd_dc_predictor_16x32 = aom_highbd_dc_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x32 = aom_highbd_dc_predictor_16x32_sse2; + aom_highbd_dc_predictor_16x8 = aom_highbd_dc_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x8 = aom_highbd_dc_predictor_16x8_sse2; + aom_highbd_dc_predictor_32x16 = aom_highbd_dc_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_32x16 = aom_highbd_dc_predictor_32x16_sse2; aom_highbd_dc_predictor_32x32 = aom_highbd_dc_predictor_32x32_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_32x32 = aom_highbd_dc_predictor_32x32_sse2; aom_highbd_dc_predictor_4x4 = aom_highbd_dc_predictor_4x4_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_4x4 = aom_highbd_dc_predictor_4x4_sse2; + aom_highbd_dc_predictor_4x8 = aom_highbd_dc_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_4x8 = aom_highbd_dc_predictor_4x8_sse2; + aom_highbd_dc_predictor_8x16 = aom_highbd_dc_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x16 = aom_highbd_dc_predictor_8x16_sse2; + aom_highbd_dc_predictor_8x4 = aom_highbd_dc_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x4 = aom_highbd_dc_predictor_8x4_sse2; aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_c; if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_sse2; + aom_highbd_dc_top_predictor_16x16 = aom_highbd_dc_top_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x16 = aom_highbd_dc_top_predictor_16x16_sse2; + aom_highbd_dc_top_predictor_16x32 = aom_highbd_dc_top_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x32 = aom_highbd_dc_top_predictor_16x32_sse2; + aom_highbd_dc_top_predictor_16x8 = aom_highbd_dc_top_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x8 = aom_highbd_dc_top_predictor_16x8_sse2; + aom_highbd_dc_top_predictor_32x16 = aom_highbd_dc_top_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_32x16 = aom_highbd_dc_top_predictor_32x16_sse2; + aom_highbd_dc_top_predictor_32x32 = aom_highbd_dc_top_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_32x32 = aom_highbd_dc_top_predictor_32x32_sse2; + aom_highbd_dc_top_predictor_4x4 = aom_highbd_dc_top_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_4x4 = aom_highbd_dc_top_predictor_4x4_sse2; + aom_highbd_dc_top_predictor_4x8 = aom_highbd_dc_top_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_4x8 = aom_highbd_dc_top_predictor_4x8_sse2; + aom_highbd_dc_top_predictor_8x16 = aom_highbd_dc_top_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x16 = aom_highbd_dc_top_predictor_8x16_sse2; + aom_highbd_dc_top_predictor_8x4 = aom_highbd_dc_top_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x4 = aom_highbd_dc_top_predictor_8x4_sse2; + aom_highbd_dc_top_predictor_8x8 = aom_highbd_dc_top_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x8 = aom_highbd_dc_top_predictor_8x8_sse2; aom_highbd_fdct16x16 = aom_highbd_fdct16x16_c; if (flags & HAS_SSE2) aom_highbd_fdct16x16 = aom_highbd_fdct16x16_sse2; aom_highbd_fdct32x32 = aom_highbd_fdct32x32_c; @@ -4109,30 +4493,56 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_highbd_fdct4x4 = aom_highbd_fdct4x4_sse2; aom_highbd_fdct8x8 = aom_highbd_fdct8x8_c; if (flags & HAS_SSE2) aom_highbd_fdct8x8 = aom_highbd_fdct8x8_sse2; + aom_highbd_h_predictor_16x16 = aom_highbd_h_predictor_16x16_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_16x16 = aom_highbd_h_predictor_16x16_sse2; + aom_highbd_h_predictor_16x32 = aom_highbd_h_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_16x32 = aom_highbd_h_predictor_16x32_sse2; + aom_highbd_h_predictor_16x8 = aom_highbd_h_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_16x8 = aom_highbd_h_predictor_16x8_sse2; + aom_highbd_h_predictor_32x16 = aom_highbd_h_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_32x16 = aom_highbd_h_predictor_32x16_sse2; + aom_highbd_h_predictor_32x32 = aom_highbd_h_predictor_32x32_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_32x32 = aom_highbd_h_predictor_32x32_sse2; + aom_highbd_h_predictor_4x4 = aom_highbd_h_predictor_4x4_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_4x4 = aom_highbd_h_predictor_4x4_sse2; + aom_highbd_h_predictor_4x8 = aom_highbd_h_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_4x8 = aom_highbd_h_predictor_4x8_sse2; + aom_highbd_h_predictor_8x16 = aom_highbd_h_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_8x16 = aom_highbd_h_predictor_8x16_sse2; + aom_highbd_h_predictor_8x4 = aom_highbd_h_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_8x4 = aom_highbd_h_predictor_8x4_sse2; + aom_highbd_h_predictor_8x8 = aom_highbd_h_predictor_8x8_c; + if (flags & HAS_SSE2) aom_highbd_h_predictor_8x8 = aom_highbd_h_predictor_8x8_sse2; aom_highbd_lpf_horizontal_4 = aom_highbd_lpf_horizontal_4_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_4 = aom_highbd_lpf_horizontal_4_sse2; aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_avx2; aom_highbd_lpf_horizontal_8 = aom_highbd_lpf_horizontal_8_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_8 = aom_highbd_lpf_horizontal_8_sse2; aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_avx2; aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_avx2; aom_highbd_lpf_horizontal_edge_8 = aom_highbd_lpf_horizontal_edge_8_c; if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_edge_8 = aom_highbd_lpf_horizontal_edge_8_sse2; aom_highbd_lpf_vertical_16 = aom_highbd_lpf_vertical_16_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_16 = aom_highbd_lpf_vertical_16_sse2; aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_avx2; aom_highbd_lpf_vertical_4 = aom_highbd_lpf_vertical_4_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_4 = aom_highbd_lpf_vertical_4_sse2; aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_avx2; aom_highbd_lpf_vertical_8 = aom_highbd_lpf_vertical_8_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_8 = aom_highbd_lpf_vertical_8_sse2; aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_avx2; aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_ssse3; aom_highbd_masked_sad16x32 = aom_highbd_masked_sad16x32_c; @@ -4316,10 +4726,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_highbd_upsampled_pred = aom_highbd_upsampled_pred_sse2; aom_highbd_v_predictor_16x16 = aom_highbd_v_predictor_16x16_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_16x16 = aom_highbd_v_predictor_16x16_sse2; + aom_highbd_v_predictor_16x32 = aom_highbd_v_predictor_16x32_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_16x32 = aom_highbd_v_predictor_16x32_sse2; + aom_highbd_v_predictor_16x8 = aom_highbd_v_predictor_16x8_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_16x8 = aom_highbd_v_predictor_16x8_sse2; + aom_highbd_v_predictor_32x16 = aom_highbd_v_predictor_32x16_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_32x16 = aom_highbd_v_predictor_32x16_sse2; aom_highbd_v_predictor_32x32 = aom_highbd_v_predictor_32x32_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_32x32 = aom_highbd_v_predictor_32x32_sse2; aom_highbd_v_predictor_4x4 = aom_highbd_v_predictor_4x4_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_4x4 = aom_highbd_v_predictor_4x4_sse2; + aom_highbd_v_predictor_4x8 = aom_highbd_v_predictor_4x8_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_4x8 = aom_highbd_v_predictor_4x8_sse2; + aom_highbd_v_predictor_8x16 = aom_highbd_v_predictor_8x16_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_8x16 = aom_highbd_v_predictor_8x16_sse2; + aom_highbd_v_predictor_8x4 = aom_highbd_v_predictor_8x4_c; + if (flags & HAS_SSE2) aom_highbd_v_predictor_8x4 = aom_highbd_v_predictor_8x4_sse2; aom_highbd_v_predictor_8x8 = aom_highbd_v_predictor_8x8_c; if (flags & HAS_SSE2) aom_highbd_v_predictor_8x8 = aom_highbd_v_predictor_8x8_sse2; aom_idct16x16_10_add = aom_idct16x16_10_add_c; @@ -4368,30 +4790,18 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_iwht4x4_16_add = aom_iwht4x4_16_add_sse2; aom_lpf_horizontal_4 = aom_lpf_horizontal_4_c; if (flags & HAS_SSE2) aom_lpf_horizontal_4 = aom_lpf_horizontal_4_sse2; - aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_c; - if (flags & HAS_SSE2) aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_sse2; aom_lpf_horizontal_8 = aom_lpf_horizontal_8_c; if (flags & HAS_SSE2) aom_lpf_horizontal_8 = aom_lpf_horizontal_8_sse2; - aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_c; - if (flags & HAS_SSE2) aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_sse2; aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_c; if (flags & HAS_SSE2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_avx2; aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_c; if (flags & HAS_SSE2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_avx2; aom_lpf_vertical_16 = aom_lpf_vertical_16_c; if (flags & HAS_SSE2) aom_lpf_vertical_16 = aom_lpf_vertical_16_sse2; - aom_lpf_vertical_16_dual = aom_lpf_vertical_16_dual_c; - if (flags & HAS_SSE2) aom_lpf_vertical_16_dual = aom_lpf_vertical_16_dual_sse2; aom_lpf_vertical_4 = aom_lpf_vertical_4_c; if (flags & HAS_SSE2) aom_lpf_vertical_4 = aom_lpf_vertical_4_sse2; - aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_c; - if (flags & HAS_SSE2) aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_sse2; aom_lpf_vertical_8 = aom_lpf_vertical_8_c; if (flags & HAS_SSE2) aom_lpf_vertical_8 = aom_lpf_vertical_8_sse2; - aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_c; - if (flags & HAS_SSE2) aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_sse2; aom_masked_sad16x16 = aom_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_masked_sad16x16 = aom_masked_sad16x16_ssse3; aom_masked_sad16x32 = aom_masked_sad16x32_c; @@ -4507,6 +4917,31 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_obmc_variance8x4 = aom_obmc_variance8x4_sse4_1; aom_obmc_variance8x8 = aom_obmc_variance8x8_c; if (flags & HAS_SSE4_1) aom_obmc_variance8x8 = aom_obmc_variance8x8_sse4_1; + aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_avx2; + aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2; + aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_avx2; + aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_avx2; + aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_avx2; + aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3; + aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3; + aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3; + aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3; + aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_ssse3; aom_quantize_b = aom_quantize_b_c; if (flags & HAS_SSE2) aom_quantize_b = aom_quantize_b_sse2; aom_sad16x16 = aom_sad16x16_c; @@ -4627,6 +5062,26 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_satd = aom_satd_sse2; aom_scaled_2d = aom_scaled_2d_c; if (flags & HAS_SSSE3) aom_scaled_2d = aom_scaled_2d_ssse3; + aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_ssse3; + aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3; + aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3; + aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_ssse3; + aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_ssse3; + aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3; + aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3; + aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3; + aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3; + aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_ssse3; aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_ssse3; @@ -4719,10 +5174,24 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) aom_upsampled_pred = aom_upsampled_pred_sse2; aom_v_predictor_16x16 = aom_v_predictor_16x16_c; if (flags & HAS_SSE2) aom_v_predictor_16x16 = aom_v_predictor_16x16_sse2; + aom_v_predictor_16x32 = aom_v_predictor_16x32_c; + if (flags & HAS_SSE2) aom_v_predictor_16x32 = aom_v_predictor_16x32_sse2; + aom_v_predictor_16x8 = aom_v_predictor_16x8_c; + if (flags & HAS_SSE2) aom_v_predictor_16x8 = aom_v_predictor_16x8_sse2; + aom_v_predictor_32x16 = aom_v_predictor_32x16_c; + if (flags & HAS_SSE2) aom_v_predictor_32x16 = aom_v_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x16 = aom_v_predictor_32x16_avx2; aom_v_predictor_32x32 = aom_v_predictor_32x32_c; if (flags & HAS_SSE2) aom_v_predictor_32x32 = aom_v_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x32 = aom_v_predictor_32x32_avx2; aom_v_predictor_4x4 = aom_v_predictor_4x4_c; if (flags & HAS_SSE2) aom_v_predictor_4x4 = aom_v_predictor_4x4_sse2; + aom_v_predictor_4x8 = aom_v_predictor_4x8_c; + if (flags & HAS_SSE2) aom_v_predictor_4x8 = aom_v_predictor_4x8_sse2; + aom_v_predictor_8x16 = aom_v_predictor_8x16_c; + if (flags & HAS_SSE2) aom_v_predictor_8x16 = aom_v_predictor_8x16_sse2; + aom_v_predictor_8x4 = aom_v_predictor_8x4_c; + if (flags & HAS_SSE2) aom_v_predictor_8x4 = aom_v_predictor_8x4_sse2; aom_v_predictor_8x8 = aom_v_predictor_8x8_c; if (flags & HAS_SSE2) aom_v_predictor_8x8 = aom_v_predictor_8x8_sse2; aom_variance16x16 = aom_variance16x16_c; diff --git a/media/libaom/config/win/mingw32/av1_rtcd.h b/media/libaom/config/win/mingw32/av1_rtcd.h index 3bd4729ab..b5d286212 100644 --- a/media/libaom/config/win/mingw32/av1_rtcd.h +++ b/media/libaom/config/win/mingw32/av1_rtcd.h @@ -31,44 +31,39 @@ struct search_site_config; struct mv; union int_mv; struct yv12_buffer_config; -typedef uint16_t od_dering_in; #ifdef __cplusplus extern "C" { #endif -void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void apply_selfguided_restoration_c(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration)(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); + +void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_highbd_sse4_1(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration_highbd)(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_2d)(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); + +void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); + void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_horiz)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +void av1_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +RTCD_EXTERN void (*av1_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); + void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); @@ -76,9 +71,6 @@ RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_ int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_diamond_search_sad av1_diamond_search_sad_c -void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_fdct8x8_quant av1_fdct8x8_quant_c - void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); @@ -133,6 +125,14 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, struct t void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); RTCD_EXTERN void (*av1_fht8x8)(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength); +void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength); + +void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength); +void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength); + int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_full_range_search av1_full_range_search_c @@ -141,46 +141,42 @@ int av1_full_search_sadx3(const struct macroblock *x, const struct mv *ref_mv, i int av1_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); RTCD_EXTERN int (*av1_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); -void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); +void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type); #define av1_fwd_idtx av1_fwd_idtx_c -void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x32 av1_fwd_txfm2d_16x32_c -void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x8 av1_fwd_txfm2d_16x8_c -void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x16 av1_fwd_txfm2d_32x16_c -void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x8 av1_fwd_txfm2d_4x8_c -void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); - -void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x16 av1_fwd_txfm2d_8x16_c -void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x4 av1_fwd_txfm2d_8x4_c -void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define av1_fwht4x4 av1_fwht4x4_c @@ -207,6 +203,14 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c +void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); + +void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); + void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c @@ -221,6 +225,10 @@ void av1_highbd_convolve_init_c(void); void av1_highbd_convolve_init_sse4_1(void); RTCD_EXTERN void (*av1_highbd_convolve_init)(void); +void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +void av1_highbd_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); + void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); void av1_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); RTCD_EXTERN void (*av1_highbd_convolve_vert)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); @@ -267,9 +275,6 @@ void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c -void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); -#define av1_highbd_quantize_b av1_highbd_quantize_b_c - void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); @@ -282,6 +287,14 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); @@ -334,48 +347,45 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); RTCD_EXTERN void (*av1_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); -void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c -void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c -void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c -void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c -void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c - -void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c -void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c -void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); void av1_lowbd_convolve_init_c(void); void av1_lowbd_convolve_init_ssse3(void); RTCD_EXTERN void (*av1_lowbd_convolve_init)(void); -void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale); #define av1_quantize_b av1_quantize_b_c void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -387,10 +397,26 @@ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void av1_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); + void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); void av1_temporal_filter_apply_sse2(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); RTCD_EXTERN void (*av1_temporal_filter_apply)(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); +void av1_upsample_intra_edge_c(uint8_t *p, int sz); +void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz); +RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz); + +void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd); +void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd); +RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd); + void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); @@ -408,64 +434,38 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, con uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); RTCD_EXTERN uint64_t (*av1_wedge_sse_from_residuals)(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); +void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); + +int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift); + double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); RTCD_EXTERN double (*compute_cross_correlation)(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); -void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); -int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse2(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_ssse3(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse4_1(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -RTCD_EXTERN int (*od_dir_find8)(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); - -void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - -void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - void aom_rtcd(void); #ifdef RTCD_C @@ -476,26 +476,20 @@ static void setup_rtcd_internal(void) (void)flags; - aom_clpf_block = aom_clpf_block_c; - if (flags & HAS_SSE2) aom_clpf_block = aom_clpf_block_sse2; - if (flags & HAS_SSSE3) aom_clpf_block = aom_clpf_block_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block = aom_clpf_block_sse4_1; - aom_clpf_block_hbd = aom_clpf_block_hbd_c; - if (flags & HAS_SSE2) aom_clpf_block_hbd = aom_clpf_block_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_block_hbd = aom_clpf_block_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block_hbd = aom_clpf_block_hbd_sse4_1; - aom_clpf_hblock = aom_clpf_hblock_c; - if (flags & HAS_SSE2) aom_clpf_hblock = aom_clpf_hblock_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock = aom_clpf_hblock_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock = aom_clpf_hblock_sse4_1; - aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_c; - if (flags & HAS_SSE2) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse4_1; + apply_selfguided_restoration = apply_selfguided_restoration_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1; + apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_sse4_1; av1_block_error = av1_block_error_c; if (flags & HAS_AVX2) av1_block_error = av1_block_error_avx2; + av1_convolve_2d = av1_convolve_2d_c; + if (flags & HAS_SSE2) av1_convolve_2d = av1_convolve_2d_sse2; + av1_convolve_2d_scale = av1_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1; av1_convolve_horiz = av1_convolve_horiz_c; if (flags & HAS_SSSE3) av1_convolve_horiz = av1_convolve_horiz_ssse3; + av1_convolve_rounding = av1_convolve_rounding_c; + if (flags & HAS_AVX2) av1_convolve_rounding = av1_convolve_rounding_avx2; av1_convolve_vert = av1_convolve_vert_c; if (flags & HAS_SSSE3) av1_convolve_vert = av1_convolve_vert_ssse3; av1_fht16x16 = av1_fht16x16_c; @@ -520,6 +514,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) av1_fht8x4 = av1_fht8x4_sse2; av1_fht8x8 = av1_fht8x8_c; if (flags & HAS_SSE2) av1_fht8x8 = av1_fht8x8_sse2; + av1_filter_intra_edge = av1_filter_intra_edge_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1; + av1_filter_intra_edge_high = av1_filter_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1; av1_full_search_sad = av1_full_search_sad_c; if (flags & HAS_SSE3) av1_full_search_sad = av1_full_search_sadx3; if (flags & HAS_SSE4_1) av1_full_search_sad = av1_full_search_sadx8; @@ -529,16 +527,20 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_sse4_1; av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_sse4_1; - av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_c; - if (flags & HAS_SSE4_1) av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_sse4_1; av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_sse4_1; av1_highbd_block_error = av1_highbd_block_error_c; if (flags & HAS_SSE2) av1_highbd_block_error = av1_highbd_block_error_sse2; + av1_highbd_convolve_2d = av1_highbd_convolve_2d_c; + if (flags & HAS_SSSE3) av1_highbd_convolve_2d = av1_highbd_convolve_2d_ssse3; + av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1; av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_sse4_1; av1_highbd_convolve_init = av1_highbd_convolve_init_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_init = av1_highbd_convolve_init_sse4_1; + av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_c; + if (flags & HAS_AVX2) av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_avx2; av1_highbd_convolve_vert = av1_highbd_convolve_vert_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_vert = av1_highbd_convolve_vert_sse4_1; av1_highbd_quantize_fp = av1_highbd_quantize_fp_c; @@ -546,6 +548,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_highbd_quantize_fp = av1_highbd_quantize_fp_avx2; av1_highbd_warp_affine = av1_highbd_warp_affine_c; if (flags & HAS_SSSE3) av1_highbd_warp_affine = av1_highbd_warp_affine_ssse3; + av1_highpass_filter = av1_highpass_filter_c; + if (flags & HAS_SSE4_1) av1_highpass_filter = av1_highpass_filter_sse4_1; + av1_highpass_filter_highbd = av1_highpass_filter_highbd_c; + if (flags & HAS_SSE4_1) av1_highpass_filter_highbd = av1_highpass_filter_highbd_sse4_1; av1_iht16x16_256_add = av1_iht16x16_256_add_c; if (flags & HAS_SSE2) av1_iht16x16_256_add = av1_iht16x16_256_add_sse2; if (flags & HAS_AVX2) av1_iht16x16_256_add = av1_iht16x16_256_add_avx2; @@ -580,8 +586,16 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_quantize_fp = av1_quantize_fp_avx2; av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c; if (flags & HAS_AVX2) av1_quantize_fp_32x32 = av1_quantize_fp_32x32_avx2; + av1_selfguided_restoration = av1_selfguided_restoration_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1; + av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_sse4_1; av1_temporal_filter_apply = av1_temporal_filter_apply_c; if (flags & HAS_SSE2) av1_temporal_filter_apply = av1_temporal_filter_apply_sse2; + av1_upsample_intra_edge = av1_upsample_intra_edge_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1; + av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1; av1_warp_affine = av1_warp_affine_c; if (flags & HAS_SSE2) av1_warp_affine = av1_warp_affine_sse2; if (flags & HAS_SSSE3) av1_warp_affine = av1_warp_affine_ssse3; @@ -591,44 +605,28 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE2) av1_wedge_sign_from_residuals = av1_wedge_sign_from_residuals_sse2; av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_c; if (flags & HAS_SSE2) av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_sse2; + cdef_filter_block = cdef_filter_block_c; + if (flags & HAS_SSE2) cdef_filter_block = cdef_filter_block_sse2; + if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3; + if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1; + if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2; + cdef_find_dir = cdef_find_dir_c; + if (flags & HAS_SSE2) cdef_find_dir = cdef_find_dir_sse2; + if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3; + if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1; + if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2; compute_cross_correlation = compute_cross_correlation_c; if (flags & HAS_SSE4_1) compute_cross_correlation = compute_cross_correlation_sse4_1; - copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_c; - if (flags & HAS_SSE2) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse4_1; - copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_c; - if (flags & HAS_SSE2) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse4_1; - copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_c; - if (flags & HAS_SSE2) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse4_1; - copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_c; - if (flags & HAS_SSE2) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse4_1; copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_c; if (flags & HAS_SSE2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1; + if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2; copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_c; if (flags & HAS_SSE2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1; - od_dir_find8 = od_dir_find8_c; - if (flags & HAS_SSE2) od_dir_find8 = od_dir_find8_sse2; - if (flags & HAS_SSSE3) od_dir_find8 = od_dir_find8_ssse3; - if (flags & HAS_SSE4_1) od_dir_find8 = od_dir_find8_sse4_1; - od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_c; - if (flags & HAS_SSE2) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse4_1; - od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_c; - if (flags & HAS_SSE2) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse4_1; + if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2; } #endif diff --git a/media/libaom/config/win/x64/aom_config.asm b/media/libaom/config/win/x64/aom_config.asm index a6f33361c..35a1d0fa8 100644 --- a/media/libaom/config/win/x64/aom_config.asm +++ b/media/libaom/config/win/x64/aom_config.asm @@ -46,8 +46,6 @@ CONFIG_AV1 equ 1 CONFIG_STATIC_MSVCRT equ 0 CONFIG_SPATIAL_RESAMPLING equ 1 CONFIG_REALTIME_ONLY equ 0 -CONFIG_ONTHEFLY_BITPACKING equ 0 -CONFIG_ERROR_CONCEALMENT equ 0 CONFIG_SHARED equ 0 CONFIG_STATIC equ 1 CONFIG_SMALL equ 0 @@ -60,73 +58,71 @@ CONFIG_ACCOUNTING equ 0 CONFIG_INSPECTION equ 0 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_ENCODE_PERF_TESTS equ 0 +CONFIG_BITSTREAM_DEBUG equ 0 +CONFIG_SYMBOLRATE equ 0 CONFIG_COEFFICIENT_RANGE_CHECKING equ 0 CONFIG_LOWBITDEPTH equ 1 CONFIG_HIGHBITDEPTH equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_SIZE_LIMIT equ 1 -CONFIG_COLORSPACE_HEADERS equ 0 CONFIG_FP_MB_STATS equ 0 CONFIG_CDEF equ 1 +CONFIG_CDEF_SINGLEPASS equ 1 CONFIG_VAR_TX equ 1 CONFIG_RECT_TX equ 1 CONFIG_RECT_TX_EXT equ 0 CONFIG_TPL_MV equ 0 CONFIG_DUAL_FILTER equ 1 -CONFIG_CONVOLVE_ROUND equ 0 +CONFIG_CONVOLVE_ROUND equ 1 CONFIG_COMPOUND_ROUND equ 0 CONFIG_EXT_TX equ 1 -CONFIG_DPCM_INTRA equ 0 CONFIG_TX64X64 equ 0 CONFIG_EXT_INTRA equ 1 CONFIG_INTRA_INTERP equ 0 CONFIG_FILTER_INTRA equ 0 -CONFIG_INTRA_EDGE equ 0 +CONFIG_INTRA_EDGE equ 1 CONFIG_INTRABC equ 0 -CONFIG_EXT_INTER equ 1 CONFIG_INTERINTRA equ 1 CONFIG_WEDGE equ 1 CONFIG_COMPOUND_SEGMENT equ 1 CONFIG_EXT_REFS equ 1 -CONFIG_ALTREF2 equ 0 -CONFIG_SPEED_REFS equ 0 -CONFIG_GF_GROUPS equ 0 -CONFIG_FLEX_REFS equ 0 CONFIG_GLOBAL_MOTION equ 1 CONFIG_NEW_QUANT equ 0 CONFIG_SUPERTX equ 0 CONFIG_ANS equ 0 -CONFIG_LOOP_RESTORATION equ 0 +CONFIG_LOOP_RESTORATION equ 1 +CONFIG_STRIPED_LOOP_RESTORATION equ 0 CONFIG_EXT_PARTITION equ 0 CONFIG_EXT_PARTITION_TYPES equ 0 +CONFIG_EXT_PARTITION_TYPES_AB equ 0 CONFIG_UNPOISON_PARTITION_CTX equ 0 CONFIG_EXT_TILE equ 0 CONFIG_MOTION_VAR equ 1 CONFIG_NCOBMC equ 0 CONFIG_WARPED_MOTION equ 1 CONFIG_Q_ADAPT_PROBS equ 0 -CONFIG_BITSTREAM_DEBUG equ 0 CONFIG_INTER_STATS_ONLY equ 0 -CONFIG_ALT_INTRA equ 1 -CONFIG_PALETTE equ 1 CONFIG_PALETTE_DELTA_ENCODING equ 0 CONFIG_RAWBITS equ 0 -CONFIG_EC_SMALLMUL equ 1 +CONFIG_KF_CTX equ 0 CONFIG_PVQ equ 0 CONFIG_CFL equ 0 CONFIG_XIPHRC equ 0 CONFIG_DCT_ONLY equ 0 +CONFIG_DAALA_TX equ 0 CONFIG_DAALA_DCT4 equ 0 CONFIG_DAALA_DCT8 equ 0 +CONFIG_DAALA_DCT16 equ 0 +CONFIG_DAALA_DCT32 equ 0 +CONFIG_DAALA_DCT64 equ 0 CONFIG_CB4X4 equ 1 CONFIG_CHROMA_2X2 equ 0 CONFIG_CHROMA_SUB8X8 equ 1 CONFIG_FRAME_SIZE equ 0 -CONFIG_DELTA_Q equ 1 CONFIG_EXT_DELTA_Q equ 1 CONFIG_ADAPT_SCAN equ 0 -CONFIG_FILTER_7BIT equ 1 CONFIG_PARALLEL_DEBLOCKING equ 1 +CONFIG_DEBLOCK_13TAP equ 0 CONFIG_LOOPFILTERING_ACROSS_TILES equ 1 CONFIG_TEMPMV_SIGNALING equ 1 CONFIG_RD_DEBUG equ 0 @@ -135,29 +131,46 @@ CONFIG_COEF_INTERLEAVE equ 0 CONFIG_ENTROPY_STATS equ 0 CONFIG_MASKED_TX equ 0 CONFIG_DEPENDENT_HORZTILES equ 0 -CONFIG_DIST_8X8 equ 0 -CONFIG_DAALA_DIST equ 0 -CONFIG_TRIPRED equ 0 +CONFIG_DIST_8X8 equ 1 CONFIG_PALETTE_THROUGHPUT equ 1 CONFIG_REF_ADAPT equ 0 CONFIG_LV_MAP equ 0 +CONFIG_CTX1D equ 0 CONFIG_TXK_SEL equ 0 CONFIG_MV_COMPRESS equ 1 +CONFIG_SEGMENT_ZEROMV equ 0 CONFIG_FRAME_SUPERRES equ 0 CONFIG_NEW_MULTISYMBOL equ 0 CONFIG_COMPOUND_SINGLEREF equ 0 -CONFIG_AOM_QM equ 0 +CONFIG_AOM_QM equ 1 CONFIG_ONE_SIDED_COMPOUND equ 1 -CONFIG_EXT_COMP_REFS equ 0 +CONFIG_EXT_COMP_REFS equ 1 CONFIG_SMOOTH_HV equ 1 CONFIG_VAR_REFS equ 0 -CONFIG_RECT_INTRA_PRED equ 1 CONFIG_LGT equ 0 +CONFIG_LGT_FROM_PRED equ 0 CONFIG_SBL_SYMBOL equ 0 CONFIG_NCOBMC_ADAPT_WEIGHT equ 0 CONFIG_BGSPRITE equ 0 CONFIG_VAR_TX_NO_TX_MODE equ 0 CONFIG_MRC_TX equ 0 CONFIG_LPF_DIRECT equ 0 -CONFIG_UV_LVL equ 0 +CONFIG_LOOPFILTER_LEVEL equ 0 +CONFIG_NO_FRAME_CONTEXT_SIGNALING equ 0 +CONFIG_TXMG equ 1 +CONFIG_MAX_TILE equ 0 +CONFIG_HASH_ME equ 0 +CONFIG_COLORSPACE_HEADERS equ 0 +CONFIG_MFMV equ 0 +CONFIG_FRAME_MARKER equ 0 +CONFIG_JNT_COMP equ 0 +CONFIG_FRAME_SIGN_BIAS equ 0 +CONFIG_EXT_SKIP equ 0 +CONFIG_OBU equ 0 +CONFIG_AMVR equ 0 +CONFIG_LPF_SB equ 0 +CONFIG_OPT_REF_MV equ 0 +CONFIG_TMV equ 0 +CONFIG_RESTRICT_COMPRESSED_HDR equ 0 +CONFIG_HORZONLY_FRAME_SUPERRES equ 0 CONFIG_ANALYZER equ 0 diff --git a/media/libaom/config/win/x64/aom_config.h b/media/libaom/config/win/x64/aom_config.h index ede001474..b606bf6e0 100644 --- a/media/libaom/config/win/x64/aom_config.h +++ b/media/libaom/config/win/x64/aom_config.h @@ -59,8 +59,6 @@ #define CONFIG_STATIC_MSVCRT 0 #define CONFIG_SPATIAL_RESAMPLING 1 #define CONFIG_REALTIME_ONLY 0 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 #define CONFIG_SHARED 0 #define CONFIG_STATIC 1 #define CONFIG_SMALL 0 @@ -73,73 +71,71 @@ #define CONFIG_INSPECTION 0 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_BITSTREAM_DEBUG 0 +#define CONFIG_SYMBOLRATE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_LOWBITDEPTH 1 #define CONFIG_HIGHBITDEPTH 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_SIZE_LIMIT 1 -#define CONFIG_COLORSPACE_HEADERS 0 #define CONFIG_FP_MB_STATS 0 #define CONFIG_CDEF 1 +#define CONFIG_CDEF_SINGLEPASS 1 #define CONFIG_VAR_TX 1 #define CONFIG_RECT_TX 1 #define CONFIG_RECT_TX_EXT 0 #define CONFIG_TPL_MV 0 #define CONFIG_DUAL_FILTER 1 -#define CONFIG_CONVOLVE_ROUND 0 +#define CONFIG_CONVOLVE_ROUND 1 #define CONFIG_COMPOUND_ROUND 0 #define CONFIG_EXT_TX 1 -#define CONFIG_DPCM_INTRA 0 #define CONFIG_TX64X64 0 #define CONFIG_EXT_INTRA 1 #define CONFIG_INTRA_INTERP 0 #define CONFIG_FILTER_INTRA 0 -#define CONFIG_INTRA_EDGE 0 +#define CONFIG_INTRA_EDGE 1 #define CONFIG_INTRABC 0 -#define CONFIG_EXT_INTER 1 #define CONFIG_INTERINTRA 1 #define CONFIG_WEDGE 1 #define CONFIG_COMPOUND_SEGMENT 1 #define CONFIG_EXT_REFS 1 -#define CONFIG_ALTREF2 0 -#define CONFIG_SPEED_REFS 0 -#define CONFIG_GF_GROUPS 0 -#define CONFIG_FLEX_REFS 0 #define CONFIG_GLOBAL_MOTION 1 #define CONFIG_NEW_QUANT 0 #define CONFIG_SUPERTX 0 #define CONFIG_ANS 0 -#define CONFIG_LOOP_RESTORATION 0 +#define CONFIG_LOOP_RESTORATION 1 +#define CONFIG_STRIPED_LOOP_RESTORATION 0 #define CONFIG_EXT_PARTITION 0 #define CONFIG_EXT_PARTITION_TYPES 0 +#define CONFIG_EXT_PARTITION_TYPES_AB 0 #define CONFIG_UNPOISON_PARTITION_CTX 0 #define CONFIG_EXT_TILE 0 #define CONFIG_MOTION_VAR 1 #define CONFIG_NCOBMC 0 #define CONFIG_WARPED_MOTION 1 #define CONFIG_Q_ADAPT_PROBS 0 -#define CONFIG_BITSTREAM_DEBUG 0 #define CONFIG_INTER_STATS_ONLY 0 -#define CONFIG_ALT_INTRA 1 -#define CONFIG_PALETTE 1 #define CONFIG_PALETTE_DELTA_ENCODING 0 #define CONFIG_RAWBITS 0 -#define CONFIG_EC_SMALLMUL 1 +#define CONFIG_KF_CTX 0 #define CONFIG_PVQ 0 #define CONFIG_CFL 0 #define CONFIG_XIPHRC 0 #define CONFIG_DCT_ONLY 0 +#define CONFIG_DAALA_TX 0 #define CONFIG_DAALA_DCT4 0 #define CONFIG_DAALA_DCT8 0 +#define CONFIG_DAALA_DCT16 0 +#define CONFIG_DAALA_DCT32 0 +#define CONFIG_DAALA_DCT64 0 #define CONFIG_CB4X4 1 #define CONFIG_CHROMA_2X2 0 #define CONFIG_CHROMA_SUB8X8 1 #define CONFIG_FRAME_SIZE 0 -#define CONFIG_DELTA_Q 1 #define CONFIG_EXT_DELTA_Q 1 #define CONFIG_ADAPT_SCAN 0 -#define CONFIG_FILTER_7BIT 1 #define CONFIG_PARALLEL_DEBLOCKING 1 +#define CONFIG_DEBLOCK_13TAP 0 #define CONFIG_LOOPFILTERING_ACROSS_TILES 1 #define CONFIG_TEMPMV_SIGNALING 1 #define CONFIG_RD_DEBUG 0 @@ -148,31 +144,48 @@ #define CONFIG_ENTROPY_STATS 0 #define CONFIG_MASKED_TX 0 #define CONFIG_DEPENDENT_HORZTILES 0 -#define CONFIG_DIST_8X8 0 -#define CONFIG_DAALA_DIST 0 -#define CONFIG_TRIPRED 0 +#define CONFIG_DIST_8X8 1 #define CONFIG_PALETTE_THROUGHPUT 1 #define CONFIG_REF_ADAPT 0 #define CONFIG_LV_MAP 0 +#define CONFIG_CTX1D 0 #define CONFIG_TXK_SEL 0 #define CONFIG_MV_COMPRESS 1 +#define CONFIG_SEGMENT_ZEROMV 0 #define CONFIG_FRAME_SUPERRES 0 #define CONFIG_NEW_MULTISYMBOL 0 #define CONFIG_COMPOUND_SINGLEREF 0 -#define CONFIG_AOM_QM 0 +#define CONFIG_AOM_QM 1 #define CONFIG_ONE_SIDED_COMPOUND 1 -#define CONFIG_EXT_COMP_REFS 0 +#define CONFIG_EXT_COMP_REFS 1 #define CONFIG_SMOOTH_HV 1 #define CONFIG_VAR_REFS 0 -#define CONFIG_RECT_INTRA_PRED 1 #define CONFIG_LGT 0 +#define CONFIG_LGT_FROM_PRED 0 #define CONFIG_SBL_SYMBOL 0 #define CONFIG_NCOBMC_ADAPT_WEIGHT 0 #define CONFIG_BGSPRITE 0 #define CONFIG_VAR_TX_NO_TX_MODE 0 #define CONFIG_MRC_TX 0 #define CONFIG_LPF_DIRECT 0 -#define CONFIG_UV_LVL 0 +#define CONFIG_LOOPFILTER_LEVEL 0 +#define CONFIG_NO_FRAME_CONTEXT_SIGNALING 0 +#define CONFIG_TXMG 1 +#define CONFIG_MAX_TILE 0 +#define CONFIG_HASH_ME 0 +#define CONFIG_COLORSPACE_HEADERS 0 +#define CONFIG_MFMV 0 +#define CONFIG_FRAME_MARKER 0 +#define CONFIG_JNT_COMP 0 +#define CONFIG_FRAME_SIGN_BIAS 0 +#define CONFIG_EXT_SKIP 0 +#define CONFIG_OBU 0 +#define CONFIG_AMVR 0 +#define CONFIG_LPF_SB 0 +#define CONFIG_OPT_REF_MV 0 +#define CONFIG_TMV 0 +#define CONFIG_RESTRICT_COMPRESSED_HDR 0 +#define CONFIG_HORZONLY_FRAME_SUPERRES 0 #define CONFIG_ANALYZER 0 #define DECODE_WIDTH_LIMIT 8192 #define DECODE_HEIGHT_LIMIT 4608 diff --git a/media/libaom/config/win/x64/aom_dsp_rtcd.h b/media/libaom/config/win/x64/aom_dsp_rtcd.h index 6ee856f1d..d2bee385f 100644 --- a/media/libaom/config/win/x64/aom_dsp_rtcd.h +++ b/media/libaom/config/win/x64/aom_dsp_rtcd.h @@ -20,6 +20,9 @@ extern "C" { #endif +void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +#define aom_blend_a64_d32_mask aom_blend_a64_d32_mask_c + void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); @@ -51,6 +54,28 @@ void aom_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, void aom_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*aom_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_hip_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_hip aom_convolve8_add_src_hip_sse2 + +void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz_hip aom_convolve8_add_src_horiz_hip_c + +void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert_hip aom_convolve8_add_src_vert_hip_c + void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -312,33 +337,41 @@ void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin #define aom_dc_128_predictor_16x16 aom_dc_128_predictor_16x16_sse2 void aom_dc_128_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c +void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_sse2 void aom_dc_128_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c +void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_sse2 void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c void aom_dc_128_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x16 aom_dc_128_predictor_32x16_c +void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x32 aom_dc_128_predictor_32x32_sse2 +void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_4x4 aom_dc_128_predictor_4x4_sse2 void aom_dc_128_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c +void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_sse2 void aom_dc_128_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c +void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_sse2 void aom_dc_128_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_c +void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_sse2 void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -349,33 +382,41 @@ void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const ui #define aom_dc_left_predictor_16x16 aom_dc_left_predictor_16x16_sse2 void aom_dc_left_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c +void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_sse2 void aom_dc_left_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c +void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_sse2 void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c void aom_dc_left_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x16 aom_dc_left_predictor_32x16_c +void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x32 aom_dc_left_predictor_32x32_sse2 +void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_4x4 aom_dc_left_predictor_4x4_sse2 void aom_dc_left_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c +void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_sse2 void aom_dc_left_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c +void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_sse2 void aom_dc_left_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_c +void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_sse2 void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -386,33 +427,41 @@ void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_dc_predictor_16x16 aom_dc_predictor_16x16_sse2 void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c +void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_sse2 void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c +void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_sse2 void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x16 aom_dc_predictor_32x16_c +void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x32 aom_dc_predictor_32x32_sse2 +void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_4x4 aom_dc_predictor_4x4_sse2 void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c +void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_sse2 void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c +void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_sse2 void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_c +void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_sse2 void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -423,33 +472,41 @@ void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin #define aom_dc_top_predictor_16x16 aom_dc_top_predictor_16x16_sse2 void aom_dc_top_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c +void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_sse2 void aom_dc_top_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c +void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_sse2 void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c void aom_dc_top_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x16 aom_dc_top_predictor_32x16_c +void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x32 aom_dc_top_predictor_32x32_sse2 +void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_4x4 aom_dc_top_predictor_4x4_sse2 void aom_dc_top_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c +void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_sse2 void aom_dc_top_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c +void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_sse2 void aom_dc_top_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_c +void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_sse2 void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -503,33 +560,40 @@ void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_h_predictor_16x16 aom_h_predictor_16x16_sse2 void aom_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x32 aom_h_predictor_16x32_c +void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_16x32 aom_h_predictor_16x32_sse2 void aom_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x8 aom_h_predictor_16x8_c +void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_16x8 aom_h_predictor_16x8_sse2 void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_2x2 aom_h_predictor_2x2_c void aom_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x16 aom_h_predictor_32x16_c +void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_32x16 aom_h_predictor_32x16_sse2 void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x32 aom_h_predictor_32x32_sse2 +void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_4x4 aom_h_predictor_4x4_sse2 void aom_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_4x8 aom_h_predictor_4x8_c +void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_4x8 aom_h_predictor_4x8_sse2 void aom_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x16 aom_h_predictor_8x16_c +void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_8x16 aom_h_predictor_8x16_sse2 void aom_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x4 aom_h_predictor_8x4_c +void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_8x4 aom_h_predictor_8x4_sse2 void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -1455,6 +1519,26 @@ void aom_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t void aom_highbd_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src aom_highbd_convolve8_add_src_sse2 + +void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_hip_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz aom_highbd_convolve8_add_src_horiz_c + +void aom_highbd_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz_hip aom_highbd_convolve8_add_src_horiz_hip_c + +void aom_highbd_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert aom_highbd_convolve8_add_src_vert_c + +void aom_highbd_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert_hip aom_highbd_convolve8_add_src_vert_hip_c + void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); @@ -1491,7 +1575,8 @@ void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uin RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_d117_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_16x16 aom_highbd_d117_predictor_16x16_c +void aom_highbd_d117_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_16x32 aom_highbd_d117_predictor_16x32_c @@ -1506,10 +1591,12 @@ void aom_highbd_d117_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d117_predictor_32x16 aom_highbd_d117_predictor_32x16_c void aom_highbd_d117_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_32x32 aom_highbd_d117_predictor_32x32_c +void aom_highbd_d117_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_c +void aom_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_sse2 void aom_highbd_d117_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_4x8 aom_highbd_d117_predictor_4x8_c @@ -1521,10 +1608,12 @@ void aom_highbd_d117_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d117_predictor_8x4 aom_highbd_d117_predictor_8x4_c void aom_highbd_d117_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_8x8 aom_highbd_d117_predictor_8x8_c +void aom_highbd_d117_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_16x16 aom_highbd_d135_predictor_16x16_c +void aom_highbd_d135_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_16x32 aom_highbd_d135_predictor_16x32_c @@ -1539,10 +1628,12 @@ void aom_highbd_d135_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d135_predictor_32x16 aom_highbd_d135_predictor_32x16_c void aom_highbd_d135_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_32x32 aom_highbd_d135_predictor_32x32_c +void aom_highbd_d135_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_c +void aom_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_sse2 void aom_highbd_d135_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_4x8 aom_highbd_d135_predictor_4x8_c @@ -1554,10 +1645,12 @@ void aom_highbd_d135_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d135_predictor_8x4 aom_highbd_d135_predictor_8x4_c void aom_highbd_d135_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_8x8 aom_highbd_d135_predictor_8x8_c +void aom_highbd_d135_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_16x16 aom_highbd_d153_predictor_16x16_c +void aom_highbd_d153_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_16x32 aom_highbd_d153_predictor_16x32_c @@ -1572,10 +1665,12 @@ void aom_highbd_d153_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d153_predictor_32x16 aom_highbd_d153_predictor_32x16_c void aom_highbd_d153_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_32x32 aom_highbd_d153_predictor_32x32_c +void aom_highbd_d153_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_c +void aom_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_sse2 void aom_highbd_d153_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_4x8 aom_highbd_d153_predictor_4x8_c @@ -1587,7 +1682,8 @@ void aom_highbd_d153_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d153_predictor_8x4 aom_highbd_d153_predictor_8x4_c void aom_highbd_d153_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_8x8 aom_highbd_d153_predictor_8x8_c +void aom_highbd_d153_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d207e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d207e_predictor_16x16 aom_highbd_d207e_predictor_16x16_c @@ -1623,37 +1719,47 @@ void aom_highbd_d207e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_d207e_predictor_8x8 aom_highbd_d207e_predictor_8x8_c void aom_highbd_d45e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x16 aom_highbd_d45e_predictor_16x16_c +void aom_highbd_d45e_predictor_16x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x32 aom_highbd_d45e_predictor_16x32_c +void aom_highbd_d45e_predictor_16x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x8 aom_highbd_d45e_predictor_16x8_c +void aom_highbd_d45e_predictor_16x8_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d45e_predictor_2x2 aom_highbd_d45e_predictor_2x2_c void aom_highbd_d45e_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x16 aom_highbd_d45e_predictor_32x16_c +void aom_highbd_d45e_predictor_32x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x32 aom_highbd_d45e_predictor_32x32_c +void aom_highbd_d45e_predictor_32x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_c +void aom_highbd_d45e_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_sse2 void aom_highbd_d45e_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_c +void aom_highbd_d45e_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_sse2 void aom_highbd_d45e_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_c +void aom_highbd_d45e_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_sse2 void aom_highbd_d45e_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_c +void aom_highbd_d45e_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_sse2 void aom_highbd_d45e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_c +void aom_highbd_d45e_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_sse2 void aom_highbd_d63e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d63e_predictor_16x16 aom_highbd_d63e_predictor_16x16_c @@ -1689,86 +1795,109 @@ void aom_highbd_d63e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d63e_predictor_8x8 aom_highbd_d63e_predictor_8x8_c void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_c +void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_sse2 void aom_highbd_dc_128_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_c +void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_sse2 void aom_highbd_dc_128_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_c +void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_sse2 void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c void aom_highbd_dc_128_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_c +void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_sse2 void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_c +void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_sse2 void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_c +void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_sse2 void aom_highbd_dc_128_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_c +void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_sse2 void aom_highbd_dc_128_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_c +void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_sse2 void aom_highbd_dc_128_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_c +void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_sse2 void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_c +void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_sse2 void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_c +void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_sse2 void aom_highbd_dc_left_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_c +void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_sse2 void aom_highbd_dc_left_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_c +void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_sse2 void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c void aom_highbd_dc_left_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_c +void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_sse2 void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_c +void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_sse2 void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_c +void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_sse2 void aom_highbd_dc_left_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_c +void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_sse2 void aom_highbd_dc_left_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_c +void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_sse2 void aom_highbd_dc_left_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_c +void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_sse2 void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_c +void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_sse2 void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_16x16 aom_highbd_dc_predictor_16x16_sse2 void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_c +void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_sse2 void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_c +void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_sse2 void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_c +void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_sse2 void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -1779,50 +1908,63 @@ void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_dc_predictor_4x4 aom_highbd_dc_predictor_4x4_sse2 void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_c +void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_sse2 void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_c +void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_sse2 void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_c +void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_sse2 void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_8x8 aom_highbd_dc_predictor_8x8_sse2 void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_c +void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_sse2 void aom_highbd_dc_top_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_c +void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_sse2 void aom_highbd_dc_top_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_c +void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_sse2 void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c void aom_highbd_dc_top_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_c +void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_sse2 void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_c +void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_sse2 void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_c +void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_sse2 void aom_highbd_dc_top_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_c +void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_sse2 void aom_highbd_dc_top_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_c +void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_sse2 void aom_highbd_dc_top_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_c +void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_sse2 void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_c +void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_sse2 void aom_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -1845,37 +1987,47 @@ void aom_highbd_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int strid #define aom_highbd_fdct8x8 aom_highbd_fdct8x8_sse2 void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_c +void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_sse2 void aom_highbd_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_c +void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_sse2 void aom_highbd_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_c +void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_sse2 void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c void aom_highbd_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_c +void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_sse2 void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_c +void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_sse2 void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_c +void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_sse2 void aom_highbd_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_c +void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_sse2 void aom_highbd_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_c +void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_sse2 void aom_highbd_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_c +void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_sse2 void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_c +void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_sse2 void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); #define aom_highbd_iwht4x4_16_add aom_highbd_iwht4x4_16_add_c @@ -1889,7 +2041,8 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int pitch, const uint8_t *bli void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_horizontal_4_dual aom_highbd_lpf_horizontal_4_dual_sse2 +void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1897,11 +2050,13 @@ void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *bli void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_horizontal_8_dual aom_highbd_lpf_horizontal_8_dual_sse2 +void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); -#define aom_highbd_lpf_horizontal_edge_16 aom_highbd_lpf_horizontal_edge_16_sse2 +void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_edge_16)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1913,7 +2068,8 @@ void aom_highbd_lpf_vertical_16_sse2(uint16_t *s, int pitch, const uint8_t *blim void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); -#define aom_highbd_lpf_vertical_16_dual aom_highbd_lpf_vertical_16_dual_sse2 +void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_16_dual)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1921,7 +2077,8 @@ void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimi void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_vertical_4_dual aom_highbd_lpf_vertical_4_dual_sse2 +void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1929,7 +2086,8 @@ void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimi void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_vertical_8_dual aom_highbd_lpf_vertical_8_dual_sse2 +void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); unsigned int aom_highbd_masked_sad16x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_highbd_masked_sad16x16_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2510,16 +2668,19 @@ void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_v_predictor_16x16 aom_highbd_v_predictor_16x16_sse2 void aom_highbd_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_c +void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_sse2 void aom_highbd_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_c +void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_sse2 void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c void aom_highbd_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_c +void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_sse2 void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2530,13 +2691,16 @@ void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_v_predictor_4x4 aom_highbd_v_predictor_4x4_sse2 void aom_highbd_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_c +void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_sse2 void aom_highbd_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_c +void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_sse2 void aom_highbd_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_c +void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_sse2 void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2626,50 +2790,43 @@ void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, con #define aom_lpf_horizontal_4 aom_lpf_horizontal_4_sse2 void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_sse2 +#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_horizontal_8 aom_lpf_horizontal_8_sse2 void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_sse2 +#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_c void aom_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_16 aom_lpf_horizontal_edge_16_sse2 void aom_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_8 aom_lpf_horizontal_edge_8_sse2 void aom_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_16 aom_lpf_vertical_16_sse2 void aom_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_sse2 +#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_c void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_4 aom_lpf_vertical_4_sse2 void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_sse2 +#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_8 aom_lpf_vertical_8_sse2 void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_sse2 +#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_c unsigned int aom_masked_sad16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_masked_sad16x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2940,37 +3097,52 @@ unsigned int aom_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, con RTCD_EXTERN unsigned int (*aom_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c +void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c +void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c +void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c void aom_paeth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c +void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c +void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c +void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c +void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c +void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c +void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c +void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -3273,37 +3445,47 @@ void aom_smooth_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_ #define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x16 aom_smooth_predictor_16x16_c +void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_c +void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_c +void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c void aom_smooth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x16 aom_smooth_predictor_32x16_c +void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x32 aom_smooth_predictor_32x32_c +void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x4 aom_smooth_predictor_4x4_c +void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_c +void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_c +void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x4 aom_smooth_predictor_8x4_c +void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x8 aom_smooth_predictor_8x8_c +void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c @@ -3493,33 +3675,41 @@ void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_v_predictor_16x16 aom_v_predictor_16x16_sse2 void aom_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x32 aom_v_predictor_16x32_c +void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_16x32 aom_v_predictor_16x32_sse2 void aom_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x8 aom_v_predictor_16x8_c +void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_16x8 aom_v_predictor_16x8_sse2 void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_2x2 aom_v_predictor_2x2_c void aom_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x16 aom_v_predictor_32x16_c +void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x32 aom_v_predictor_32x32_sse2 +void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_4x4 aom_v_predictor_4x4_sse2 void aom_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_4x8 aom_v_predictor_4x8_c +void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_4x8 aom_v_predictor_4x8_sse2 void aom_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x16 aom_v_predictor_8x16_c +void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_8x16 aom_v_predictor_8x16_sse2 void aom_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x4 aom_v_predictor_8x4_c +void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_8x4 aom_v_predictor_8x4_sse2 void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3626,6 +3816,12 @@ static void setup_rtcd_internal(void) aom_convolve8 = aom_convolve8_sse2; if (flags & HAS_SSSE3) aom_convolve8 = aom_convolve8_ssse3; if (flags & HAS_AVX2) aom_convolve8 = aom_convolve8_avx2; + aom_convolve8_add_src = aom_convolve8_add_src_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src = aom_convolve8_add_src_ssse3; + aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_ssse3; + aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_ssse3; aom_convolve8_avg = aom_convolve8_avg_sse2; if (flags & HAS_SSSE3) aom_convolve8_avg = aom_convolve8_avg_ssse3; aom_convolve8_avg_horiz = aom_convolve8_avg_horiz_sse2; @@ -3648,6 +3844,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_d153_predictor_8x8 = aom_d153_predictor_8x8_ssse3; aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_c; if (flags & HAS_SSSE3) aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_ssse3; + aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_avx2; + aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_avx2; + aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_avx2; + aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_avx2; + aom_dc_predictor_32x16 = aom_dc_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_avx2; + aom_dc_predictor_32x32 = aom_dc_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_avx2; + aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_avx2; + aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_avx2; aom_fdct32x32 = aom_fdct32x32_sse2; if (flags & HAS_AVX2) aom_fdct32x32 = aom_fdct32x32_avx2; aom_fdct32x32_rd = aom_fdct32x32_rd_sse2; @@ -3656,6 +3868,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_fdct8x8 = aom_fdct8x8_ssse3; aom_get16x16var = aom_get16x16var_sse2; if (flags & HAS_AVX2) aom_get16x16var = aom_get16x16var_avx2; + aom_h_predictor_32x32 = aom_h_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_h_predictor_32x32 = aom_h_predictor_32x32_avx2; aom_hadamard_8x8 = aom_hadamard_8x8_sse2; if (flags & HAS_SSSE3) aom_hadamard_8x8 = aom_hadamard_8x8_ssse3; aom_highbd_10_masked_sub_pixel_variance16x16 = aom_highbd_10_masked_sub_pixel_variance16x16_c; @@ -3814,6 +4028,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_sse4_1; aom_highbd_convolve8 = aom_highbd_convolve8_sse2; if (flags & HAS_AVX2) aom_highbd_convolve8 = aom_highbd_convolve8_avx2; + aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_c; + if (flags & HAS_SSSE3) aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_ssse3; aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_sse2; if (flags & HAS_AVX2) aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_avx2; aom_highbd_convolve8_avg_horiz = aom_highbd_convolve8_avg_horiz_sse2; @@ -3828,6 +4044,46 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) aom_highbd_convolve_avg = aom_highbd_convolve_avg_avx2; aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2; if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2; + aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_ssse3; + aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_ssse3; + aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_ssse3; + aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_ssse3; + aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_ssse3; + aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_ssse3; + aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_ssse3; + aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_ssse3; + aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_ssse3; + aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_avx2; + aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_avx2; + aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_avx2; + aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_avx2; + aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_avx2; + aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_avx2; + aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_avx2; + aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_avx2; + aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_avx2; + aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_avx2; + aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_avx2; aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_ssse3; aom_highbd_masked_sad16x32 = aom_highbd_masked_sad16x32_c; @@ -3979,10 +4235,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_idct8x8_12_add = aom_idct8x8_12_add_ssse3; aom_idct8x8_64_add = aom_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) aom_idct8x8_64_add = aom_idct8x8_64_add_ssse3; - aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_avx2; - aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_avx2; aom_masked_sad16x16 = aom_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_masked_sad16x16 = aom_masked_sad16x16_ssse3; aom_masked_sad16x32 = aom_masked_sad16x32_c; @@ -4089,6 +4341,31 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_obmc_variance8x4 = aom_obmc_variance8x4_sse4_1; aom_obmc_variance8x8 = aom_obmc_variance8x8_c; if (flags & HAS_SSE4_1) aom_obmc_variance8x8 = aom_obmc_variance8x8_sse4_1; + aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_avx2; + aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2; + aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_avx2; + aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_avx2; + aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_avx2; + aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3; + aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3; + aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3; + aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3; + aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_ssse3; aom_quantize_b = aom_quantize_b_sse2; if (flags & HAS_SSSE3) aom_quantize_b = aom_quantize_b_ssse3; if (flags & HAS_AVX) aom_quantize_b = aom_quantize_b_avx; @@ -4147,6 +4424,26 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_sad8x8x8 = aom_sad8x8x8_sse4_1; aom_scaled_2d = aom_scaled_2d_c; if (flags & HAS_SSSE3) aom_scaled_2d = aom_scaled_2d_ssse3; + aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_ssse3; + aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3; + aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3; + aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_ssse3; + aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_ssse3; + aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3; + aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3; + aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3; + aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3; + aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_ssse3; aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_ssse3; aom_sub_pixel_avg_variance16x32 = aom_sub_pixel_avg_variance16x32_sse2; @@ -4203,6 +4500,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_sub_pixel_variance8x4 = aom_sub_pixel_variance8x4_ssse3; aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_ssse3; + aom_v_predictor_32x16 = aom_v_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x16 = aom_v_predictor_32x16_avx2; + aom_v_predictor_32x32 = aom_v_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x32 = aom_v_predictor_32x32_avx2; aom_variance16x16 = aom_variance16x16_sse2; if (flags & HAS_AVX2) aom_variance16x16 = aom_variance16x16_avx2; aom_variance32x16 = aom_variance32x16_sse2; diff --git a/media/libaom/config/win/x64/av1_rtcd.h b/media/libaom/config/win/x64/av1_rtcd.h index d4fc99c10..66c0349aa 100644 --- a/media/libaom/config/win/x64/av1_rtcd.h +++ b/media/libaom/config/win/x64/av1_rtcd.h @@ -31,44 +31,39 @@ struct search_site_config; struct mv; union int_mv; struct yv12_buffer_config; -typedef uint16_t od_dering_in; #ifdef __cplusplus extern "C" { #endif -void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_block_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); - -void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -void aom_clpf_hblock_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); -RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void apply_selfguided_restoration_c(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration)(uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf); + +void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +void apply_selfguided_restoration_highbd_sse4_1(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); +RTCD_EXTERN void (*apply_selfguided_restoration_highbd)(uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf); int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params); +#define av1_convolve_2d av1_convolve_2d_sse2 + +void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params); + void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_horiz)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +void av1_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); +RTCD_EXTERN void (*av1_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits); + void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); @@ -76,9 +71,6 @@ RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_ int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_diamond_search_sad av1_diamond_search_sad_c -void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_fdct8x8_quant av1_fdct8x8_quant_c - void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); @@ -133,6 +125,14 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, struct t void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht8x8 av1_fht8x8_sse2 +void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength); +void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength); + +void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength); +void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength); +RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength); + int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); #define av1_full_range_search av1_full_range_search_c @@ -141,46 +141,42 @@ int av1_full_search_sadx3(const struct macroblock *x, const struct mv *ref_mv, i int av1_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); RTCD_EXTERN int (*av1_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); -void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); +void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type); #define av1_fwd_idtx av1_fwd_idtx_c -void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x32 av1_fwd_txfm2d_16x32_c -void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_16x8 av1_fwd_txfm2d_16x8_c -void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_32x16 av1_fwd_txfm2d_32x16_c -void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_4x8 av1_fwd_txfm2d_4x8_c -void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); - -void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x16 av1_fwd_txfm2d_8x16_c -void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_fwd_txfm2d_8x4 av1_fwd_txfm2d_8x4_c -void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd); void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define av1_fwht4x4 av1_fwht4x4_c @@ -213,6 +209,14 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8 void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2 +void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd); + +void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd); + void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c @@ -227,6 +231,10 @@ void av1_highbd_convolve_init_c(void); void av1_highbd_convolve_init_sse4_1(void); RTCD_EXTERN void (*av1_highbd_convolve_init)(void); +void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +void av1_highbd_convolve_rounding_avx2(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_rounding)(const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd); + void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); void av1_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); RTCD_EXTERN void (*av1_highbd_convolve_vert)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); @@ -273,9 +281,6 @@ void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c -void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); -#define av1_highbd_quantize_b av1_highbd_quantize_b_c - void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); @@ -288,6 +293,14 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_highpass_filter_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_highpass_filter_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); @@ -340,48 +353,45 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht8x8_64_add av1_iht8x8_64_add_sse2 -void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c -void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c -void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c -void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); -void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c -void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c - -void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c -void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c -void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); -RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd); void av1_lowbd_convolve_init_c(void); void av1_lowbd_convolve_init_ssse3(void); RTCD_EXTERN void (*av1_lowbd_convolve_init)(void); -void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale); #define av1_quantize_b av1_quantize_b_c void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -393,10 +403,26 @@ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void av1_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration)(uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps); + +void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); +RTCD_EXTERN void (*av1_selfguided_restoration_highbd)(uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps); + void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); void av1_temporal_filter_apply_sse2(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); #define av1_temporal_filter_apply av1_temporal_filter_apply_sse2 +void av1_upsample_intra_edge_c(uint8_t *p, int sz); +void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz); +RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz); + +void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd); +void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd); +RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd); + void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); @@ -414,64 +440,38 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, con uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); #define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_sse2 +void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); +RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max); + +int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift); +RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift); + double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); RTCD_EXTERN double (*compute_cross_correlation)(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); -void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_4x4_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); - -void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -void copy_8x8_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); -RTCD_EXTERN void (*copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); - void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); -int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse2(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_ssse3(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -int od_dir_find8_sse4_1(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); -RTCD_EXTERN int (*od_dir_find8)(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); - -void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_4x4_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - -void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -void od_filter_dering_direction_8x8_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); -RTCD_EXTERN void (*od_filter_dering_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); - void aom_rtcd(void); #ifdef RTCD_C @@ -482,28 +482,28 @@ static void setup_rtcd_internal(void) (void)flags; - aom_clpf_block = aom_clpf_block_sse2; - if (flags & HAS_SSSE3) aom_clpf_block = aom_clpf_block_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block = aom_clpf_block_sse4_1; - aom_clpf_block_hbd = aom_clpf_block_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_block_hbd = aom_clpf_block_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_block_hbd = aom_clpf_block_hbd_sse4_1; - aom_clpf_hblock = aom_clpf_hblock_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock = aom_clpf_hblock_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock = aom_clpf_hblock_sse4_1; - aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse2; - if (flags & HAS_SSSE3) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_ssse3; - if (flags & HAS_SSE4_1) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse4_1; + apply_selfguided_restoration = apply_selfguided_restoration_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1; + apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) apply_selfguided_restoration_highbd = apply_selfguided_restoration_highbd_sse4_1; av1_block_error = av1_block_error_c; if (flags & HAS_AVX2) av1_block_error = av1_block_error_avx2; + av1_convolve_2d_scale = av1_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1; av1_convolve_horiz = av1_convolve_horiz_c; if (flags & HAS_SSSE3) av1_convolve_horiz = av1_convolve_horiz_ssse3; + av1_convolve_rounding = av1_convolve_rounding_c; + if (flags & HAS_AVX2) av1_convolve_rounding = av1_convolve_rounding_avx2; av1_convolve_vert = av1_convolve_vert_c; if (flags & HAS_SSSE3) av1_convolve_vert = av1_convolve_vert_ssse3; av1_fht16x16 = av1_fht16x16_sse2; if (flags & HAS_AVX2) av1_fht16x16 = av1_fht16x16_avx2; av1_fht32x32 = av1_fht32x32_sse2; if (flags & HAS_AVX2) av1_fht32x32 = av1_fht32x32_avx2; + av1_filter_intra_edge = av1_filter_intra_edge_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1; + av1_filter_intra_edge_high = av1_filter_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1; av1_full_search_sad = av1_full_search_sad_c; if (flags & HAS_SSE3) av1_full_search_sad = av1_full_search_sadx3; if (flags & HAS_SSE4_1) av1_full_search_sad = av1_full_search_sadx8; @@ -513,14 +513,18 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_sse4_1; av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_sse4_1; - av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_c; - if (flags & HAS_SSE4_1) av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_sse4_1; av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_c; if (flags & HAS_SSE4_1) av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_sse4_1; + av1_highbd_convolve_2d = av1_highbd_convolve_2d_c; + if (flags & HAS_SSSE3) av1_highbd_convolve_2d = av1_highbd_convolve_2d_ssse3; + av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c; + if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1; av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_sse4_1; av1_highbd_convolve_init = av1_highbd_convolve_init_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_init = av1_highbd_convolve_init_sse4_1; + av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_c; + if (flags & HAS_AVX2) av1_highbd_convolve_rounding = av1_highbd_convolve_rounding_avx2; av1_highbd_convolve_vert = av1_highbd_convolve_vert_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_vert = av1_highbd_convolve_vert_sse4_1; av1_highbd_quantize_fp = av1_highbd_quantize_fp_c; @@ -528,6 +532,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_highbd_quantize_fp = av1_highbd_quantize_fp_avx2; av1_highbd_warp_affine = av1_highbd_warp_affine_c; if (flags & HAS_SSSE3) av1_highbd_warp_affine = av1_highbd_warp_affine_ssse3; + av1_highpass_filter = av1_highpass_filter_c; + if (flags & HAS_SSE4_1) av1_highpass_filter = av1_highpass_filter_sse4_1; + av1_highpass_filter_highbd = av1_highpass_filter_highbd_c; + if (flags & HAS_SSE4_1) av1_highpass_filter_highbd = av1_highpass_filter_highbd_sse4_1; av1_iht16x16_256_add = av1_iht16x16_256_add_sse2; if (flags & HAS_AVX2) av1_iht16x16_256_add = av1_iht16x16_256_add_avx2; av1_inv_txfm2d_add_16x16 = av1_inv_txfm2d_add_16x16_c; @@ -544,37 +552,34 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) av1_quantize_fp = av1_quantize_fp_avx2; av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c; if (flags & HAS_AVX2) av1_quantize_fp_32x32 = av1_quantize_fp_32x32_avx2; + av1_selfguided_restoration = av1_selfguided_restoration_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1; + av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_c; + if (flags & HAS_SSE4_1) av1_selfguided_restoration_highbd = av1_selfguided_restoration_highbd_sse4_1; + av1_upsample_intra_edge = av1_upsample_intra_edge_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1; + av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c; + if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1; av1_warp_affine = av1_warp_affine_sse2; if (flags & HAS_SSSE3) av1_warp_affine = av1_warp_affine_ssse3; + cdef_filter_block = cdef_filter_block_sse2; + if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3; + if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1; + if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2; + cdef_find_dir = cdef_find_dir_sse2; + if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3; + if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1; + if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2; compute_cross_correlation = compute_cross_correlation_c; if (flags & HAS_SSE4_1) compute_cross_correlation = compute_cross_correlation_sse4_1; - copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse4_1; - copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse4_1; - copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse4_1; - copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse2; - if (flags & HAS_SSSE3) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_ssse3; - if (flags & HAS_SSE4_1) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse4_1; copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1; + if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2; copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1; - od_dir_find8 = od_dir_find8_sse2; - if (flags & HAS_SSSE3) od_dir_find8 = od_dir_find8_ssse3; - if (flags & HAS_SSE4_1) od_dir_find8 = od_dir_find8_sse4_1; - od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse4_1; - od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse2; - if (flags & HAS_SSSE3) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_ssse3; - if (flags & HAS_SSE4_1) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse4_1; + if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2; } #endif |