From df9477dfa60ebb5d31bc142e58ce46535c17abce Mon Sep 17 00:00:00 2001 From: trav90 Date: Wed, 17 Oct 2018 05:59:08 -0500 Subject: Update aom to slightly newer commit ID --- media/libaom/config/mac/x64/aom_config.asm | 39 +- media/libaom/config/mac/x64/aom_config.h | 39 +- media/libaom/config/mac/x64/aom_dsp_rtcd.h | 2207 +++++++++++++++++++++++++++- media/libaom/config/mac/x64/av1_rtcd.h | 267 +++- 4 files changed, 2470 insertions(+), 82 deletions(-) (limited to 'media/libaom/config/mac') diff --git a/media/libaom/config/mac/x64/aom_config.asm b/media/libaom/config/mac/x64/aom_config.asm index 77fb58b56..fec4de39c 100644 --- a/media/libaom/config/mac/x64/aom_config.asm +++ b/media/libaom/config/mac/x64/aom_config.asm @@ -2,8 +2,6 @@ %define ARCH_MIPS 0 %define ARCH_X86 0 %define ARCH_X86_64 1 -%define HAVE_EDSP 0 -%define HAVE_MEDIA 0 %define HAVE_NEON 0 %define HAVE_NEON_ASM 0 %define HAVE_MIPS32 0 @@ -45,8 +43,6 @@ %define CONFIG_AV1_ENCODER 1 %define CONFIG_AV1_DECODER 1 %define CONFIG_AV1 1 -%define CONFIG_ENCODERS 1 -%define CONFIG_DECODERS 1 %define CONFIG_STATIC_MSVCRT 0 %define CONFIG_SPATIAL_RESAMPLING 1 %define CONFIG_REALTIME_ONLY 0 @@ -66,36 +62,35 @@ %define CONFIG_ENCODE_PERF_TESTS 0 %define CONFIG_COEFFICIENT_RANGE_CHECKING 0 %define CONFIG_LOWBITDEPTH 1 -%define CONFIG_HIGHBITDEPTH 0 +%define CONFIG_HIGHBITDEPTH 1 %define CONFIG_EXPERIMENTAL 0 %define CONFIG_SIZE_LIMIT 1 %define CONFIG_FP_MB_STATS 0 %define CONFIG_CDEF 1 -%define CONFIG_VAR_TX 0 +%define CONFIG_VAR_TX 1 %define CONFIG_RECT_TX 1 -%define CONFIG_REF_MV 1 +%define CONFIG_RECT_TX_EXT 0 %define CONFIG_TPL_MV 0 %define CONFIG_DUAL_FILTER 1 %define CONFIG_CONVOLVE_ROUND 0 %define CONFIG_COMPOUND_ROUND 0 -%define CONFIG_EXT_TX 0 +%define CONFIG_EXT_TX 1 +%define CONFIG_DPCM_INTRA 0 %define CONFIG_TX64X64 0 -%define CONFIG_SUB8X8_MC 0 %define CONFIG_EXT_INTRA 1 %define CONFIG_INTRA_INTERP 0 %define CONFIG_FILTER_INTRA 0 +%define CONFIG_INTRA_EDGE 0 %define CONFIG_INTRABC 0 -%define CONFIG_EXT_INTER 0 -%define CONFIG_INTERINTRA 0 -%define CONFIG_WEDGE 0 -%define CONFIG_COMPOUND_SEGMENT 0 +%define CONFIG_EXT_INTER 1 +%define CONFIG_INTERINTRA 1 +%define CONFIG_WEDGE 1 +%define CONFIG_COMPOUND_SEGMENT 1 %define CONFIG_EXT_REFS 1 %define CONFIG_GLOBAL_MOTION 1 %define CONFIG_NEW_QUANT 0 %define CONFIG_SUPERTX 0 %define CONFIG_ANS 0 -%define CONFIG_EC_MULTISYMBOL 1 -%define CONFIG_NEW_TOKENSET 1 %define CONFIG_LOOP_RESTORATION 0 %define CONFIG_EXT_PARTITION 0 %define CONFIG_EXT_PARTITION_TYPES 0 @@ -105,20 +100,18 @@ %define CONFIG_NCOBMC 0 %define CONFIG_WARPED_MOTION 1 %define CONFIG_Q_ADAPT_PROBS 0 -%define CONFIG_SUBFRAME_PROB_UPDATE 0 %define CONFIG_BITSTREAM_DEBUG 0 %define CONFIG_ALT_INTRA 1 %define CONFIG_PALETTE 1 %define CONFIG_PALETTE_DELTA_ENCODING 0 -%define CONFIG_DAALA_EC 1 %define CONFIG_RAWBITS 0 -%define CONFIG_EC_SMALLMUL 0 +%define CONFIG_EC_SMALLMUL 1 %define CONFIG_PVQ 0 %define CONFIG_CFL 0 %define CONFIG_XIPHRC 0 %define CONFIG_CB4X4 1 %define CONFIG_CHROMA_2X2 0 -%define CONFIG_CHROMA_SUB8X8 0 +%define CONFIG_CHROMA_SUB8X8 1 %define CONFIG_FRAME_SIZE 0 %define CONFIG_DELTA_Q 1 %define CONFIG_EXT_DELTA_Q 1 @@ -126,10 +119,10 @@ %define CONFIG_FILTER_7BIT 1 %define CONFIG_PARALLEL_DEBLOCKING 0 %define CONFIG_PARALLEL_DEBLOCKING_15TAP 0 -%define CONFIG_LOOPFILTERING_ACROSS_TILES 0 +%define CONFIG_LOOPFILTERING_ACROSS_TILES 1 %define CONFIG_TILE_GROUPS 1 %define CONFIG_EC_ADAPT 1 -%define CONFIG_TEMPMV_SIGNALING 0 +%define CONFIG_TEMPMV_SIGNALING 1 %define CONFIG_RD_DEBUG 0 %define CONFIG_REFERENCE_BUFFER 1 %define CONFIG_COEF_INTERLEAVE 0 @@ -147,5 +140,7 @@ %define CONFIG_NEW_MULTISYMBOL 0 %define CONFIG_COMPOUND_SINGLEREF 0 %define CONFIG_AOM_QM 0 -%define CONFIG_LOWDELAY_COMPOUND 0 +%define CONFIG_ONE_SIDED_COMPOUND 1 +%define CONFIG_SMOOTH_HV 0 +%define CONFIG_VAR_REFS 0 %define CONFIG_ANALYZER 0 diff --git a/media/libaom/config/mac/x64/aom_config.h b/media/libaom/config/mac/x64/aom_config.h index 4a163ebdd..31e1af025 100644 --- a/media/libaom/config/mac/x64/aom_config.h +++ b/media/libaom/config/mac/x64/aom_config.h @@ -15,8 +15,6 @@ #define ARCH_MIPS 0 #define ARCH_X86 0 #define ARCH_X86_64 1 -#define HAVE_EDSP 0 -#define HAVE_MEDIA 0 #define HAVE_NEON 0 #define HAVE_NEON_ASM 0 #define HAVE_MIPS32 0 @@ -58,8 +56,6 @@ #define CONFIG_AV1_ENCODER 1 #define CONFIG_AV1_DECODER 1 #define CONFIG_AV1 1 -#define CONFIG_ENCODERS 1 -#define CONFIG_DECODERS 1 #define CONFIG_STATIC_MSVCRT 0 #define CONFIG_SPATIAL_RESAMPLING 1 #define CONFIG_REALTIME_ONLY 0 @@ -79,36 +75,35 @@ #define CONFIG_ENCODE_PERF_TESTS 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_LOWBITDEPTH 1 -#define CONFIG_HIGHBITDEPTH 0 +#define CONFIG_HIGHBITDEPTH 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_SIZE_LIMIT 1 #define CONFIG_FP_MB_STATS 0 #define CONFIG_CDEF 1 -#define CONFIG_VAR_TX 0 +#define CONFIG_VAR_TX 1 #define CONFIG_RECT_TX 1 -#define CONFIG_REF_MV 1 +#define CONFIG_RECT_TX_EXT 0 #define CONFIG_TPL_MV 0 #define CONFIG_DUAL_FILTER 1 #define CONFIG_CONVOLVE_ROUND 0 #define CONFIG_COMPOUND_ROUND 0 -#define CONFIG_EXT_TX 0 +#define CONFIG_EXT_TX 1 +#define CONFIG_DPCM_INTRA 0 #define CONFIG_TX64X64 0 -#define CONFIG_SUB8X8_MC 0 #define CONFIG_EXT_INTRA 1 #define CONFIG_INTRA_INTERP 0 #define CONFIG_FILTER_INTRA 0 +#define CONFIG_INTRA_EDGE 0 #define CONFIG_INTRABC 0 -#define CONFIG_EXT_INTER 0 -#define CONFIG_INTERINTRA 0 -#define CONFIG_WEDGE 0 -#define CONFIG_COMPOUND_SEGMENT 0 +#define CONFIG_EXT_INTER 1 +#define CONFIG_INTERINTRA 1 +#define CONFIG_WEDGE 1 +#define CONFIG_COMPOUND_SEGMENT 1 #define CONFIG_EXT_REFS 1 #define CONFIG_GLOBAL_MOTION 1 #define CONFIG_NEW_QUANT 0 #define CONFIG_SUPERTX 0 #define CONFIG_ANS 0 -#define CONFIG_EC_MULTISYMBOL 1 -#define CONFIG_NEW_TOKENSET 1 #define CONFIG_LOOP_RESTORATION 0 #define CONFIG_EXT_PARTITION 0 #define CONFIG_EXT_PARTITION_TYPES 0 @@ -118,20 +113,18 @@ #define CONFIG_NCOBMC 0 #define CONFIG_WARPED_MOTION 1 #define CONFIG_Q_ADAPT_PROBS 0 -#define CONFIG_SUBFRAME_PROB_UPDATE 0 #define CONFIG_BITSTREAM_DEBUG 0 #define CONFIG_ALT_INTRA 1 #define CONFIG_PALETTE 1 #define CONFIG_PALETTE_DELTA_ENCODING 0 -#define CONFIG_DAALA_EC 1 #define CONFIG_RAWBITS 0 -#define CONFIG_EC_SMALLMUL 0 +#define CONFIG_EC_SMALLMUL 1 #define CONFIG_PVQ 0 #define CONFIG_CFL 0 #define CONFIG_XIPHRC 0 #define CONFIG_CB4X4 1 #define CONFIG_CHROMA_2X2 0 -#define CONFIG_CHROMA_SUB8X8 0 +#define CONFIG_CHROMA_SUB8X8 1 #define CONFIG_FRAME_SIZE 0 #define CONFIG_DELTA_Q 1 #define CONFIG_EXT_DELTA_Q 1 @@ -139,10 +132,10 @@ #define CONFIG_FILTER_7BIT 1 #define CONFIG_PARALLEL_DEBLOCKING 0 #define CONFIG_PARALLEL_DEBLOCKING_15TAP 0 -#define CONFIG_LOOPFILTERING_ACROSS_TILES 0 +#define CONFIG_LOOPFILTERING_ACROSS_TILES 1 #define CONFIG_TILE_GROUPS 1 #define CONFIG_EC_ADAPT 1 -#define CONFIG_TEMPMV_SIGNALING 0 +#define CONFIG_TEMPMV_SIGNALING 1 #define CONFIG_RD_DEBUG 0 #define CONFIG_REFERENCE_BUFFER 1 #define CONFIG_COEF_INTERLEAVE 0 @@ -160,7 +153,9 @@ #define CONFIG_NEW_MULTISYMBOL 0 #define CONFIG_COMPOUND_SINGLEREF 0 #define CONFIG_AOM_QM 0 -#define CONFIG_LOWDELAY_COMPOUND 0 +#define CONFIG_ONE_SIDED_COMPOUND 1 +#define CONFIG_SMOOTH_HV 0 +#define CONFIG_VAR_REFS 0 #define CONFIG_ANALYZER 0 #define DECODE_WIDTH_LIMIT 8192 #define DECODE_HEIGHT_LIMIT 4608 diff --git a/media/libaom/config/mac/x64/aom_dsp_rtcd.h b/media/libaom/config/mac/x64/aom_dsp_rtcd.h index 6fda772d8..0bfb89de5 100644 --- a/media/libaom/config/mac/x64/aom_dsp_rtcd.h +++ b/media/libaom/config/mac/x64/aom_dsp_rtcd.h @@ -20,14 +20,6 @@ extern "C" { #endif -unsigned int aom_avg_4x4_c(const uint8_t *, int p); -unsigned int aom_avg_4x4_sse2(const uint8_t *, int p); -#define aom_avg_4x4 aom_avg_4x4_sse2 - -unsigned int aom_avg_8x8_c(const uint8_t *, int p); -unsigned int aom_avg_8x8_sse2(const uint8_t *, int p); -#define aom_avg_8x8 aom_avg_8x8_sse2 - void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); @@ -47,6 +39,12 @@ void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred, int void aom_comp_avg_upsampled_pred_sse2(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); #define aom_comp_avg_upsampled_pred aom_comp_avg_upsampled_pred_sse2 +void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask); +#define aom_comp_mask_pred aom_comp_mask_pred_c + +void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask); +#define aom_comp_mask_upsampled_pred aom_comp_mask_upsampled_pred_c + void aom_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -344,38 +342,1756 @@ void aom_hadamard_8x8_sse2(const int16_t *src_diff, int src_stride, int16_t *coe void aom_hadamard_8x8_ssse3(const int16_t *src_diff, int src_stride, int16_t *coeff); RTCD_EXTERN void (*aom_hadamard_8x8)(const int16_t *src_diff, int src_stride, int16_t *coeff); +void aom_highbd_10_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define aom_highbd_10_get16x16var aom_highbd_10_get16x16var_c + +void aom_highbd_10_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define aom_highbd_10_get8x8var aom_highbd_10_get8x8var_c + +unsigned int aom_highbd_10_masked_sub_pixel_variance16x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance16x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance16x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance16x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance16x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance16x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance16x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance16x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance16x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance32x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance32x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance32x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance32x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance32x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance32x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance32x64_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance32x64_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance32x64)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance4x4_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance4x4_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance4x4)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance4x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance4x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance4x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance64x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance64x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance64x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance64x64_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance64x64_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance64x64)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance8x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance8x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance8x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance8x4_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance8x4_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance8x4)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_masked_sub_pixel_variance8x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_10_masked_sub_pixel_variance8x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_masked_sub_pixel_variance8x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_10_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_highbd_10_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_10_mse16x16 aom_highbd_10_mse16x16_sse2 + +unsigned int aom_highbd_10_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_10_mse16x8 aom_highbd_10_mse16x8_c + +unsigned int aom_highbd_10_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_10_mse8x16 aom_highbd_10_mse8x16_c + +unsigned int aom_highbd_10_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_highbd_10_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_10_mse8x8 aom_highbd_10_mse8x8_sse2 + +unsigned int aom_highbd_10_obmc_sub_pixel_variance16x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance16x16 aom_highbd_10_obmc_sub_pixel_variance16x16_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance16x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance16x32 aom_highbd_10_obmc_sub_pixel_variance16x32_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance16x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance16x8 aom_highbd_10_obmc_sub_pixel_variance16x8_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance32x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance32x16 aom_highbd_10_obmc_sub_pixel_variance32x16_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance32x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance32x32 aom_highbd_10_obmc_sub_pixel_variance32x32_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance32x64_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance32x64 aom_highbd_10_obmc_sub_pixel_variance32x64_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance4x4_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance4x4 aom_highbd_10_obmc_sub_pixel_variance4x4_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance4x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance4x8 aom_highbd_10_obmc_sub_pixel_variance4x8_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance64x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance64x32 aom_highbd_10_obmc_sub_pixel_variance64x32_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance64x64_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance64x64 aom_highbd_10_obmc_sub_pixel_variance64x64_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance8x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance8x16 aom_highbd_10_obmc_sub_pixel_variance8x16_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance8x4_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance8x4 aom_highbd_10_obmc_sub_pixel_variance8x4_c + +unsigned int aom_highbd_10_obmc_sub_pixel_variance8x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_10_obmc_sub_pixel_variance8x8 aom_highbd_10_obmc_sub_pixel_variance8x8_c + +unsigned int aom_highbd_10_obmc_variance16x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance16x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance16x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance16x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance16x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance16x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance16x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance16x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance16x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance32x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance32x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance32x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance32x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance32x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance32x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance32x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance32x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance32x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance4x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance4x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance4x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance4x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance4x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance4x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance64x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance64x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance64x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance64x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance64x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance64x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance8x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance8x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance8x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance8x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance8x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance8x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_10_obmc_variance8x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_10_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +uint32_t aom_highbd_10_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance16x16 aom_highbd_10_sub_pixel_avg_variance16x16_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance16x32 aom_highbd_10_sub_pixel_avg_variance16x32_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance16x8 aom_highbd_10_sub_pixel_avg_variance16x8_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance32x16 aom_highbd_10_sub_pixel_avg_variance32x16_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance32x32 aom_highbd_10_sub_pixel_avg_variance32x32_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance32x64 aom_highbd_10_sub_pixel_avg_variance32x64_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_highbd_10_sub_pixel_avg_variance4x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_highbd_10_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance4x8 aom_highbd_10_sub_pixel_avg_variance4x8_c + +uint32_t aom_highbd_10_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance64x32 aom_highbd_10_sub_pixel_avg_variance64x32_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance64x64 aom_highbd_10_sub_pixel_avg_variance64x64_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance8x16 aom_highbd_10_sub_pixel_avg_variance8x16_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance8x4 aom_highbd_10_sub_pixel_avg_variance8x4_sse2 + +uint32_t aom_highbd_10_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_10_sub_pixel_avg_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_10_sub_pixel_avg_variance8x8 aom_highbd_10_sub_pixel_avg_variance8x8_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance16x16 aom_highbd_10_sub_pixel_variance16x16_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance16x32 aom_highbd_10_sub_pixel_variance16x32_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance16x8 aom_highbd_10_sub_pixel_variance16x8_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance32x16 aom_highbd_10_sub_pixel_variance32x16_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance32x32 aom_highbd_10_sub_pixel_variance32x32_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance32x64 aom_highbd_10_sub_pixel_variance32x64_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance4x4_sse4_1(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_highbd_10_sub_pixel_variance4x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_highbd_10_sub_pixel_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance4x8 aom_highbd_10_sub_pixel_variance4x8_c + +uint32_t aom_highbd_10_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance64x32 aom_highbd_10_sub_pixel_variance64x32_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance64x64 aom_highbd_10_sub_pixel_variance64x64_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance8x16 aom_highbd_10_sub_pixel_variance8x16_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance8x4 aom_highbd_10_sub_pixel_variance8x4_sse2 + +uint32_t aom_highbd_10_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_10_sub_pixel_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_10_sub_pixel_variance8x8 aom_highbd_10_sub_pixel_variance8x8_sse2 + +unsigned int aom_highbd_10_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance16x16 aom_highbd_10_variance16x16_sse2 + +unsigned int aom_highbd_10_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance16x32 aom_highbd_10_variance16x32_sse2 + +unsigned int aom_highbd_10_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance16x8 aom_highbd_10_variance16x8_sse2 + +unsigned int aom_highbd_10_variance2x2_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance2x2 aom_highbd_10_variance2x2_c + +unsigned int aom_highbd_10_variance2x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance2x4 aom_highbd_10_variance2x4_c + +unsigned int aom_highbd_10_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance32x16 aom_highbd_10_variance32x16_sse2 + +unsigned int aom_highbd_10_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance32x32 aom_highbd_10_variance32x32_sse2 + +unsigned int aom_highbd_10_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance32x64 aom_highbd_10_variance32x64_sse2 + +unsigned int aom_highbd_10_variance4x2_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance4x2 aom_highbd_10_variance4x2_c + +unsigned int aom_highbd_10_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance4x4_sse4_1(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_10_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int aom_highbd_10_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance4x8 aom_highbd_10_variance4x8_c + +unsigned int aom_highbd_10_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance64x32 aom_highbd_10_variance64x32_sse2 + +unsigned int aom_highbd_10_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance64x64 aom_highbd_10_variance64x64_sse2 + +unsigned int aom_highbd_10_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance8x16 aom_highbd_10_variance8x16_sse2 + +unsigned int aom_highbd_10_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance8x4 aom_highbd_10_variance8x4_c + +unsigned int aom_highbd_10_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_10_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_10_variance8x8 aom_highbd_10_variance8x8_sse2 + +void aom_highbd_12_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define aom_highbd_12_get16x16var aom_highbd_12_get16x16var_c + +void aom_highbd_12_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define aom_highbd_12_get8x8var aom_highbd_12_get8x8var_c + +unsigned int aom_highbd_12_masked_sub_pixel_variance16x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance16x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance16x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance16x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance16x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance16x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance16x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance16x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance16x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance32x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance32x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance32x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance32x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance32x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance32x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance32x64_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance32x64_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance32x64)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance4x4_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance4x4_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance4x4)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance4x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance4x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance4x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance64x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance64x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance64x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance64x64_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance64x64_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance64x64)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance8x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance8x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance8x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance8x4_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance8x4_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance8x4)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_masked_sub_pixel_variance8x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_12_masked_sub_pixel_variance8x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_masked_sub_pixel_variance8x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_12_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_highbd_12_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_12_mse16x16 aom_highbd_12_mse16x16_sse2 + +unsigned int aom_highbd_12_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_12_mse16x8 aom_highbd_12_mse16x8_c + +unsigned int aom_highbd_12_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_12_mse8x16 aom_highbd_12_mse8x16_c + +unsigned int aom_highbd_12_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_highbd_12_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_12_mse8x8 aom_highbd_12_mse8x8_sse2 + +unsigned int aom_highbd_12_obmc_sub_pixel_variance16x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance16x16 aom_highbd_12_obmc_sub_pixel_variance16x16_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance16x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance16x32 aom_highbd_12_obmc_sub_pixel_variance16x32_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance16x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance16x8 aom_highbd_12_obmc_sub_pixel_variance16x8_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance32x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance32x16 aom_highbd_12_obmc_sub_pixel_variance32x16_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance32x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance32x32 aom_highbd_12_obmc_sub_pixel_variance32x32_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance32x64_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance32x64 aom_highbd_12_obmc_sub_pixel_variance32x64_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance4x4_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance4x4 aom_highbd_12_obmc_sub_pixel_variance4x4_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance4x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance4x8 aom_highbd_12_obmc_sub_pixel_variance4x8_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance64x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance64x32 aom_highbd_12_obmc_sub_pixel_variance64x32_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance64x64_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance64x64 aom_highbd_12_obmc_sub_pixel_variance64x64_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance8x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance8x16 aom_highbd_12_obmc_sub_pixel_variance8x16_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance8x4_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance8x4 aom_highbd_12_obmc_sub_pixel_variance8x4_c + +unsigned int aom_highbd_12_obmc_sub_pixel_variance8x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_12_obmc_sub_pixel_variance8x8 aom_highbd_12_obmc_sub_pixel_variance8x8_c + +unsigned int aom_highbd_12_obmc_variance16x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance16x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance16x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance16x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance16x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance16x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance16x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance16x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance16x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance32x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance32x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance32x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance32x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance32x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance32x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance32x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance32x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance32x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance4x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance4x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance4x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance4x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance4x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance4x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance64x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance64x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance64x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance64x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance64x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance64x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance8x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance8x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance8x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance8x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance8x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance8x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_12_obmc_variance8x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_12_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +uint32_t aom_highbd_12_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance16x16 aom_highbd_12_sub_pixel_avg_variance16x16_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance16x32 aom_highbd_12_sub_pixel_avg_variance16x32_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance16x8 aom_highbd_12_sub_pixel_avg_variance16x8_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance32x16 aom_highbd_12_sub_pixel_avg_variance32x16_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance32x32 aom_highbd_12_sub_pixel_avg_variance32x32_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance32x64 aom_highbd_12_sub_pixel_avg_variance32x64_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_highbd_12_sub_pixel_avg_variance4x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_highbd_12_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance4x8 aom_highbd_12_sub_pixel_avg_variance4x8_c + +uint32_t aom_highbd_12_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance64x32 aom_highbd_12_sub_pixel_avg_variance64x32_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance64x64 aom_highbd_12_sub_pixel_avg_variance64x64_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance8x16 aom_highbd_12_sub_pixel_avg_variance8x16_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance8x4 aom_highbd_12_sub_pixel_avg_variance8x4_sse2 + +uint32_t aom_highbd_12_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_12_sub_pixel_avg_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_12_sub_pixel_avg_variance8x8 aom_highbd_12_sub_pixel_avg_variance8x8_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance16x16 aom_highbd_12_sub_pixel_variance16x16_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance16x32 aom_highbd_12_sub_pixel_variance16x32_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance16x8 aom_highbd_12_sub_pixel_variance16x8_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance32x16 aom_highbd_12_sub_pixel_variance32x16_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance32x32 aom_highbd_12_sub_pixel_variance32x32_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance32x64 aom_highbd_12_sub_pixel_variance32x64_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance4x4_sse4_1(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_highbd_12_sub_pixel_variance4x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_highbd_12_sub_pixel_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance4x8 aom_highbd_12_sub_pixel_variance4x8_c + +uint32_t aom_highbd_12_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance64x32 aom_highbd_12_sub_pixel_variance64x32_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance64x64 aom_highbd_12_sub_pixel_variance64x64_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance8x16 aom_highbd_12_sub_pixel_variance8x16_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance8x4 aom_highbd_12_sub_pixel_variance8x4_sse2 + +uint32_t aom_highbd_12_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_12_sub_pixel_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_12_sub_pixel_variance8x8 aom_highbd_12_sub_pixel_variance8x8_sse2 + +unsigned int aom_highbd_12_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance16x16 aom_highbd_12_variance16x16_sse2 + +unsigned int aom_highbd_12_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance16x32 aom_highbd_12_variance16x32_sse2 + +unsigned int aom_highbd_12_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance16x8 aom_highbd_12_variance16x8_sse2 + +unsigned int aom_highbd_12_variance2x2_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance2x2 aom_highbd_12_variance2x2_c + +unsigned int aom_highbd_12_variance2x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance2x4 aom_highbd_12_variance2x4_c + +unsigned int aom_highbd_12_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance32x16 aom_highbd_12_variance32x16_sse2 + +unsigned int aom_highbd_12_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance32x32 aom_highbd_12_variance32x32_sse2 + +unsigned int aom_highbd_12_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance32x64 aom_highbd_12_variance32x64_sse2 + +unsigned int aom_highbd_12_variance4x2_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance4x2 aom_highbd_12_variance4x2_c + +unsigned int aom_highbd_12_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance4x4_sse4_1(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_12_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int aom_highbd_12_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance4x8 aom_highbd_12_variance4x8_c + +unsigned int aom_highbd_12_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance64x32 aom_highbd_12_variance64x32_sse2 + +unsigned int aom_highbd_12_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance64x64 aom_highbd_12_variance64x64_sse2 + +unsigned int aom_highbd_12_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance8x16 aom_highbd_12_variance8x16_sse2 + +unsigned int aom_highbd_12_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance8x4 aom_highbd_12_variance8x4_c + +unsigned int aom_highbd_12_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_12_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_12_variance8x8 aom_highbd_12_variance8x8_sse2 + +void aom_highbd_8_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define aom_highbd_8_get16x16var aom_highbd_8_get16x16var_c + +void aom_highbd_8_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define aom_highbd_8_get8x8var aom_highbd_8_get8x8var_c + +unsigned int aom_highbd_8_masked_sub_pixel_variance16x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance16x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance16x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance16x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance16x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance16x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance16x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance16x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance16x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance32x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance32x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance32x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance32x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance32x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance32x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance32x64_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance32x64_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance32x64)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance4x4_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance4x4_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance4x4)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance4x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance4x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance4x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance64x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance64x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance64x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance64x64_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance64x64_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance64x64)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance8x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance8x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance8x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance8x4_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance8x4_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance8x4)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_masked_sub_pixel_variance8x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_highbd_8_masked_sub_pixel_variance8x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_masked_sub_pixel_variance8x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_highbd_8_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_highbd_8_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_8_mse16x16 aom_highbd_8_mse16x16_sse2 + +unsigned int aom_highbd_8_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_8_mse16x8 aom_highbd_8_mse16x8_c + +unsigned int aom_highbd_8_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_8_mse8x16 aom_highbd_8_mse8x16_c + +unsigned int aom_highbd_8_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_highbd_8_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_highbd_8_mse8x8 aom_highbd_8_mse8x8_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance16x16 aom_highbd_8_sub_pixel_avg_variance16x16_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance16x32 aom_highbd_8_sub_pixel_avg_variance16x32_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance16x8 aom_highbd_8_sub_pixel_avg_variance16x8_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance32x16 aom_highbd_8_sub_pixel_avg_variance32x16_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance32x32 aom_highbd_8_sub_pixel_avg_variance32x32_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance32x64 aom_highbd_8_sub_pixel_avg_variance32x64_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_highbd_8_sub_pixel_avg_variance4x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_highbd_8_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance4x8 aom_highbd_8_sub_pixel_avg_variance4x8_c + +uint32_t aom_highbd_8_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance64x32 aom_highbd_8_sub_pixel_avg_variance64x32_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance64x64 aom_highbd_8_sub_pixel_avg_variance64x64_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance8x16 aom_highbd_8_sub_pixel_avg_variance8x16_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance8x4 aom_highbd_8_sub_pixel_avg_variance8x4_sse2 + +uint32_t aom_highbd_8_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_highbd_8_sub_pixel_avg_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +#define aom_highbd_8_sub_pixel_avg_variance8x8 aom_highbd_8_sub_pixel_avg_variance8x8_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance16x16 aom_highbd_8_sub_pixel_variance16x16_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance16x32 aom_highbd_8_sub_pixel_variance16x32_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance16x8 aom_highbd_8_sub_pixel_variance16x8_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance32x16 aom_highbd_8_sub_pixel_variance32x16_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance32x32 aom_highbd_8_sub_pixel_variance32x32_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance32x64 aom_highbd_8_sub_pixel_variance32x64_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance4x4_sse4_1(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_highbd_8_sub_pixel_variance4x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_highbd_8_sub_pixel_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance4x8 aom_highbd_8_sub_pixel_variance4x8_c + +uint32_t aom_highbd_8_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance64x32 aom_highbd_8_sub_pixel_variance64x32_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance64x64 aom_highbd_8_sub_pixel_variance64x64_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance8x16 aom_highbd_8_sub_pixel_variance8x16_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance8x4 aom_highbd_8_sub_pixel_variance8x4_sse2 + +uint32_t aom_highbd_8_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_highbd_8_sub_pixel_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_highbd_8_sub_pixel_variance8x8 aom_highbd_8_sub_pixel_variance8x8_sse2 + +unsigned int aom_highbd_8_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance16x16 aom_highbd_8_variance16x16_sse2 + +unsigned int aom_highbd_8_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance16x32 aom_highbd_8_variance16x32_sse2 + +unsigned int aom_highbd_8_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance16x8 aom_highbd_8_variance16x8_sse2 + +unsigned int aom_highbd_8_variance2x2_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance2x2 aom_highbd_8_variance2x2_c + +unsigned int aom_highbd_8_variance2x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance2x4 aom_highbd_8_variance2x4_c + +unsigned int aom_highbd_8_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance32x16 aom_highbd_8_variance32x16_sse2 + +unsigned int aom_highbd_8_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance32x32 aom_highbd_8_variance32x32_sse2 + +unsigned int aom_highbd_8_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance32x64 aom_highbd_8_variance32x64_sse2 + +unsigned int aom_highbd_8_variance4x2_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance4x2 aom_highbd_8_variance4x2_c + +unsigned int aom_highbd_8_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance4x4_sse4_1(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_8_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int aom_highbd_8_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance4x8 aom_highbd_8_variance4x8_c + +unsigned int aom_highbd_8_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance64x32 aom_highbd_8_variance64x32_sse2 + +unsigned int aom_highbd_8_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance64x64 aom_highbd_8_variance64x64_sse2 + +unsigned int aom_highbd_8_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance8x16 aom_highbd_8_variance8x16_sse2 + +unsigned int aom_highbd_8_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance8x4 aom_highbd_8_variance8x4_c + +unsigned int aom_highbd_8_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_highbd_8_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_highbd_8_variance8x8 aom_highbd_8_variance8x8_sse2 + +void aom_highbd_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd); +void aom_highbd_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd); +RTCD_EXTERN void (*aom_highbd_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd); + +void aom_highbd_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd); +void aom_highbd_blend_a64_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd); +RTCD_EXTERN void (*aom_highbd_blend_a64_mask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd); + +void aom_highbd_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd); +void aom_highbd_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd); +RTCD_EXTERN void (*aom_highbd_blend_a64_vmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd); + +void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride); +#define aom_highbd_comp_avg_pred aom_highbd_comp_avg_pred_c + +void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride); +void aom_highbd_comp_avg_upsampled_pred_sse2(uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride); +#define aom_highbd_comp_avg_upsampled_pred aom_highbd_comp_avg_upsampled_pred_sse2 + +void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask); +#define aom_highbd_comp_mask_pred aom_highbd_comp_mask_pred_c + +void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask); +#define aom_highbd_comp_mask_upsampled_pred aom_highbd_comp_mask_upsampled_pred_c + +void aom_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_avg_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_avg_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_avg_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve_avg_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_d117_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d117_predictor_16x16 aom_highbd_d117_predictor_16x16_c + +void aom_highbd_d117_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d117_predictor_2x2 aom_highbd_d117_predictor_2x2_c + +void aom_highbd_d117_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d117_predictor_32x32 aom_highbd_d117_predictor_32x32_c + +void aom_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_c + +void aom_highbd_d117_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d117_predictor_8x8 aom_highbd_d117_predictor_8x8_c + +void aom_highbd_d135_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d135_predictor_16x16 aom_highbd_d135_predictor_16x16_c + +void aom_highbd_d135_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d135_predictor_2x2 aom_highbd_d135_predictor_2x2_c + +void aom_highbd_d135_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d135_predictor_32x32 aom_highbd_d135_predictor_32x32_c + +void aom_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_c + +void aom_highbd_d135_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d135_predictor_8x8 aom_highbd_d135_predictor_8x8_c + +void aom_highbd_d153_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d153_predictor_16x16 aom_highbd_d153_predictor_16x16_c + +void aom_highbd_d153_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d153_predictor_2x2 aom_highbd_d153_predictor_2x2_c + +void aom_highbd_d153_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d153_predictor_32x32 aom_highbd_d153_predictor_32x32_c + +void aom_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_c + +void aom_highbd_d153_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d153_predictor_8x8 aom_highbd_d153_predictor_8x8_c + +void aom_highbd_d207e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d207e_predictor_16x16 aom_highbd_d207e_predictor_16x16_c + +void aom_highbd_d207e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d207e_predictor_2x2 aom_highbd_d207e_predictor_2x2_c + +void aom_highbd_d207e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d207e_predictor_32x32 aom_highbd_d207e_predictor_32x32_c + +void aom_highbd_d207e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d207e_predictor_4x4 aom_highbd_d207e_predictor_4x4_c + +void aom_highbd_d207e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d207e_predictor_8x8 aom_highbd_d207e_predictor_8x8_c + +void aom_highbd_d45e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_16x16 aom_highbd_d45e_predictor_16x16_c + +void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_2x2 aom_highbd_d45e_predictor_2x2_c + +void aom_highbd_d45e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_32x32 aom_highbd_d45e_predictor_32x32_c + +void aom_highbd_d45e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_c + +void aom_highbd_d45e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_c + +void aom_highbd_d63e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d63e_predictor_16x16 aom_highbd_d63e_predictor_16x16_c + +void aom_highbd_d63e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d63e_predictor_2x2 aom_highbd_d63e_predictor_2x2_c + +void aom_highbd_d63e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d63e_predictor_32x32 aom_highbd_d63e_predictor_32x32_c + +void aom_highbd_d63e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d63e_predictor_4x4 aom_highbd_d63e_predictor_4x4_c + +void aom_highbd_d63e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d63e_predictor_8x8 aom_highbd_d63e_predictor_8x8_c + +void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_c + +void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c + +void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_c + +void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_c + +void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_c + +void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_c + +void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c + +void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_c + +void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_c + +void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_c + +void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_16x16 aom_highbd_dc_predictor_16x16_sse2 + +void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c + +void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_32x32 aom_highbd_dc_predictor_32x32_sse2 + +void aom_highbd_dc_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_4x4 aom_highbd_dc_predictor_4x4_sse2 + +void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_8x8 aom_highbd_dc_predictor_8x8_sse2 + +void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_c + +void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c + +void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_c + +void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_c + +void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_c + +void aom_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride); +void aom_highbd_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride); +#define aom_highbd_fdct16x16 aom_highbd_fdct16x16_sse2 + +void aom_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride); +#define aom_highbd_fdct16x16_1 aom_highbd_fdct16x16_1_c + +void aom_highbd_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride); +void aom_highbd_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride); +#define aom_highbd_fdct32x32 aom_highbd_fdct32x32_sse2 + +void aom_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride); +#define aom_highbd_fdct32x32_1 aom_highbd_fdct32x32_1_c + +void aom_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride); +void aom_highbd_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride); +#define aom_highbd_fdct32x32_rd aom_highbd_fdct32x32_rd_sse2 + +void aom_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride); +void aom_highbd_fdct4x4_sse2(const int16_t *input, tran_low_t *output, int stride); +#define aom_highbd_fdct4x4 aom_highbd_fdct4x4_sse2 + +void aom_highbd_fdct8x8_c(const int16_t *input, tran_low_t *output, int stride); +void aom_highbd_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int stride); +#define aom_highbd_fdct8x8 aom_highbd_fdct8x8_sse2 + +void aom_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride); +#define aom_highbd_fdct8x8_1 aom_highbd_fdct8x8_1_c + +void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_c + +void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c + +void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_c + +void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_c + +void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_c + +void aom_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +#define aom_highbd_idct16x16_1_add aom_highbd_idct16x16_1_add_c + +void aom_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +#define aom_highbd_idct32x32_1024_add aom_highbd_idct32x32_1024_add_c + +void aom_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +#define aom_highbd_idct32x32_1_add aom_highbd_idct32x32_1_add_c + +void aom_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +#define aom_highbd_idct32x32_34_add aom_highbd_idct32x32_34_add_c + +void aom_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +void aom_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +#define aom_highbd_idct4x4_16_add aom_highbd_idct4x4_16_add_sse2 + +void aom_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +#define aom_highbd_idct4x4_1_add aom_highbd_idct4x4_1_add_c + +void aom_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +#define aom_highbd_idct8x8_1_add aom_highbd_idct8x8_1_add_c + +void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +#define aom_highbd_iwht4x4_16_add aom_highbd_iwht4x4_16_add_c + +void aom_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); +#define aom_highbd_iwht4x4_1_add aom_highbd_iwht4x4_1_add_c + +void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +#define aom_highbd_lpf_horizontal_4 aom_highbd_lpf_horizontal_4_sse2 + +void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +#define aom_highbd_lpf_horizontal_4_dual aom_highbd_lpf_horizontal_4_dual_sse2 + +void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +#define aom_highbd_lpf_horizontal_8 aom_highbd_lpf_horizontal_8_sse2 + +void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +#define aom_highbd_lpf_horizontal_8_dual aom_highbd_lpf_horizontal_8_dual_sse2 + +void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +#define aom_highbd_lpf_horizontal_edge_16 aom_highbd_lpf_horizontal_edge_16_sse2 + +void aom_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +#define aom_highbd_lpf_horizontal_edge_8 aom_highbd_lpf_horizontal_edge_8_sse2 + +void aom_highbd_lpf_vertical_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_vertical_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +#define aom_highbd_lpf_vertical_16 aom_highbd_lpf_vertical_16_sse2 + +void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +#define aom_highbd_lpf_vertical_16_dual aom_highbd_lpf_vertical_16_dual_sse2 + +void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +#define aom_highbd_lpf_vertical_4 aom_highbd_lpf_vertical_4_sse2 + +void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +#define aom_highbd_lpf_vertical_4_dual aom_highbd_lpf_vertical_4_dual_sse2 + +void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +#define aom_highbd_lpf_vertical_8 aom_highbd_lpf_vertical_8_sse2 + +void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +#define aom_highbd_lpf_vertical_8_dual aom_highbd_lpf_vertical_8_dual_sse2 + +unsigned int aom_highbd_masked_sad16x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad16x16_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad16x16)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad16x32_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad16x32_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad16x32)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad16x8_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad16x8_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad16x8)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad32x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad32x16_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad32x16)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad32x32_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad32x32_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad32x32)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad32x64_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad32x64_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad32x64)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad4x4_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad4x4_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad4x4)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad4x8_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad4x8_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad4x8)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad64x32_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad64x32_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad64x32)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad64x64_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad64x64_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad64x64)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad8x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad8x16_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad8x16)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad8x4_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad8x4_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad8x4)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_highbd_masked_sad8x8_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_highbd_masked_sad8x8_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_highbd_masked_sad8x8)(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); + +void aom_highbd_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +#define aom_highbd_minmax_8x8 aom_highbd_minmax_8x8_c + +unsigned int aom_highbd_obmc_sad16x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad16x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad16x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad16x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad16x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad16x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad16x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad16x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad16x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad32x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad32x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad32x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad32x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad32x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad32x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad32x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad32x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad32x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad4x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad4x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad4x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad4x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad4x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad4x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad64x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad64x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad64x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad64x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad64x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad64x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad8x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad8x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad8x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad8x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad8x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad8x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sad8x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_highbd_obmc_sad8x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_sad8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_highbd_obmc_sub_pixel_variance16x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance16x16 aom_highbd_obmc_sub_pixel_variance16x16_c + +unsigned int aom_highbd_obmc_sub_pixel_variance16x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance16x32 aom_highbd_obmc_sub_pixel_variance16x32_c + +unsigned int aom_highbd_obmc_sub_pixel_variance16x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance16x8 aom_highbd_obmc_sub_pixel_variance16x8_c + +unsigned int aom_highbd_obmc_sub_pixel_variance32x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance32x16 aom_highbd_obmc_sub_pixel_variance32x16_c + +unsigned int aom_highbd_obmc_sub_pixel_variance32x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance32x32 aom_highbd_obmc_sub_pixel_variance32x32_c + +unsigned int aom_highbd_obmc_sub_pixel_variance32x64_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance32x64 aom_highbd_obmc_sub_pixel_variance32x64_c + +unsigned int aom_highbd_obmc_sub_pixel_variance4x4_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance4x4 aom_highbd_obmc_sub_pixel_variance4x4_c + +unsigned int aom_highbd_obmc_sub_pixel_variance4x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance4x8 aom_highbd_obmc_sub_pixel_variance4x8_c + +unsigned int aom_highbd_obmc_sub_pixel_variance64x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance64x32 aom_highbd_obmc_sub_pixel_variance64x32_c + +unsigned int aom_highbd_obmc_sub_pixel_variance64x64_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance64x64 aom_highbd_obmc_sub_pixel_variance64x64_c + +unsigned int aom_highbd_obmc_sub_pixel_variance8x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance8x16 aom_highbd_obmc_sub_pixel_variance8x16_c + +unsigned int aom_highbd_obmc_sub_pixel_variance8x4_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance8x4 aom_highbd_obmc_sub_pixel_variance8x4_c + +unsigned int aom_highbd_obmc_sub_pixel_variance8x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_highbd_obmc_sub_pixel_variance8x8 aom_highbd_obmc_sub_pixel_variance8x8_c + +unsigned int aom_highbd_obmc_variance16x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance16x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance16x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance16x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance16x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance16x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance16x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance16x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance16x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance32x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance32x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance32x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance32x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance32x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance32x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance32x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance32x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance32x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance4x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance4x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance4x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance4x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance4x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance4x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance64x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance64x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance64x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance64x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance64x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance64x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance8x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance8x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance8x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance8x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance8x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance8x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_highbd_obmc_variance8x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_highbd_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_highbd_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +void aom_highbd_paeth_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_paeth_predictor_16x16 aom_highbd_paeth_predictor_16x16_c + +void aom_highbd_paeth_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_paeth_predictor_2x2 aom_highbd_paeth_predictor_2x2_c + +void aom_highbd_paeth_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_paeth_predictor_32x32 aom_highbd_paeth_predictor_32x32_c + +void aom_highbd_paeth_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_paeth_predictor_4x4 aom_highbd_paeth_predictor_4x4_c + +void aom_highbd_paeth_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_paeth_predictor_8x8 aom_highbd_paeth_predictor_8x8_c + +void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +#define aom_highbd_quantize_b aom_highbd_quantize_b_sse2 + +void aom_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void aom_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +#define aom_highbd_quantize_b_32x32 aom_highbd_quantize_b_32x32_sse2 + +void aom_highbd_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +#define aom_highbd_quantize_b_64x64 aom_highbd_quantize_b_64x64_c + +unsigned int aom_highbd_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad16x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_highbd_sad16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_highbd_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad16x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad16x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_highbd_sad16x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_highbd_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad16x16x3 aom_highbd_sad16x16x3_c + +void aom_highbd_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad16x16x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_highbd_sad16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void aom_highbd_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad16x16x8 aom_highbd_sad16x16x8_c + +unsigned int aom_highbd_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad16x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_highbd_sad16x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_highbd_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad16x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad16x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_highbd_sad16x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_highbd_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad16x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_highbd_sad16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int aom_highbd_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad16x8_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_highbd_sad16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_highbd_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad16x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad16x8_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_highbd_sad16x8_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_highbd_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad16x8x3 aom_highbd_sad16x8x3_c + +void aom_highbd_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad16x8x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_highbd_sad16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void aom_highbd_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad16x8x8 aom_highbd_sad16x8x8_c + +unsigned int aom_highbd_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_highbd_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_highbd_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_highbd_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_highbd_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad32x16x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_highbd_sad32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int aom_highbd_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_highbd_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_highbd_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_highbd_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_highbd_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad32x32x3 aom_highbd_sad32x32x3_c + +void aom_highbd_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_highbd_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void aom_highbd_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad32x32x8 aom_highbd_sad32x32x8_c + +unsigned int aom_highbd_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_highbd_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_highbd_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_highbd_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_highbd_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad32x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_highbd_sad32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int aom_highbd_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_highbd_sad4x4 aom_highbd_sad4x4_c + +unsigned int aom_highbd_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_highbd_sad4x4_avg aom_highbd_sad4x4_avg_c + +void aom_highbd_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad4x4x3 aom_highbd_sad4x4x3_c + +void aom_highbd_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad4x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad4x4x4d aom_highbd_sad4x4x4d_sse2 + +void aom_highbd_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad4x4x8 aom_highbd_sad4x4x8_c + +unsigned int aom_highbd_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_highbd_sad4x8 aom_highbd_sad4x8_c + +unsigned int aom_highbd_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_highbd_sad4x8_avg aom_highbd_sad4x8_avg_c + +void aom_highbd_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad4x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad4x8x4d aom_highbd_sad4x8x4d_sse2 + +void aom_highbd_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad4x8x8 aom_highbd_sad4x8x8_c + +unsigned int aom_highbd_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_highbd_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_highbd_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_highbd_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_highbd_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad64x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_highbd_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int aom_highbd_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_highbd_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_highbd_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_highbd_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_highbd_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad64x64x3 aom_highbd_sad64x64x3_c + +void aom_highbd_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_highbd_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void aom_highbd_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad64x64x8 aom_highbd_sad64x64x8_c + +unsigned int aom_highbd_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_highbd_sad8x16 aom_highbd_sad8x16_sse2 + +unsigned int aom_highbd_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad8x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_highbd_sad8x16_avg aom_highbd_sad8x16_avg_sse2 + +void aom_highbd_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad8x16x3 aom_highbd_sad8x16x3_c + +void aom_highbd_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad8x16x4d aom_highbd_sad8x16x4d_sse2 + +void aom_highbd_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad8x16x8 aom_highbd_sad8x16x8_c + +unsigned int aom_highbd_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_highbd_sad8x4 aom_highbd_sad8x4_sse2 + +unsigned int aom_highbd_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad8x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_highbd_sad8x4_avg aom_highbd_sad8x4_avg_sse2 + +void aom_highbd_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad8x4x4d aom_highbd_sad8x4x4d_sse2 + +void aom_highbd_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad8x4x8 aom_highbd_sad8x4x8_c + +unsigned int aom_highbd_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_highbd_sad8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_highbd_sad8x8 aom_highbd_sad8x8_sse2 + +unsigned int aom_highbd_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_highbd_sad8x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_highbd_sad8x8_avg aom_highbd_sad8x8_avg_sse2 + +void aom_highbd_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad8x8x3 aom_highbd_sad8x8x3_c + +void aom_highbd_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_highbd_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad8x8x4d aom_highbd_sad8x8x4d_sse2 + +void aom_highbd_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_highbd_sad8x8x8 aom_highbd_sad8x8x8_c + +void aom_highbd_smooth_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_smooth_predictor_16x16 aom_highbd_smooth_predictor_16x16_c + +void aom_highbd_smooth_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_smooth_predictor_2x2 aom_highbd_smooth_predictor_2x2_c + +void aom_highbd_smooth_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_smooth_predictor_32x32 aom_highbd_smooth_predictor_32x32_c + +void aom_highbd_smooth_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_smooth_predictor_4x4 aom_highbd_smooth_predictor_4x4_c + +void aom_highbd_smooth_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_smooth_predictor_8x8 aom_highbd_smooth_predictor_8x8_c + +void aom_highbd_subtract_block_c(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd); +void aom_highbd_subtract_block_sse2(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd); +#define aom_highbd_subtract_block aom_highbd_subtract_block_sse2 + +void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height, const uint8_t *ref8, int ref_stride); +void aom_highbd_upsampled_pred_sse2(uint16_t *comp_pred, int width, int height, const uint8_t *ref8, int ref_stride); +#define aom_highbd_upsampled_pred aom_highbd_upsampled_pred_sse2 + +void aom_highbd_v_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_16x16 aom_highbd_v_predictor_16x16_sse2 + +void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c + +void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_32x32 aom_highbd_v_predictor_32x32_sse2 + +void aom_highbd_v_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_4x4 aom_highbd_v_predictor_4x4_sse2 + +void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_8x8 aom_highbd_v_predictor_8x8_sse2 + void aom_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define aom_idct16x16_10_add aom_idct16x16_10_add_sse2 +void aom_idct16x16_10_add_avx2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define aom_idct16x16_1_add aom_idct16x16_1_add_sse2 +void aom_idct16x16_1_add_avx2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define aom_idct16x16_256_add aom_idct16x16_256_add_sse2 +void aom_idct16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define aom_idct16x16_38_add aom_idct16x16_38_add_c +void aom_idct16x16_38_add_avx2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct16x16_38_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_1024_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_1024_add_avx2(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*aom_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_135_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_135_add_avx2(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*aom_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define aom_idct32x32_1_add aom_idct32x32_1_add_sse2 +void aom_idct32x32_1_add_avx2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct32x32_34_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_34_add_avx2(const tran_low_t *input, uint8_t *dest, int dest_stride); RTCD_EXTERN void (*aom_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); void aom_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); @@ -465,6 +2181,110 @@ void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, co void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); #define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_sse2 +unsigned int aom_masked_sad16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad16x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad16x16)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad16x32_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad16x32_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad16x32)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad16x8_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad16x8_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad16x8)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad32x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad32x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad32x16)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad32x32_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad32x32_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad32x32)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad32x64_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad32x64_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad32x64)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad4x4_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad4x4_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad4x4)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad4x8_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad4x8_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad4x8)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad64x32_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad64x32_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad64x32)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad64x64_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad64x64_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad64x64)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad8x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad8x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad8x16)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad8x4_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad8x4_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad8x4)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sad8x8_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +unsigned int aom_masked_sad8x8_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); +RTCD_EXTERN unsigned int (*aom_masked_sad8x8)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); + +unsigned int aom_masked_sub_pixel_variance16x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance16x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance16x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance16x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance16x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance16x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance16x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance16x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance16x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance32x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance32x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance32x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance32x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance32x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance32x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance32x64_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance32x64_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance32x64)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance4x4_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance4x4_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance4x4)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance4x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance4x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance4x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance64x32_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance64x32_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance64x32)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance64x64_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance64x64_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance64x64)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance8x16_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance8x16_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance8x16)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance8x4_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance8x4_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance8x4)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + +unsigned int aom_masked_sub_pixel_variance8x8_c(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +unsigned int aom_masked_sub_pixel_variance8x8_ssse3(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_masked_sub_pixel_variance8x8)(const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse); + void aom_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); void aom_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); #define aom_minmax_8x8 aom_minmax_8x8_sse2 @@ -1232,12 +3052,321 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) aom_get16x16var = aom_get16x16var_avx2; aom_hadamard_8x8 = aom_hadamard_8x8_sse2; if (flags & HAS_SSSE3) aom_hadamard_8x8 = aom_hadamard_8x8_ssse3; + aom_highbd_10_masked_sub_pixel_variance16x16 = aom_highbd_10_masked_sub_pixel_variance16x16_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance16x16 = aom_highbd_10_masked_sub_pixel_variance16x16_ssse3; + aom_highbd_10_masked_sub_pixel_variance16x32 = aom_highbd_10_masked_sub_pixel_variance16x32_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance16x32 = aom_highbd_10_masked_sub_pixel_variance16x32_ssse3; + aom_highbd_10_masked_sub_pixel_variance16x8 = aom_highbd_10_masked_sub_pixel_variance16x8_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance16x8 = aom_highbd_10_masked_sub_pixel_variance16x8_ssse3; + aom_highbd_10_masked_sub_pixel_variance32x16 = aom_highbd_10_masked_sub_pixel_variance32x16_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance32x16 = aom_highbd_10_masked_sub_pixel_variance32x16_ssse3; + aom_highbd_10_masked_sub_pixel_variance32x32 = aom_highbd_10_masked_sub_pixel_variance32x32_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance32x32 = aom_highbd_10_masked_sub_pixel_variance32x32_ssse3; + aom_highbd_10_masked_sub_pixel_variance32x64 = aom_highbd_10_masked_sub_pixel_variance32x64_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance32x64 = aom_highbd_10_masked_sub_pixel_variance32x64_ssse3; + aom_highbd_10_masked_sub_pixel_variance4x4 = aom_highbd_10_masked_sub_pixel_variance4x4_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance4x4 = aom_highbd_10_masked_sub_pixel_variance4x4_ssse3; + aom_highbd_10_masked_sub_pixel_variance4x8 = aom_highbd_10_masked_sub_pixel_variance4x8_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance4x8 = aom_highbd_10_masked_sub_pixel_variance4x8_ssse3; + aom_highbd_10_masked_sub_pixel_variance64x32 = aom_highbd_10_masked_sub_pixel_variance64x32_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance64x32 = aom_highbd_10_masked_sub_pixel_variance64x32_ssse3; + aom_highbd_10_masked_sub_pixel_variance64x64 = aom_highbd_10_masked_sub_pixel_variance64x64_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance64x64 = aom_highbd_10_masked_sub_pixel_variance64x64_ssse3; + aom_highbd_10_masked_sub_pixel_variance8x16 = aom_highbd_10_masked_sub_pixel_variance8x16_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance8x16 = aom_highbd_10_masked_sub_pixel_variance8x16_ssse3; + aom_highbd_10_masked_sub_pixel_variance8x4 = aom_highbd_10_masked_sub_pixel_variance8x4_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance8x4 = aom_highbd_10_masked_sub_pixel_variance8x4_ssse3; + aom_highbd_10_masked_sub_pixel_variance8x8 = aom_highbd_10_masked_sub_pixel_variance8x8_c; + if (flags & HAS_SSSE3) aom_highbd_10_masked_sub_pixel_variance8x8 = aom_highbd_10_masked_sub_pixel_variance8x8_ssse3; + aom_highbd_10_obmc_variance16x16 = aom_highbd_10_obmc_variance16x16_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance16x16 = aom_highbd_10_obmc_variance16x16_sse4_1; + aom_highbd_10_obmc_variance16x32 = aom_highbd_10_obmc_variance16x32_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance16x32 = aom_highbd_10_obmc_variance16x32_sse4_1; + aom_highbd_10_obmc_variance16x8 = aom_highbd_10_obmc_variance16x8_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance16x8 = aom_highbd_10_obmc_variance16x8_sse4_1; + aom_highbd_10_obmc_variance32x16 = aom_highbd_10_obmc_variance32x16_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance32x16 = aom_highbd_10_obmc_variance32x16_sse4_1; + aom_highbd_10_obmc_variance32x32 = aom_highbd_10_obmc_variance32x32_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance32x32 = aom_highbd_10_obmc_variance32x32_sse4_1; + aom_highbd_10_obmc_variance32x64 = aom_highbd_10_obmc_variance32x64_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance32x64 = aom_highbd_10_obmc_variance32x64_sse4_1; + aom_highbd_10_obmc_variance4x4 = aom_highbd_10_obmc_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance4x4 = aom_highbd_10_obmc_variance4x4_sse4_1; + aom_highbd_10_obmc_variance4x8 = aom_highbd_10_obmc_variance4x8_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance4x8 = aom_highbd_10_obmc_variance4x8_sse4_1; + aom_highbd_10_obmc_variance64x32 = aom_highbd_10_obmc_variance64x32_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance64x32 = aom_highbd_10_obmc_variance64x32_sse4_1; + aom_highbd_10_obmc_variance64x64 = aom_highbd_10_obmc_variance64x64_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance64x64 = aom_highbd_10_obmc_variance64x64_sse4_1; + aom_highbd_10_obmc_variance8x16 = aom_highbd_10_obmc_variance8x16_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance8x16 = aom_highbd_10_obmc_variance8x16_sse4_1; + aom_highbd_10_obmc_variance8x4 = aom_highbd_10_obmc_variance8x4_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance8x4 = aom_highbd_10_obmc_variance8x4_sse4_1; + aom_highbd_10_obmc_variance8x8 = aom_highbd_10_obmc_variance8x8_c; + if (flags & HAS_SSE4_1) aom_highbd_10_obmc_variance8x8 = aom_highbd_10_obmc_variance8x8_sse4_1; + aom_highbd_10_sub_pixel_avg_variance4x4 = aom_highbd_10_sub_pixel_avg_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_10_sub_pixel_avg_variance4x4 = aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1; + aom_highbd_10_sub_pixel_variance4x4 = aom_highbd_10_sub_pixel_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_10_sub_pixel_variance4x4 = aom_highbd_10_sub_pixel_variance4x4_sse4_1; + aom_highbd_10_variance4x4 = aom_highbd_10_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_10_variance4x4 = aom_highbd_10_variance4x4_sse4_1; + aom_highbd_12_masked_sub_pixel_variance16x16 = aom_highbd_12_masked_sub_pixel_variance16x16_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance16x16 = aom_highbd_12_masked_sub_pixel_variance16x16_ssse3; + aom_highbd_12_masked_sub_pixel_variance16x32 = aom_highbd_12_masked_sub_pixel_variance16x32_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance16x32 = aom_highbd_12_masked_sub_pixel_variance16x32_ssse3; + aom_highbd_12_masked_sub_pixel_variance16x8 = aom_highbd_12_masked_sub_pixel_variance16x8_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance16x8 = aom_highbd_12_masked_sub_pixel_variance16x8_ssse3; + aom_highbd_12_masked_sub_pixel_variance32x16 = aom_highbd_12_masked_sub_pixel_variance32x16_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance32x16 = aom_highbd_12_masked_sub_pixel_variance32x16_ssse3; + aom_highbd_12_masked_sub_pixel_variance32x32 = aom_highbd_12_masked_sub_pixel_variance32x32_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance32x32 = aom_highbd_12_masked_sub_pixel_variance32x32_ssse3; + aom_highbd_12_masked_sub_pixel_variance32x64 = aom_highbd_12_masked_sub_pixel_variance32x64_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance32x64 = aom_highbd_12_masked_sub_pixel_variance32x64_ssse3; + aom_highbd_12_masked_sub_pixel_variance4x4 = aom_highbd_12_masked_sub_pixel_variance4x4_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance4x4 = aom_highbd_12_masked_sub_pixel_variance4x4_ssse3; + aom_highbd_12_masked_sub_pixel_variance4x8 = aom_highbd_12_masked_sub_pixel_variance4x8_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance4x8 = aom_highbd_12_masked_sub_pixel_variance4x8_ssse3; + aom_highbd_12_masked_sub_pixel_variance64x32 = aom_highbd_12_masked_sub_pixel_variance64x32_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance64x32 = aom_highbd_12_masked_sub_pixel_variance64x32_ssse3; + aom_highbd_12_masked_sub_pixel_variance64x64 = aom_highbd_12_masked_sub_pixel_variance64x64_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance64x64 = aom_highbd_12_masked_sub_pixel_variance64x64_ssse3; + aom_highbd_12_masked_sub_pixel_variance8x16 = aom_highbd_12_masked_sub_pixel_variance8x16_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance8x16 = aom_highbd_12_masked_sub_pixel_variance8x16_ssse3; + aom_highbd_12_masked_sub_pixel_variance8x4 = aom_highbd_12_masked_sub_pixel_variance8x4_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance8x4 = aom_highbd_12_masked_sub_pixel_variance8x4_ssse3; + aom_highbd_12_masked_sub_pixel_variance8x8 = aom_highbd_12_masked_sub_pixel_variance8x8_c; + if (flags & HAS_SSSE3) aom_highbd_12_masked_sub_pixel_variance8x8 = aom_highbd_12_masked_sub_pixel_variance8x8_ssse3; + aom_highbd_12_obmc_variance16x16 = aom_highbd_12_obmc_variance16x16_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance16x16 = aom_highbd_12_obmc_variance16x16_sse4_1; + aom_highbd_12_obmc_variance16x32 = aom_highbd_12_obmc_variance16x32_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance16x32 = aom_highbd_12_obmc_variance16x32_sse4_1; + aom_highbd_12_obmc_variance16x8 = aom_highbd_12_obmc_variance16x8_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance16x8 = aom_highbd_12_obmc_variance16x8_sse4_1; + aom_highbd_12_obmc_variance32x16 = aom_highbd_12_obmc_variance32x16_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance32x16 = aom_highbd_12_obmc_variance32x16_sse4_1; + aom_highbd_12_obmc_variance32x32 = aom_highbd_12_obmc_variance32x32_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance32x32 = aom_highbd_12_obmc_variance32x32_sse4_1; + aom_highbd_12_obmc_variance32x64 = aom_highbd_12_obmc_variance32x64_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance32x64 = aom_highbd_12_obmc_variance32x64_sse4_1; + aom_highbd_12_obmc_variance4x4 = aom_highbd_12_obmc_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance4x4 = aom_highbd_12_obmc_variance4x4_sse4_1; + aom_highbd_12_obmc_variance4x8 = aom_highbd_12_obmc_variance4x8_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance4x8 = aom_highbd_12_obmc_variance4x8_sse4_1; + aom_highbd_12_obmc_variance64x32 = aom_highbd_12_obmc_variance64x32_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance64x32 = aom_highbd_12_obmc_variance64x32_sse4_1; + aom_highbd_12_obmc_variance64x64 = aom_highbd_12_obmc_variance64x64_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance64x64 = aom_highbd_12_obmc_variance64x64_sse4_1; + aom_highbd_12_obmc_variance8x16 = aom_highbd_12_obmc_variance8x16_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance8x16 = aom_highbd_12_obmc_variance8x16_sse4_1; + aom_highbd_12_obmc_variance8x4 = aom_highbd_12_obmc_variance8x4_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance8x4 = aom_highbd_12_obmc_variance8x4_sse4_1; + aom_highbd_12_obmc_variance8x8 = aom_highbd_12_obmc_variance8x8_c; + if (flags & HAS_SSE4_1) aom_highbd_12_obmc_variance8x8 = aom_highbd_12_obmc_variance8x8_sse4_1; + aom_highbd_12_sub_pixel_avg_variance4x4 = aom_highbd_12_sub_pixel_avg_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_12_sub_pixel_avg_variance4x4 = aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1; + aom_highbd_12_sub_pixel_variance4x4 = aom_highbd_12_sub_pixel_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_12_sub_pixel_variance4x4 = aom_highbd_12_sub_pixel_variance4x4_sse4_1; + aom_highbd_12_variance4x4 = aom_highbd_12_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_12_variance4x4 = aom_highbd_12_variance4x4_sse4_1; + aom_highbd_8_masked_sub_pixel_variance16x16 = aom_highbd_8_masked_sub_pixel_variance16x16_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance16x16 = aom_highbd_8_masked_sub_pixel_variance16x16_ssse3; + aom_highbd_8_masked_sub_pixel_variance16x32 = aom_highbd_8_masked_sub_pixel_variance16x32_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance16x32 = aom_highbd_8_masked_sub_pixel_variance16x32_ssse3; + aom_highbd_8_masked_sub_pixel_variance16x8 = aom_highbd_8_masked_sub_pixel_variance16x8_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance16x8 = aom_highbd_8_masked_sub_pixel_variance16x8_ssse3; + aom_highbd_8_masked_sub_pixel_variance32x16 = aom_highbd_8_masked_sub_pixel_variance32x16_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance32x16 = aom_highbd_8_masked_sub_pixel_variance32x16_ssse3; + aom_highbd_8_masked_sub_pixel_variance32x32 = aom_highbd_8_masked_sub_pixel_variance32x32_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance32x32 = aom_highbd_8_masked_sub_pixel_variance32x32_ssse3; + aom_highbd_8_masked_sub_pixel_variance32x64 = aom_highbd_8_masked_sub_pixel_variance32x64_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance32x64 = aom_highbd_8_masked_sub_pixel_variance32x64_ssse3; + aom_highbd_8_masked_sub_pixel_variance4x4 = aom_highbd_8_masked_sub_pixel_variance4x4_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance4x4 = aom_highbd_8_masked_sub_pixel_variance4x4_ssse3; + aom_highbd_8_masked_sub_pixel_variance4x8 = aom_highbd_8_masked_sub_pixel_variance4x8_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance4x8 = aom_highbd_8_masked_sub_pixel_variance4x8_ssse3; + aom_highbd_8_masked_sub_pixel_variance64x32 = aom_highbd_8_masked_sub_pixel_variance64x32_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance64x32 = aom_highbd_8_masked_sub_pixel_variance64x32_ssse3; + aom_highbd_8_masked_sub_pixel_variance64x64 = aom_highbd_8_masked_sub_pixel_variance64x64_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance64x64 = aom_highbd_8_masked_sub_pixel_variance64x64_ssse3; + aom_highbd_8_masked_sub_pixel_variance8x16 = aom_highbd_8_masked_sub_pixel_variance8x16_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance8x16 = aom_highbd_8_masked_sub_pixel_variance8x16_ssse3; + aom_highbd_8_masked_sub_pixel_variance8x4 = aom_highbd_8_masked_sub_pixel_variance8x4_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance8x4 = aom_highbd_8_masked_sub_pixel_variance8x4_ssse3; + aom_highbd_8_masked_sub_pixel_variance8x8 = aom_highbd_8_masked_sub_pixel_variance8x8_c; + if (flags & HAS_SSSE3) aom_highbd_8_masked_sub_pixel_variance8x8 = aom_highbd_8_masked_sub_pixel_variance8x8_ssse3; + aom_highbd_8_sub_pixel_avg_variance4x4 = aom_highbd_8_sub_pixel_avg_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_8_sub_pixel_avg_variance4x4 = aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1; + aom_highbd_8_sub_pixel_variance4x4 = aom_highbd_8_sub_pixel_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_8_sub_pixel_variance4x4 = aom_highbd_8_sub_pixel_variance4x4_sse4_1; + aom_highbd_8_variance4x4 = aom_highbd_8_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_8_variance4x4 = aom_highbd_8_variance4x4_sse4_1; + aom_highbd_blend_a64_hmask = aom_highbd_blend_a64_hmask_c; + if (flags & HAS_SSE4_1) aom_highbd_blend_a64_hmask = aom_highbd_blend_a64_hmask_sse4_1; + aom_highbd_blend_a64_mask = aom_highbd_blend_a64_mask_c; + if (flags & HAS_SSE4_1) aom_highbd_blend_a64_mask = aom_highbd_blend_a64_mask_sse4_1; + aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_c; + if (flags & HAS_SSE4_1) aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_sse4_1; + aom_highbd_convolve8 = aom_highbd_convolve8_sse2; + if (flags & HAS_AVX2) aom_highbd_convolve8 = aom_highbd_convolve8_avx2; + aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_avx2; + aom_highbd_convolve8_avg_horiz = aom_highbd_convolve8_avg_horiz_sse2; + if (flags & HAS_AVX2) aom_highbd_convolve8_avg_horiz = aom_highbd_convolve8_avg_horiz_avx2; + aom_highbd_convolve8_avg_vert = aom_highbd_convolve8_avg_vert_sse2; + if (flags & HAS_AVX2) aom_highbd_convolve8_avg_vert = aom_highbd_convolve8_avg_vert_avx2; + aom_highbd_convolve8_horiz = aom_highbd_convolve8_horiz_sse2; + if (flags & HAS_AVX2) aom_highbd_convolve8_horiz = aom_highbd_convolve8_horiz_avx2; + aom_highbd_convolve8_vert = aom_highbd_convolve8_vert_sse2; + if (flags & HAS_AVX2) aom_highbd_convolve8_vert = aom_highbd_convolve8_vert_avx2; + aom_highbd_convolve_avg = aom_highbd_convolve_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_convolve_avg = aom_highbd_convolve_avg_avx2; + aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2; + if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2; + aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_ssse3; + aom_highbd_masked_sad16x32 = aom_highbd_masked_sad16x32_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad16x32 = aom_highbd_masked_sad16x32_ssse3; + aom_highbd_masked_sad16x8 = aom_highbd_masked_sad16x8_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad16x8 = aom_highbd_masked_sad16x8_ssse3; + aom_highbd_masked_sad32x16 = aom_highbd_masked_sad32x16_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad32x16 = aom_highbd_masked_sad32x16_ssse3; + aom_highbd_masked_sad32x32 = aom_highbd_masked_sad32x32_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad32x32 = aom_highbd_masked_sad32x32_ssse3; + aom_highbd_masked_sad32x64 = aom_highbd_masked_sad32x64_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad32x64 = aom_highbd_masked_sad32x64_ssse3; + aom_highbd_masked_sad4x4 = aom_highbd_masked_sad4x4_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad4x4 = aom_highbd_masked_sad4x4_ssse3; + aom_highbd_masked_sad4x8 = aom_highbd_masked_sad4x8_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad4x8 = aom_highbd_masked_sad4x8_ssse3; + aom_highbd_masked_sad64x32 = aom_highbd_masked_sad64x32_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad64x32 = aom_highbd_masked_sad64x32_ssse3; + aom_highbd_masked_sad64x64 = aom_highbd_masked_sad64x64_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad64x64 = aom_highbd_masked_sad64x64_ssse3; + aom_highbd_masked_sad8x16 = aom_highbd_masked_sad8x16_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad8x16 = aom_highbd_masked_sad8x16_ssse3; + aom_highbd_masked_sad8x4 = aom_highbd_masked_sad8x4_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad8x4 = aom_highbd_masked_sad8x4_ssse3; + aom_highbd_masked_sad8x8 = aom_highbd_masked_sad8x8_c; + if (flags & HAS_SSSE3) aom_highbd_masked_sad8x8 = aom_highbd_masked_sad8x8_ssse3; + aom_highbd_obmc_sad16x16 = aom_highbd_obmc_sad16x16_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad16x16 = aom_highbd_obmc_sad16x16_sse4_1; + aom_highbd_obmc_sad16x32 = aom_highbd_obmc_sad16x32_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad16x32 = aom_highbd_obmc_sad16x32_sse4_1; + aom_highbd_obmc_sad16x8 = aom_highbd_obmc_sad16x8_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad16x8 = aom_highbd_obmc_sad16x8_sse4_1; + aom_highbd_obmc_sad32x16 = aom_highbd_obmc_sad32x16_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad32x16 = aom_highbd_obmc_sad32x16_sse4_1; + aom_highbd_obmc_sad32x32 = aom_highbd_obmc_sad32x32_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad32x32 = aom_highbd_obmc_sad32x32_sse4_1; + aom_highbd_obmc_sad32x64 = aom_highbd_obmc_sad32x64_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad32x64 = aom_highbd_obmc_sad32x64_sse4_1; + aom_highbd_obmc_sad4x4 = aom_highbd_obmc_sad4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad4x4 = aom_highbd_obmc_sad4x4_sse4_1; + aom_highbd_obmc_sad4x8 = aom_highbd_obmc_sad4x8_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad4x8 = aom_highbd_obmc_sad4x8_sse4_1; + aom_highbd_obmc_sad64x32 = aom_highbd_obmc_sad64x32_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad64x32 = aom_highbd_obmc_sad64x32_sse4_1; + aom_highbd_obmc_sad64x64 = aom_highbd_obmc_sad64x64_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad64x64 = aom_highbd_obmc_sad64x64_sse4_1; + aom_highbd_obmc_sad8x16 = aom_highbd_obmc_sad8x16_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad8x16 = aom_highbd_obmc_sad8x16_sse4_1; + aom_highbd_obmc_sad8x4 = aom_highbd_obmc_sad8x4_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad8x4 = aom_highbd_obmc_sad8x4_sse4_1; + aom_highbd_obmc_sad8x8 = aom_highbd_obmc_sad8x8_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_sad8x8 = aom_highbd_obmc_sad8x8_sse4_1; + aom_highbd_obmc_variance16x16 = aom_highbd_obmc_variance16x16_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance16x16 = aom_highbd_obmc_variance16x16_sse4_1; + aom_highbd_obmc_variance16x32 = aom_highbd_obmc_variance16x32_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance16x32 = aom_highbd_obmc_variance16x32_sse4_1; + aom_highbd_obmc_variance16x8 = aom_highbd_obmc_variance16x8_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance16x8 = aom_highbd_obmc_variance16x8_sse4_1; + aom_highbd_obmc_variance32x16 = aom_highbd_obmc_variance32x16_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance32x16 = aom_highbd_obmc_variance32x16_sse4_1; + aom_highbd_obmc_variance32x32 = aom_highbd_obmc_variance32x32_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance32x32 = aom_highbd_obmc_variance32x32_sse4_1; + aom_highbd_obmc_variance32x64 = aom_highbd_obmc_variance32x64_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance32x64 = aom_highbd_obmc_variance32x64_sse4_1; + aom_highbd_obmc_variance4x4 = aom_highbd_obmc_variance4x4_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance4x4 = aom_highbd_obmc_variance4x4_sse4_1; + aom_highbd_obmc_variance4x8 = aom_highbd_obmc_variance4x8_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance4x8 = aom_highbd_obmc_variance4x8_sse4_1; + aom_highbd_obmc_variance64x32 = aom_highbd_obmc_variance64x32_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance64x32 = aom_highbd_obmc_variance64x32_sse4_1; + aom_highbd_obmc_variance64x64 = aom_highbd_obmc_variance64x64_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance64x64 = aom_highbd_obmc_variance64x64_sse4_1; + aom_highbd_obmc_variance8x16 = aom_highbd_obmc_variance8x16_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance8x16 = aom_highbd_obmc_variance8x16_sse4_1; + aom_highbd_obmc_variance8x4 = aom_highbd_obmc_variance8x4_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance8x4 = aom_highbd_obmc_variance8x4_sse4_1; + aom_highbd_obmc_variance8x8 = aom_highbd_obmc_variance8x8_c; + if (flags & HAS_SSE4_1) aom_highbd_obmc_variance8x8 = aom_highbd_obmc_variance8x8_sse4_1; + aom_highbd_sad16x16 = aom_highbd_sad16x16_sse2; + if (flags & HAS_AVX2) aom_highbd_sad16x16 = aom_highbd_sad16x16_avx2; + aom_highbd_sad16x16_avg = aom_highbd_sad16x16_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_sad16x16_avg = aom_highbd_sad16x16_avg_avx2; + aom_highbd_sad16x16x4d = aom_highbd_sad16x16x4d_sse2; + if (flags & HAS_AVX2) aom_highbd_sad16x16x4d = aom_highbd_sad16x16x4d_avx2; + aom_highbd_sad16x32 = aom_highbd_sad16x32_sse2; + if (flags & HAS_AVX2) aom_highbd_sad16x32 = aom_highbd_sad16x32_avx2; + aom_highbd_sad16x32_avg = aom_highbd_sad16x32_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_sad16x32_avg = aom_highbd_sad16x32_avg_avx2; + aom_highbd_sad16x32x4d = aom_highbd_sad16x32x4d_sse2; + if (flags & HAS_AVX2) aom_highbd_sad16x32x4d = aom_highbd_sad16x32x4d_avx2; + aom_highbd_sad16x8 = aom_highbd_sad16x8_sse2; + if (flags & HAS_AVX2) aom_highbd_sad16x8 = aom_highbd_sad16x8_avx2; + aom_highbd_sad16x8_avg = aom_highbd_sad16x8_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_sad16x8_avg = aom_highbd_sad16x8_avg_avx2; + aom_highbd_sad16x8x4d = aom_highbd_sad16x8x4d_sse2; + if (flags & HAS_AVX2) aom_highbd_sad16x8x4d = aom_highbd_sad16x8x4d_avx2; + aom_highbd_sad32x16 = aom_highbd_sad32x16_sse2; + if (flags & HAS_AVX2) aom_highbd_sad32x16 = aom_highbd_sad32x16_avx2; + aom_highbd_sad32x16_avg = aom_highbd_sad32x16_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_sad32x16_avg = aom_highbd_sad32x16_avg_avx2; + aom_highbd_sad32x16x4d = aom_highbd_sad32x16x4d_sse2; + if (flags & HAS_AVX2) aom_highbd_sad32x16x4d = aom_highbd_sad32x16x4d_avx2; + aom_highbd_sad32x32 = aom_highbd_sad32x32_sse2; + if (flags & HAS_AVX2) aom_highbd_sad32x32 = aom_highbd_sad32x32_avx2; + aom_highbd_sad32x32_avg = aom_highbd_sad32x32_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_sad32x32_avg = aom_highbd_sad32x32_avg_avx2; + aom_highbd_sad32x32x4d = aom_highbd_sad32x32x4d_sse2; + if (flags & HAS_AVX2) aom_highbd_sad32x32x4d = aom_highbd_sad32x32x4d_avx2; + aom_highbd_sad32x64 = aom_highbd_sad32x64_sse2; + if (flags & HAS_AVX2) aom_highbd_sad32x64 = aom_highbd_sad32x64_avx2; + aom_highbd_sad32x64_avg = aom_highbd_sad32x64_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_sad32x64_avg = aom_highbd_sad32x64_avg_avx2; + aom_highbd_sad32x64x4d = aom_highbd_sad32x64x4d_sse2; + if (flags & HAS_AVX2) aom_highbd_sad32x64x4d = aom_highbd_sad32x64x4d_avx2; + aom_highbd_sad64x32 = aom_highbd_sad64x32_sse2; + if (flags & HAS_AVX2) aom_highbd_sad64x32 = aom_highbd_sad64x32_avx2; + aom_highbd_sad64x32_avg = aom_highbd_sad64x32_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_sad64x32_avg = aom_highbd_sad64x32_avg_avx2; + aom_highbd_sad64x32x4d = aom_highbd_sad64x32x4d_sse2; + if (flags & HAS_AVX2) aom_highbd_sad64x32x4d = aom_highbd_sad64x32x4d_avx2; + aom_highbd_sad64x64 = aom_highbd_sad64x64_sse2; + if (flags & HAS_AVX2) aom_highbd_sad64x64 = aom_highbd_sad64x64_avx2; + aom_highbd_sad64x64_avg = aom_highbd_sad64x64_avg_sse2; + if (flags & HAS_AVX2) aom_highbd_sad64x64_avg = aom_highbd_sad64x64_avg_avx2; + aom_highbd_sad64x64x4d = aom_highbd_sad64x64x4d_sse2; + if (flags & HAS_AVX2) aom_highbd_sad64x64x4d = aom_highbd_sad64x64x4d_avx2; + aom_idct16x16_10_add = aom_idct16x16_10_add_sse2; + if (flags & HAS_AVX2) aom_idct16x16_10_add = aom_idct16x16_10_add_avx2; + aom_idct16x16_1_add = aom_idct16x16_1_add_sse2; + if (flags & HAS_AVX2) aom_idct16x16_1_add = aom_idct16x16_1_add_avx2; + aom_idct16x16_256_add = aom_idct16x16_256_add_sse2; + if (flags & HAS_AVX2) aom_idct16x16_256_add = aom_idct16x16_256_add_avx2; + aom_idct16x16_38_add = aom_idct16x16_38_add_c; + if (flags & HAS_AVX2) aom_idct16x16_38_add = aom_idct16x16_38_add_avx2; aom_idct32x32_1024_add = aom_idct32x32_1024_add_sse2; if (flags & HAS_SSSE3) aom_idct32x32_1024_add = aom_idct32x32_1024_add_ssse3; + if (flags & HAS_AVX2) aom_idct32x32_1024_add = aom_idct32x32_1024_add_avx2; aom_idct32x32_135_add = aom_idct32x32_1024_add_sse2; if (flags & HAS_SSSE3) aom_idct32x32_135_add = aom_idct32x32_135_add_ssse3; + if (flags & HAS_AVX2) aom_idct32x32_135_add = aom_idct32x32_135_add_avx2; + aom_idct32x32_1_add = aom_idct32x32_1_add_sse2; + if (flags & HAS_AVX2) aom_idct32x32_1_add = aom_idct32x32_1_add_avx2; aom_idct32x32_34_add = aom_idct32x32_34_add_sse2; if (flags & HAS_SSSE3) aom_idct32x32_34_add = aom_idct32x32_34_add_ssse3; + if (flags & HAS_AVX2) aom_idct32x32_34_add = aom_idct32x32_34_add_avx2; aom_idct8x8_12_add = aom_idct8x8_12_add_sse2; if (flags & HAS_SSSE3) aom_idct8x8_12_add = aom_idct8x8_12_add_ssse3; aom_idct8x8_64_add = aom_idct8x8_64_add_sse2; @@ -1246,6 +3375,58 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_avx2; aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_sse2; if (flags & HAS_AVX2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_avx2; + aom_masked_sad16x16 = aom_masked_sad16x16_c; + if (flags & HAS_SSSE3) aom_masked_sad16x16 = aom_masked_sad16x16_ssse3; + aom_masked_sad16x32 = aom_masked_sad16x32_c; + if (flags & HAS_SSSE3) aom_masked_sad16x32 = aom_masked_sad16x32_ssse3; + aom_masked_sad16x8 = aom_masked_sad16x8_c; + if (flags & HAS_SSSE3) aom_masked_sad16x8 = aom_masked_sad16x8_ssse3; + aom_masked_sad32x16 = aom_masked_sad32x16_c; + if (flags & HAS_SSSE3) aom_masked_sad32x16 = aom_masked_sad32x16_ssse3; + aom_masked_sad32x32 = aom_masked_sad32x32_c; + if (flags & HAS_SSSE3) aom_masked_sad32x32 = aom_masked_sad32x32_ssse3; + aom_masked_sad32x64 = aom_masked_sad32x64_c; + if (flags & HAS_SSSE3) aom_masked_sad32x64 = aom_masked_sad32x64_ssse3; + aom_masked_sad4x4 = aom_masked_sad4x4_c; + if (flags & HAS_SSSE3) aom_masked_sad4x4 = aom_masked_sad4x4_ssse3; + aom_masked_sad4x8 = aom_masked_sad4x8_c; + if (flags & HAS_SSSE3) aom_masked_sad4x8 = aom_masked_sad4x8_ssse3; + aom_masked_sad64x32 = aom_masked_sad64x32_c; + if (flags & HAS_SSSE3) aom_masked_sad64x32 = aom_masked_sad64x32_ssse3; + aom_masked_sad64x64 = aom_masked_sad64x64_c; + if (flags & HAS_SSSE3) aom_masked_sad64x64 = aom_masked_sad64x64_ssse3; + aom_masked_sad8x16 = aom_masked_sad8x16_c; + if (flags & HAS_SSSE3) aom_masked_sad8x16 = aom_masked_sad8x16_ssse3; + aom_masked_sad8x4 = aom_masked_sad8x4_c; + if (flags & HAS_SSSE3) aom_masked_sad8x4 = aom_masked_sad8x4_ssse3; + aom_masked_sad8x8 = aom_masked_sad8x8_c; + if (flags & HAS_SSSE3) aom_masked_sad8x8 = aom_masked_sad8x8_ssse3; + aom_masked_sub_pixel_variance16x16 = aom_masked_sub_pixel_variance16x16_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance16x16 = aom_masked_sub_pixel_variance16x16_ssse3; + aom_masked_sub_pixel_variance16x32 = aom_masked_sub_pixel_variance16x32_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance16x32 = aom_masked_sub_pixel_variance16x32_ssse3; + aom_masked_sub_pixel_variance16x8 = aom_masked_sub_pixel_variance16x8_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance16x8 = aom_masked_sub_pixel_variance16x8_ssse3; + aom_masked_sub_pixel_variance32x16 = aom_masked_sub_pixel_variance32x16_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance32x16 = aom_masked_sub_pixel_variance32x16_ssse3; + aom_masked_sub_pixel_variance32x32 = aom_masked_sub_pixel_variance32x32_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance32x32 = aom_masked_sub_pixel_variance32x32_ssse3; + aom_masked_sub_pixel_variance32x64 = aom_masked_sub_pixel_variance32x64_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance32x64 = aom_masked_sub_pixel_variance32x64_ssse3; + aom_masked_sub_pixel_variance4x4 = aom_masked_sub_pixel_variance4x4_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance4x4 = aom_masked_sub_pixel_variance4x4_ssse3; + aom_masked_sub_pixel_variance4x8 = aom_masked_sub_pixel_variance4x8_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance4x8 = aom_masked_sub_pixel_variance4x8_ssse3; + aom_masked_sub_pixel_variance64x32 = aom_masked_sub_pixel_variance64x32_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance64x32 = aom_masked_sub_pixel_variance64x32_ssse3; + aom_masked_sub_pixel_variance64x64 = aom_masked_sub_pixel_variance64x64_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance64x64 = aom_masked_sub_pixel_variance64x64_ssse3; + aom_masked_sub_pixel_variance8x16 = aom_masked_sub_pixel_variance8x16_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance8x16 = aom_masked_sub_pixel_variance8x16_ssse3; + aom_masked_sub_pixel_variance8x4 = aom_masked_sub_pixel_variance8x4_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance8x4 = aom_masked_sub_pixel_variance8x4_ssse3; + aom_masked_sub_pixel_variance8x8 = aom_masked_sub_pixel_variance8x8_c; + if (flags & HAS_SSSE3) aom_masked_sub_pixel_variance8x8 = aom_masked_sub_pixel_variance8x8_ssse3; aom_mse16x16 = aom_mse16x16_sse2; if (flags & HAS_AVX2) aom_mse16x16 = aom_mse16x16_avx2; aom_obmc_sad16x16 = aom_obmc_sad16x16_c; diff --git a/media/libaom/config/mac/x64/av1_rtcd.h b/media/libaom/config/mac/x64/av1_rtcd.h index eacb5a555..9dcf96f84 100644 --- a/media/libaom/config/mac/x64/av1_rtcd.h +++ b/media/libaom/config/mac/x64/av1_rtcd.h @@ -60,13 +60,7 @@ void aom_clpf_hblock_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); -int64_t av1_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); -int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); -RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); - -int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); -int64_t av1_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); -#define av1_block_error_fp av1_block_error_fp_sse2 +#define av1_block_error av1_block_error_c void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); @@ -80,9 +74,7 @@ int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_conf #define av1_diamond_search_sad av1_diamond_search_sad_c void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -void av1_fdct8x8_quant_sse2(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -void av1_fdct8x8_quant_ssse3(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -RTCD_EXTERN void (*av1_fdct8x8_quant)(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +#define av1_fdct8x8_quant av1_fdct8x8_quant_c void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); @@ -149,9 +141,174 @@ RTCD_EXTERN int (*av1_full_search_sad)(const struct macroblock *x, const struct void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); #define av1_fwd_idtx av1_fwd_idtx_c +void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); + +void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); + +void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); + +void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); + +void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); + void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define av1_fwht4x4 av1_fwht4x4_c +int64_t av1_highbd_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd); +int64_t av1_highbd_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd); +#define av1_highbd_block_error av1_highbd_block_error_sse2 + +void av1_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void av1_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define av1_highbd_convolve8 av1_highbd_convolve8_sse2 + +void av1_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void av1_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define av1_highbd_convolve8_avg av1_highbd_convolve8_avg_sse2 + +void av1_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void av1_highbd_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define av1_highbd_convolve8_avg_horiz av1_highbd_convolve8_avg_horiz_sse2 + +void av1_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void av1_highbd_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define av1_highbd_convolve8_avg_vert av1_highbd_convolve8_avg_vert_sse2 + +void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void av1_highbd_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define av1_highbd_convolve8_horiz av1_highbd_convolve8_horiz_sse2 + +void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2 + +void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c + +void av1_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define av1_highbd_convolve_copy av1_highbd_convolve_copy_c + +void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); +void av1_highbd_convolve_horiz_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_horiz)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); + +void av1_highbd_convolve_init_c(void); +void av1_highbd_convolve_init_sse4_1(void); +RTCD_EXTERN void (*av1_highbd_convolve_init)(void); + +void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); +void av1_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); +RTCD_EXTERN void (*av1_highbd_convolve_vert)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); + +void av1_highbd_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht16x16 av1_highbd_fht16x16_c + +void av1_highbd_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht16x32 av1_highbd_fht16x32_c + +void av1_highbd_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht16x4 av1_highbd_fht16x4_c + +void av1_highbd_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht16x8 av1_highbd_fht16x8_c + +void av1_highbd_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht32x16 av1_highbd_fht32x16_c + +void av1_highbd_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht32x32 av1_highbd_fht32x32_c + +void av1_highbd_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht32x8 av1_highbd_fht32x8_c + +void av1_highbd_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht4x16 av1_highbd_fht4x16_c + +void av1_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output, int stride, int tx_type); +RTCD_EXTERN void (*av1_highbd_fht4x4)(const int16_t *input, tran_low_t *output, int stride, int tx_type); + +void av1_highbd_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht4x8 av1_highbd_fht4x8_c + +void av1_highbd_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht8x16 av1_highbd_fht8x16_c + +void av1_highbd_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht8x32 av1_highbd_fht8x32_c + +void av1_highbd_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht8x4 av1_highbd_fht8x4_c + +void av1_highbd_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_highbd_fht8x8 av1_highbd_fht8x8_c + +void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); +#define av1_highbd_fwht4x4 av1_highbd_fwht4x4_c + +void av1_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd); +#define av1_highbd_iht16x16_256_add av1_highbd_iht16x16_256_add_c + +void av1_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht16x32_512_add av1_highbd_iht16x32_512_add_c + +void av1_highbd_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht16x4_64_add av1_highbd_iht16x4_64_add_c + +void av1_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht16x8_128_add av1_highbd_iht16x8_128_add_c + +void av1_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht32x16_512_add av1_highbd_iht32x16_512_add_c + +void av1_highbd_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht32x8_256_add av1_highbd_iht32x8_256_add_c + +void av1_highbd_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht4x16_64_add av1_highbd_iht4x16_64_add_c + +void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht4x4_16_add av1_highbd_iht4x4_16_add_c + +void av1_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht4x8_32_add av1_highbd_iht4x8_32_add_c + +void av1_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht8x16_128_add av1_highbd_iht8x16_128_add_c + +void av1_highbd_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht8x32_256_add av1_highbd_iht8x32_256_add_c + +void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht8x4_32_add av1_highbd_iht8x4_32_add_c + +void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +#define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c + +void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +#define av1_highbd_quantize_b av1_highbd_quantize_b_c + +void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +RTCD_EXTERN void (*av1_highbd_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); + +void av1_highbd_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); +#define av1_highbd_temporal_filter_apply av1_highbd_temporal_filter_apply_c + +void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); + void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); @@ -204,6 +361,25 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); #define av1_iht8x8_64_add av1_iht8x8_64_add_sse2 +void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); + +void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); + +void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); + +void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c + +void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); + void av1_lowbd_convolve_init_c(void); void av1_lowbd_convolve_init_ssse3(void); RTCD_EXTERN void (*av1_lowbd_convolve_init)(void); @@ -213,20 +389,35 @@ void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_b void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -void av1_quantize_fp_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -RTCD_EXTERN void (*av1_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +#define av1_quantize_fp av1_quantize_fp_sse2 void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -void av1_quantize_fp_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +#define av1_quantize_fp_32x32 av1_quantize_fp_32x32_c void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); void av1_temporal_filter_apply_sse2(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); #define av1_temporal_filter_apply av1_temporal_filter_apply_sse2 -void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); -void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); -#define av1_warp_affine av1_warp_affine_sse2 +void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +RTCD_EXTERN void (*av1_warp_affine)(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); + +void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a, const int16_t *b, int N); +void av1_wedge_compute_delta_squares_sse2(int16_t *d, const int16_t *a, const int16_t *b, int N); +#define av1_wedge_compute_delta_squares av1_wedge_compute_delta_squares_sse2 + +int av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N, int64_t limit); +int av1_wedge_sign_from_residuals_sse2(const int16_t *ds, const uint8_t *m, int N, int64_t limit); +#define av1_wedge_sign_from_residuals av1_wedge_sign_from_residuals_sse2 + +uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); +uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d, const uint8_t *m, int N); +#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_sse2 + +double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); +double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); +RTCD_EXTERN double (*compute_cross_correlation)(unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2); void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); void copy_4x4_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); @@ -304,14 +495,10 @@ static void setup_rtcd_internal(void) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse2; if (flags & HAS_SSSE3) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_ssse3; if (flags & HAS_SSE4_1) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse4_1; - av1_block_error = av1_block_error_sse2; - if (flags & HAS_AVX2) av1_block_error = av1_block_error_avx2; av1_convolve_horiz = av1_convolve_horiz_c; if (flags & HAS_SSSE3) av1_convolve_horiz = av1_convolve_horiz_ssse3; av1_convolve_vert = av1_convolve_vert_c; if (flags & HAS_SSSE3) av1_convolve_vert = av1_convolve_vert_ssse3; - av1_fdct8x8_quant = av1_fdct8x8_quant_sse2; - if (flags & HAS_SSSE3) av1_fdct8x8_quant = av1_fdct8x8_quant_ssse3; av1_fht16x16 = av1_fht16x16_sse2; if (flags & HAS_AVX2) av1_fht16x16 = av1_fht16x16_avx2; av1_fht32x32 = av1_fht32x32_sse2; @@ -319,14 +506,44 @@ static void setup_rtcd_internal(void) av1_full_search_sad = av1_full_search_sad_c; if (flags & HAS_SSE3) av1_full_search_sad = av1_full_search_sadx3; if (flags & HAS_SSE4_1) av1_full_search_sad = av1_full_search_sadx8; + av1_fwd_txfm2d_16x16 = av1_fwd_txfm2d_16x16_c; + if (flags & HAS_SSE4_1) av1_fwd_txfm2d_16x16 = av1_fwd_txfm2d_16x16_sse4_1; + av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_c; + if (flags & HAS_SSE4_1) av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_sse4_1; + av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_c; + if (flags & HAS_SSE4_1) av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_sse4_1; + av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_c; + if (flags & HAS_SSE4_1) av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_sse4_1; + av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_c; + if (flags & HAS_SSE4_1) av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_sse4_1; + av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_c; + if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz = av1_highbd_convolve_horiz_sse4_1; + av1_highbd_convolve_init = av1_highbd_convolve_init_c; + if (flags & HAS_SSE4_1) av1_highbd_convolve_init = av1_highbd_convolve_init_sse4_1; + av1_highbd_convolve_vert = av1_highbd_convolve_vert_c; + if (flags & HAS_SSE4_1) av1_highbd_convolve_vert = av1_highbd_convolve_vert_sse4_1; + av1_highbd_fht4x4 = av1_highbd_fht4x4_c; + if (flags & HAS_SSE4_1) av1_highbd_fht4x4 = av1_highbd_fht4x4_sse4_1; + av1_highbd_quantize_fp = av1_highbd_quantize_fp_c; + if (flags & HAS_SSE4_1) av1_highbd_quantize_fp = av1_highbd_quantize_fp_sse4_1; + av1_highbd_warp_affine = av1_highbd_warp_affine_c; + if (flags & HAS_SSSE3) av1_highbd_warp_affine = av1_highbd_warp_affine_ssse3; av1_iht16x16_256_add = av1_iht16x16_256_add_sse2; if (flags & HAS_AVX2) av1_iht16x16_256_add = av1_iht16x16_256_add_avx2; + av1_inv_txfm2d_add_16x16 = av1_inv_txfm2d_add_16x16_c; + if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_16x16 = av1_inv_txfm2d_add_16x16_sse4_1; + av1_inv_txfm2d_add_32x32 = av1_inv_txfm2d_add_32x32_c; + if (flags & HAS_AVX2) av1_inv_txfm2d_add_32x32 = av1_inv_txfm2d_add_32x32_avx2; + av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_c; + if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_sse4_1; + av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_c; + if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_sse4_1; av1_lowbd_convolve_init = av1_lowbd_convolve_init_c; if (flags & HAS_SSSE3) av1_lowbd_convolve_init = av1_lowbd_convolve_init_ssse3; - av1_quantize_fp = av1_quantize_fp_sse2; - if (flags & HAS_SSSE3) av1_quantize_fp = av1_quantize_fp_ssse3; - av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c; - if (flags & HAS_SSSE3) av1_quantize_fp_32x32 = av1_quantize_fp_32x32_ssse3; + av1_warp_affine = av1_warp_affine_sse2; + if (flags & HAS_SSSE3) av1_warp_affine = av1_warp_affine_ssse3; + compute_cross_correlation = compute_cross_correlation_c; + if (flags & HAS_SSE4_1) compute_cross_correlation = compute_cross_correlation_sse4_1; copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse2; if (flags & HAS_SSSE3) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_ssse3; if (flags & HAS_SSE4_1) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse4_1; -- cgit v1.2.3