diff options
Diffstat (limited to 'media/libaom/config/mac/x64/aom_dsp_rtcd.h')
-rw-r--r-- | media/libaom/config/mac/x64/aom_dsp_rtcd.h | 621 |
1 files changed, 461 insertions, 160 deletions
diff --git a/media/libaom/config/mac/x64/aom_dsp_rtcd.h b/media/libaom/config/mac/x64/aom_dsp_rtcd.h index 6ee856f1d..d2bee385f 100644 --- a/media/libaom/config/mac/x64/aom_dsp_rtcd.h +++ b/media/libaom/config/mac/x64/aom_dsp_rtcd.h @@ -20,6 +20,9 @@ extern "C" { #endif +void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +#define aom_blend_a64_d32_mask aom_blend_a64_d32_mask_c + void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); @@ -51,6 +54,28 @@ void aom_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, void aom_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); RTCD_EXTERN void (*aom_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_hip_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_hip aom_convolve8_add_src_hip_sse2 + +void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_horiz_hip aom_convolve8_add_src_horiz_hip_c + +void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_add_src_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_add_src_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve8_add_src_vert_hip aom_convolve8_add_src_vert_hip_c + void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); void aom_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); @@ -312,33 +337,41 @@ void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin #define aom_dc_128_predictor_16x16 aom_dc_128_predictor_16x16_sse2 void aom_dc_128_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c +void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_sse2 void aom_dc_128_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c +void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_sse2 void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c void aom_dc_128_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x16 aom_dc_128_predictor_32x16_c +void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_32x32 aom_dc_128_predictor_32x32_sse2 +void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_128_predictor_4x4 aom_dc_128_predictor_4x4_sse2 void aom_dc_128_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c +void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_sse2 void aom_dc_128_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c +void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_sse2 void aom_dc_128_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_c +void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_sse2 void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -349,33 +382,41 @@ void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const ui #define aom_dc_left_predictor_16x16 aom_dc_left_predictor_16x16_sse2 void aom_dc_left_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c +void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_sse2 void aom_dc_left_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c +void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_sse2 void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c void aom_dc_left_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x16 aom_dc_left_predictor_32x16_c +void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_32x32 aom_dc_left_predictor_32x32_sse2 +void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_left_predictor_4x4 aom_dc_left_predictor_4x4_sse2 void aom_dc_left_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c +void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_sse2 void aom_dc_left_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c +void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_sse2 void aom_dc_left_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_c +void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_sse2 void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -386,33 +427,41 @@ void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_dc_predictor_16x16 aom_dc_predictor_16x16_sse2 void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c +void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_sse2 void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c +void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_sse2 void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x16 aom_dc_predictor_32x16_c +void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_32x32 aom_dc_predictor_32x32_sse2 +void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_predictor_4x4 aom_dc_predictor_4x4_sse2 void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c +void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_sse2 void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c +void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_sse2 void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_c +void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_sse2 void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -423,33 +472,41 @@ void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uin #define aom_dc_top_predictor_16x16 aom_dc_top_predictor_16x16_sse2 void aom_dc_top_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c +void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_sse2 void aom_dc_top_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c +void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_sse2 void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c void aom_dc_top_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x16 aom_dc_top_predictor_32x16_c +void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_32x32 aom_dc_top_predictor_32x32_sse2 +void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_dc_top_predictor_4x4 aom_dc_top_predictor_4x4_sse2 void aom_dc_top_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c +void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_sse2 void aom_dc_top_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c +void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_sse2 void aom_dc_top_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_c +void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_sse2 void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -503,33 +560,40 @@ void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_h_predictor_16x16 aom_h_predictor_16x16_sse2 void aom_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x32 aom_h_predictor_16x32_c +void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_16x32 aom_h_predictor_16x32_sse2 void aom_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_16x8 aom_h_predictor_16x8_c +void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_16x8 aom_h_predictor_16x8_sse2 void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_2x2 aom_h_predictor_2x2_c void aom_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x16 aom_h_predictor_32x16_c +void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_32x16 aom_h_predictor_32x16_sse2 void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_32x32 aom_h_predictor_32x32_sse2 +void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_h_predictor_4x4 aom_h_predictor_4x4_sse2 void aom_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_4x8 aom_h_predictor_4x8_c +void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_4x8 aom_h_predictor_4x8_sse2 void aom_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x16 aom_h_predictor_8x16_c +void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_8x16 aom_h_predictor_8x16_sse2 void aom_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_h_predictor_8x4 aom_h_predictor_8x4_c +void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_8x4 aom_h_predictor_8x4_sse2 void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -1455,6 +1519,26 @@ void aom_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t void aom_highbd_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); RTCD_EXTERN void (*aom_highbd_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src aom_highbd_convolve8_add_src_sse2 + +void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +void aom_highbd_convolve8_add_src_hip_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +RTCD_EXTERN void (*aom_highbd_convolve8_add_src_hip)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); + +void aom_highbd_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz aom_highbd_convolve8_add_src_horiz_c + +void aom_highbd_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_horiz_hip aom_highbd_convolve8_add_src_horiz_hip_c + +void aom_highbd_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert aom_highbd_convolve8_add_src_vert_c + +void aom_highbd_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); +#define aom_highbd_convolve8_add_src_vert_hip aom_highbd_convolve8_add_src_vert_hip_c + void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_convolve8_avg_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); @@ -1491,7 +1575,8 @@ void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uin RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps); void aom_highbd_d117_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_16x16 aom_highbd_d117_predictor_16x16_c +void aom_highbd_d117_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_16x32 aom_highbd_d117_predictor_16x32_c @@ -1506,10 +1591,12 @@ void aom_highbd_d117_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d117_predictor_32x16 aom_highbd_d117_predictor_32x16_c void aom_highbd_d117_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_32x32 aom_highbd_d117_predictor_32x32_c +void aom_highbd_d117_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_c +void aom_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d117_predictor_4x4 aom_highbd_d117_predictor_4x4_sse2 void aom_highbd_d117_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d117_predictor_4x8 aom_highbd_d117_predictor_4x8_c @@ -1521,10 +1608,12 @@ void aom_highbd_d117_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d117_predictor_8x4 aom_highbd_d117_predictor_8x4_c void aom_highbd_d117_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d117_predictor_8x8 aom_highbd_d117_predictor_8x8_c +void aom_highbd_d117_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d117_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_16x16 aom_highbd_d135_predictor_16x16_c +void aom_highbd_d135_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_16x32 aom_highbd_d135_predictor_16x32_c @@ -1539,10 +1628,12 @@ void aom_highbd_d135_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d135_predictor_32x16 aom_highbd_d135_predictor_32x16_c void aom_highbd_d135_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_32x32 aom_highbd_d135_predictor_32x32_c +void aom_highbd_d135_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_c +void aom_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d135_predictor_4x4 aom_highbd_d135_predictor_4x4_sse2 void aom_highbd_d135_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d135_predictor_4x8 aom_highbd_d135_predictor_4x8_c @@ -1554,10 +1645,12 @@ void aom_highbd_d135_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d135_predictor_8x4 aom_highbd_d135_predictor_8x4_c void aom_highbd_d135_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d135_predictor_8x8 aom_highbd_d135_predictor_8x8_c +void aom_highbd_d135_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d135_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_16x16 aom_highbd_d153_predictor_16x16_c +void aom_highbd_d153_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_16x32 aom_highbd_d153_predictor_16x32_c @@ -1572,10 +1665,12 @@ void aom_highbd_d153_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_d153_predictor_32x16 aom_highbd_d153_predictor_32x16_c void aom_highbd_d153_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_32x32 aom_highbd_d153_predictor_32x32_c +void aom_highbd_d153_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_c +void aom_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d153_predictor_4x4 aom_highbd_d153_predictor_4x4_sse2 void aom_highbd_d153_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d153_predictor_4x8 aom_highbd_d153_predictor_4x8_c @@ -1587,7 +1682,8 @@ void aom_highbd_d153_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d153_predictor_8x4 aom_highbd_d153_predictor_8x4_c void aom_highbd_d153_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d153_predictor_8x8 aom_highbd_d153_predictor_8x8_c +void aom_highbd_d153_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d153_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d207e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d207e_predictor_16x16 aom_highbd_d207e_predictor_16x16_c @@ -1623,37 +1719,47 @@ void aom_highbd_d207e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_d207e_predictor_8x8 aom_highbd_d207e_predictor_8x8_c void aom_highbd_d45e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x16 aom_highbd_d45e_predictor_16x16_c +void aom_highbd_d45e_predictor_16x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x32 aom_highbd_d45e_predictor_16x32_c +void aom_highbd_d45e_predictor_16x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_16x8 aom_highbd_d45e_predictor_16x8_c +void aom_highbd_d45e_predictor_16x8_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d45e_predictor_2x2 aom_highbd_d45e_predictor_2x2_c void aom_highbd_d45e_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x16 aom_highbd_d45e_predictor_32x16_c +void aom_highbd_d45e_predictor_32x16_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_32x32 aom_highbd_d45e_predictor_32x32_c +void aom_highbd_d45e_predictor_32x32_avx2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +RTCD_EXTERN void (*aom_highbd_d45e_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_d45e_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_c +void aom_highbd_d45e_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_4x4 aom_highbd_d45e_predictor_4x4_sse2 void aom_highbd_d45e_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_c +void aom_highbd_d45e_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_4x8 aom_highbd_d45e_predictor_4x8_sse2 void aom_highbd_d45e_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_c +void aom_highbd_d45e_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x16 aom_highbd_d45e_predictor_8x16_sse2 void aom_highbd_d45e_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_c +void aom_highbd_d45e_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x4 aom_highbd_d45e_predictor_8x4_sse2 void aom_highbd_d45e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_c +void aom_highbd_d45e_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_d45e_predictor_8x8 aom_highbd_d45e_predictor_8x8_sse2 void aom_highbd_d63e_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_d63e_predictor_16x16 aom_highbd_d63e_predictor_16x16_c @@ -1689,86 +1795,109 @@ void aom_highbd_d63e_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_d63e_predictor_8x8 aom_highbd_d63e_predictor_8x8_c void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_c +void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_sse2 void aom_highbd_dc_128_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_c +void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_sse2 void aom_highbd_dc_128_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_c +void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_sse2 void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c void aom_highbd_dc_128_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_c +void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_sse2 void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_c +void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_sse2 void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_c +void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_sse2 void aom_highbd_dc_128_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_c +void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_sse2 void aom_highbd_dc_128_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_c +void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_sse2 void aom_highbd_dc_128_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_c +void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_sse2 void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_c +void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_sse2 void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_c +void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_sse2 void aom_highbd_dc_left_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_c +void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_sse2 void aom_highbd_dc_left_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_c +void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_sse2 void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c void aom_highbd_dc_left_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_c +void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_sse2 void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_c +void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_sse2 void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_c +void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_sse2 void aom_highbd_dc_left_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_c +void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_sse2 void aom_highbd_dc_left_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_c +void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_sse2 void aom_highbd_dc_left_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_c +void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_sse2 void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_c +void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_sse2 void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_16x16 aom_highbd_dc_predictor_16x16_sse2 void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_c +void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_sse2 void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_c +void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_sse2 void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_c +void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_sse2 void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -1779,50 +1908,63 @@ void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const u #define aom_highbd_dc_predictor_4x4 aom_highbd_dc_predictor_4x4_sse2 void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_c +void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_sse2 void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_c +void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_sse2 void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_c +void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_sse2 void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_predictor_8x8 aom_highbd_dc_predictor_8x8_sse2 void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_c +void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_sse2 void aom_highbd_dc_top_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_c +void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_sse2 void aom_highbd_dc_top_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_c +void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_sse2 void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c void aom_highbd_dc_top_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_c +void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_sse2 void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_c +void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_sse2 void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_c +void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_sse2 void aom_highbd_dc_top_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_c +void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_sse2 void aom_highbd_dc_top_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_c +void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_sse2 void aom_highbd_dc_top_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_c +void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_sse2 void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_c +void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_sse2 void aom_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride); void aom_highbd_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride); @@ -1845,37 +1987,47 @@ void aom_highbd_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int strid #define aom_highbd_fdct8x8 aom_highbd_fdct8x8_sse2 void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_c +void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_sse2 void aom_highbd_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_c +void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_sse2 void aom_highbd_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_c +void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_sse2 void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c void aom_highbd_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_c +void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_sse2 void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_c +void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_sse2 void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_c +void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_sse2 void aom_highbd_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_c +void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_sse2 void aom_highbd_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_c +void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_sse2 void aom_highbd_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_c +void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_sse2 void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_c +void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_sse2 void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd); #define aom_highbd_iwht4x4_16_add aom_highbd_iwht4x4_16_add_c @@ -1889,7 +2041,8 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int pitch, const uint8_t *bli void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_horizontal_4_dual aom_highbd_lpf_horizontal_4_dual_sse2 +void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1897,11 +2050,13 @@ void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *bli void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_horizontal_8_dual aom_highbd_lpf_horizontal_8_dual_sse2 +void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); -#define aom_highbd_lpf_horizontal_edge_16 aom_highbd_lpf_horizontal_edge_16_sse2 +void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_horizontal_edge_16)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1913,7 +2068,8 @@ void aom_highbd_lpf_vertical_16_sse2(uint16_t *s, int pitch, const uint8_t *blim void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); -#define aom_highbd_lpf_vertical_16_dual aom_highbd_lpf_vertical_16_dual_sse2 +void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_16_dual)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1921,7 +2077,8 @@ void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimi void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_vertical_4_dual aom_highbd_lpf_vertical_4_dual_sse2 +void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); @@ -1929,7 +2086,8 @@ void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimi void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); -#define aom_highbd_lpf_vertical_8_dual aom_highbd_lpf_vertical_8_dual_sse2 +void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); +RTCD_EXTERN void (*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); unsigned int aom_highbd_masked_sad16x16_c(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_highbd_masked_sad16x16_ssse3(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2510,16 +2668,19 @@ void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const #define aom_highbd_v_predictor_16x16 aom_highbd_v_predictor_16x16_sse2 void aom_highbd_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_c +void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_sse2 void aom_highbd_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_c +void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_sse2 void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); #define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c void aom_highbd_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_c +void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_sse2 void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2530,13 +2691,16 @@ void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const ui #define aom_highbd_v_predictor_4x4 aom_highbd_v_predictor_4x4_sse2 void aom_highbd_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_c +void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_sse2 void aom_highbd_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_c +void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_sse2 void aom_highbd_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); -#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_c +void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); +#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_sse2 void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); @@ -2626,50 +2790,43 @@ void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, con #define aom_lpf_horizontal_4 aom_lpf_horizontal_4_sse2 void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_sse2 +#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_horizontal_8 aom_lpf_horizontal_8_sse2 void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_sse2 +#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_c void aom_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_16 aom_lpf_horizontal_edge_16_sse2 void aom_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*aom_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_edge_8 aom_lpf_horizontal_edge_8_sse2 void aom_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_16 aom_lpf_vertical_16_sse2 void aom_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void aom_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_sse2 +#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_c void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_4 aom_lpf_vertical_4_sse2 void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_sse2 +#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); #define aom_lpf_vertical_8 aom_lpf_vertical_8_sse2 void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_sse2 +#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_c unsigned int aom_masked_sad16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); unsigned int aom_masked_sad16x16_ssse3(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask); @@ -2940,37 +3097,52 @@ unsigned int aom_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, con RTCD_EXTERN unsigned int (*aom_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c +void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c +void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c +void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c void aom_paeth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c +void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c +void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c +void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c +void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c +void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c +void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c +void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); @@ -3273,37 +3445,47 @@ void aom_smooth_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_ #define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x16 aom_smooth_predictor_16x16_c +void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_c +void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_c +void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c void aom_smooth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x16 aom_smooth_predictor_32x16_c +void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_32x32 aom_smooth_predictor_32x32_c +void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x4 aom_smooth_predictor_4x4_c +void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_c +void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_c +void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x4 aom_smooth_predictor_8x4_c +void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_smooth_predictor_8x8 aom_smooth_predictor_8x8_c +void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_smooth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_smooth_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c @@ -3493,33 +3675,41 @@ void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t #define aom_v_predictor_16x16 aom_v_predictor_16x16_sse2 void aom_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x32 aom_v_predictor_16x32_c +void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_16x32 aom_v_predictor_16x32_sse2 void aom_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_16x8 aom_v_predictor_16x8_c +void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_16x8 aom_v_predictor_16x8_sse2 void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_2x2 aom_v_predictor_2x2_c void aom_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x16 aom_v_predictor_32x16_c +void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_32x32 aom_v_predictor_32x32_sse2 +void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); #define aom_v_predictor_4x4 aom_v_predictor_4x4_sse2 void aom_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_4x8 aom_v_predictor_4x8_c +void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_4x8 aom_v_predictor_4x8_sse2 void aom_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x16 aom_v_predictor_8x16_c +void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_8x16 aom_v_predictor_8x16_sse2 void aom_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define aom_v_predictor_8x4 aom_v_predictor_8x4_c +void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_8x4 aom_v_predictor_8x4_sse2 void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); @@ -3626,6 +3816,12 @@ static void setup_rtcd_internal(void) aom_convolve8 = aom_convolve8_sse2; if (flags & HAS_SSSE3) aom_convolve8 = aom_convolve8_ssse3; if (flags & HAS_AVX2) aom_convolve8 = aom_convolve8_avx2; + aom_convolve8_add_src = aom_convolve8_add_src_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src = aom_convolve8_add_src_ssse3; + aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_horiz = aom_convolve8_add_src_horiz_ssse3; + aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_c; + if (flags & HAS_SSSE3) aom_convolve8_add_src_vert = aom_convolve8_add_src_vert_ssse3; aom_convolve8_avg = aom_convolve8_avg_sse2; if (flags & HAS_SSSE3) aom_convolve8_avg = aom_convolve8_avg_ssse3; aom_convolve8_avg_horiz = aom_convolve8_avg_horiz_sse2; @@ -3648,6 +3844,22 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_d153_predictor_8x8 = aom_d153_predictor_8x8_ssse3; aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_c; if (flags & HAS_SSSE3) aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_ssse3; + aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_avx2; + aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_avx2; + aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_avx2; + aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_avx2; + aom_dc_predictor_32x16 = aom_dc_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_avx2; + aom_dc_predictor_32x32 = aom_dc_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_avx2; + aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_avx2; + aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_avx2; aom_fdct32x32 = aom_fdct32x32_sse2; if (flags & HAS_AVX2) aom_fdct32x32 = aom_fdct32x32_avx2; aom_fdct32x32_rd = aom_fdct32x32_rd_sse2; @@ -3656,6 +3868,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_fdct8x8 = aom_fdct8x8_ssse3; aom_get16x16var = aom_get16x16var_sse2; if (flags & HAS_AVX2) aom_get16x16var = aom_get16x16var_avx2; + aom_h_predictor_32x32 = aom_h_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_h_predictor_32x32 = aom_h_predictor_32x32_avx2; aom_hadamard_8x8 = aom_hadamard_8x8_sse2; if (flags & HAS_SSSE3) aom_hadamard_8x8 = aom_hadamard_8x8_ssse3; aom_highbd_10_masked_sub_pixel_variance16x16 = aom_highbd_10_masked_sub_pixel_variance16x16_c; @@ -3814,6 +4028,8 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_sse4_1; aom_highbd_convolve8 = aom_highbd_convolve8_sse2; if (flags & HAS_AVX2) aom_highbd_convolve8 = aom_highbd_convolve8_avx2; + aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_c; + if (flags & HAS_SSSE3) aom_highbd_convolve8_add_src_hip = aom_highbd_convolve8_add_src_hip_ssse3; aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_sse2; if (flags & HAS_AVX2) aom_highbd_convolve8_avg = aom_highbd_convolve8_avg_avx2; aom_highbd_convolve8_avg_horiz = aom_highbd_convolve8_avg_horiz_sse2; @@ -3828,6 +4044,46 @@ static void setup_rtcd_internal(void) if (flags & HAS_AVX2) aom_highbd_convolve_avg = aom_highbd_convolve_avg_avx2; aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2; if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2; + aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_16x16 = aom_highbd_d117_predictor_16x16_ssse3; + aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_32x32 = aom_highbd_d117_predictor_32x32_ssse3; + aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d117_predictor_8x8 = aom_highbd_d117_predictor_8x8_ssse3; + aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_16x16 = aom_highbd_d135_predictor_16x16_ssse3; + aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_32x32 = aom_highbd_d135_predictor_32x32_ssse3; + aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d135_predictor_8x8 = aom_highbd_d135_predictor_8x8_ssse3; + aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_16x16 = aom_highbd_d153_predictor_16x16_ssse3; + aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_32x32 = aom_highbd_d153_predictor_32x32_ssse3; + aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_highbd_d153_predictor_8x8 = aom_highbd_d153_predictor_8x8_ssse3; + aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x16 = aom_highbd_d45e_predictor_16x16_avx2; + aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x32 = aom_highbd_d45e_predictor_16x32_avx2; + aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_16x8 = aom_highbd_d45e_predictor_16x8_avx2; + aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x16 = aom_highbd_d45e_predictor_32x16_avx2; + aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_c; + if (flags & HAS_AVX2) aom_highbd_d45e_predictor_32x32 = aom_highbd_d45e_predictor_32x32_avx2; + aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_avx2; + aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_avx2; + aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_edge_16 = aom_highbd_lpf_horizontal_edge_16_avx2; + aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_16_dual = aom_highbd_lpf_vertical_16_dual_avx2; + aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_avx2; + aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_sse2; + if (flags & HAS_AVX2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_avx2; aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_highbd_masked_sad16x16 = aom_highbd_masked_sad16x16_ssse3; aom_highbd_masked_sad16x32 = aom_highbd_masked_sad16x32_c; @@ -3979,10 +4235,6 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_idct8x8_12_add = aom_idct8x8_12_add_ssse3; aom_idct8x8_64_add = aom_idct8x8_64_add_sse2; if (flags & HAS_SSSE3) aom_idct8x8_64_add = aom_idct8x8_64_add_ssse3; - aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_avx2; - aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_sse2; - if (flags & HAS_AVX2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_avx2; aom_masked_sad16x16 = aom_masked_sad16x16_c; if (flags & HAS_SSSE3) aom_masked_sad16x16 = aom_masked_sad16x16_ssse3; aom_masked_sad16x32 = aom_masked_sad16x32_c; @@ -4089,6 +4341,31 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_obmc_variance8x4 = aom_obmc_variance8x4_sse4_1; aom_obmc_variance8x8 = aom_obmc_variance8x8_c; if (flags & HAS_SSE4_1) aom_obmc_variance8x8 = aom_obmc_variance8x8_sse4_1; + aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_avx2; + aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2; + aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_avx2; + aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_avx2; + aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_ssse3; + if (flags & HAS_AVX2) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_avx2; + aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3; + aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3; + aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3; + aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3; + aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_ssse3; aom_quantize_b = aom_quantize_b_sse2; if (flags & HAS_SSSE3) aom_quantize_b = aom_quantize_b_ssse3; if (flags & HAS_AVX) aom_quantize_b = aom_quantize_b_avx; @@ -4147,6 +4424,26 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) aom_sad8x8x8 = aom_sad8x8x8_sse4_1; aom_scaled_2d = aom_scaled_2d_c; if (flags & HAS_SSSE3) aom_scaled_2d = aom_scaled_2d_ssse3; + aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_ssse3; + aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3; + aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3; + aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_ssse3; + aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_ssse3; + aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3; + aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3; + aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3; + aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3; + aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_ssse3; aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_ssse3; aom_sub_pixel_avg_variance16x32 = aom_sub_pixel_avg_variance16x32_sse2; @@ -4203,6 +4500,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSSE3) aom_sub_pixel_variance8x4 = aom_sub_pixel_variance8x4_ssse3; aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_ssse3; + aom_v_predictor_32x16 = aom_v_predictor_32x16_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x16 = aom_v_predictor_32x16_avx2; + aom_v_predictor_32x32 = aom_v_predictor_32x32_sse2; + if (flags & HAS_AVX2) aom_v_predictor_32x32 = aom_v_predictor_32x32_avx2; aom_variance16x16 = aom_variance16x16_sse2; if (flags & HAS_AVX2) aom_variance16x16 = aom_variance16x16_avx2; aom_variance32x16 = aom_variance32x16_sse2; |