diff options
Diffstat (limited to 'media/libaom/config/mac/x64/av1_rtcd.h')
-rw-r--r-- | media/libaom/config/mac/x64/av1_rtcd.h | 246 |
1 files changed, 125 insertions, 121 deletions
diff --git a/media/libaom/config/mac/x64/av1_rtcd.h b/media/libaom/config/mac/x64/av1_rtcd.h index 9dcf96f84..d4fc99c10 100644 --- a/media/libaom/config/mac/x64/av1_rtcd.h +++ b/media/libaom/config/mac/x64/av1_rtcd.h @@ -12,6 +12,7 @@ */ #include "aom/aom_integer.h" +#include "aom_dsp/txfm_common.h" #include "av1/common/common.h" #include "av1/common/enums.h" #include "av1/common/quant_common.h" @@ -24,6 +25,7 @@ struct macroblockd; /* Encoder forward decls */ struct macroblock; +struct txfm_param; struct aom_variance_vtable; struct search_site_config; struct mv; @@ -60,7 +62,8 @@ void aom_clpf_hblock_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); -#define av1_block_error av1_block_error_c +int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); @@ -76,58 +79,58 @@ int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_conf void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); #define av1_fdct8x8_quant av1_fdct8x8_quant_c -void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, int tx_type); -RTCD_EXTERN void (*av1_fht16x16)(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +RTCD_EXTERN void (*av1_fht16x16)(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); -void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht16x32 av1_fht16x32_sse2 -void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht16x4 av1_fht16x4_c -void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht16x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht16x8_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht16x8 av1_fht16x8_sse2 -void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht32x16 av1_fht32x16_sse2 -void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht32x32_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride, int tx_type); -RTCD_EXTERN void (*av1_fht32x32)(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht32x32_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +RTCD_EXTERN void (*av1_fht32x32)(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); -void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht32x8 av1_fht32x8_c -void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht4x16 av1_fht4x16_c -void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht4x4 av1_fht4x4_sse2 -void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht4x8 av1_fht4x8_sse2 -void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht8x16 av1_fht8x16_sse2 -void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht8x32 av1_fht8x32_c -void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht8x4_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht8x4_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht8x4 av1_fht8x4_sse2 -void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); +void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param); #define av1_fht8x8 av1_fht8x8_sse2 int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); @@ -145,6 +148,15 @@ void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, i void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +#define av1_fwd_txfm2d_16x32 av1_fwd_txfm2d_16x32_c + +void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +#define av1_fwd_txfm2d_16x8 av1_fwd_txfm2d_16x8_c + +void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +#define av1_fwd_txfm2d_32x16 av1_fwd_txfm2d_32x16_c + void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); @@ -153,10 +165,19 @@ void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, int void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +#define av1_fwd_txfm2d_4x8 av1_fwd_txfm2d_4x8_c + void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +#define av1_fwd_txfm2d_8x16 av1_fwd_txfm2d_8x16_c + +void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); +#define av1_fwd_txfm2d_8x4 av1_fwd_txfm2d_8x4_c + void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t *input, int32_t *output, int stride, int tx_type, int bd); @@ -210,89 +231,46 @@ void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride, uint16_t *d void av1_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); RTCD_EXTERN void (*av1_highbd_convolve_vert)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd); -void av1_highbd_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht16x16 av1_highbd_fht16x16_c - -void av1_highbd_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht16x32 av1_highbd_fht16x32_c - -void av1_highbd_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht16x4 av1_highbd_fht16x4_c - -void av1_highbd_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht16x8 av1_highbd_fht16x8_c - -void av1_highbd_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht32x16 av1_highbd_fht32x16_c - -void av1_highbd_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht32x32 av1_highbd_fht32x32_c - -void av1_highbd_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht32x8 av1_highbd_fht32x8_c - -void av1_highbd_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht4x16 av1_highbd_fht4x16_c - -void av1_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -void av1_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output, int stride, int tx_type); -RTCD_EXTERN void (*av1_highbd_fht4x4)(const int16_t *input, tran_low_t *output, int stride, int tx_type); - -void av1_highbd_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht4x8 av1_highbd_fht4x8_c - -void av1_highbd_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht8x16 av1_highbd_fht8x16_c - -void av1_highbd_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht8x32 av1_highbd_fht8x32_c - -void av1_highbd_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht8x4 av1_highbd_fht8x4_c - -void av1_highbd_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); -#define av1_highbd_fht8x8 av1_highbd_fht8x8_c - void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); #define av1_highbd_fwht4x4 av1_highbd_fwht4x4_c -void av1_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd); +void av1_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); #define av1_highbd_iht16x16_256_add av1_highbd_iht16x16_256_add_c -void av1_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht16x32_512_add av1_highbd_iht16x32_512_add_c -void av1_highbd_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht16x4_64_add av1_highbd_iht16x4_64_add_c -void av1_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht16x8_128_add av1_highbd_iht16x8_128_add_c -void av1_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht32x16_512_add av1_highbd_iht32x16_512_add_c -void av1_highbd_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht32x8_256_add av1_highbd_iht32x8_256_add_c -void av1_highbd_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht4x16_64_add av1_highbd_iht4x16_64_add_c -void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht4x4_16_add av1_highbd_iht4x4_16_add_c -void av1_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht4x8_32_add av1_highbd_iht4x8_32_add_c -void av1_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x16_128_add av1_highbd_iht8x16_128_add_c -void av1_highbd_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x32_256_add av1_highbd_iht8x32_256_add_c -void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x4_32_add av1_highbd_iht8x4_32_add_c -void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd); +void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_highbd_iht8x8_64_add av1_highbd_iht8x8_64_add_c void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); @@ -300,71 +278,81 @@ void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_quantize_fp_sse4_1(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +void av1_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); RTCD_EXTERN void (*av1_highbd_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); void av1_highbd_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); #define av1_highbd_temporal_filter_apply av1_highbd_temporal_filter_apply_c -void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); -void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); -RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); -void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -RTCD_EXTERN void (*av1_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); +void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); +void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); +RTCD_EXTERN void (*av1_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); -void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); +void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht16x32_512_add av1_iht16x32_512_add_sse2 -void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht16x4_64_add av1_iht16x4_64_add_c -void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void av1_iht16x8_128_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); +void av1_iht16x8_128_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht16x8_128_add av1_iht16x8_128_add_sse2 -void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void av1_iht32x16_512_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); +void av1_iht32x16_512_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht32x16_512_add av1_iht32x16_512_add_sse2 -void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param); #define av1_iht32x32_1024_add av1_iht32x32_1024_add_c -void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht32x8_256_add av1_iht32x8_256_add_c -void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht4x16_64_add av1_iht4x16_64_add_c -void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void av1_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); +void av1_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht4x4_16_add av1_iht4x4_16_add_sse2 -void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); +void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht4x8_32_add av1_iht4x8_32_add_sse2 -void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); +void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht8x16_128_add av1_iht8x16_128_add_sse2 -void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht8x32_256_add av1_iht8x32_256_add_c -void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); +void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht8x4_32_add av1_iht8x4_32_add_sse2 -void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); +void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param); #define av1_iht8x8_64_add av1_iht8x8_64_add_sse2 void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); RTCD_EXTERN void (*av1_inv_txfm2d_add_16x16)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +#define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c + +void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +#define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c + +void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +#define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c + void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); void av1_inv_txfm2d_add_32x32_avx2(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); RTCD_EXTERN void (*av1_inv_txfm2d_add_32x32)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); @@ -373,9 +361,18 @@ void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +#define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c + void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); #define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c +void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +#define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c + +void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); +#define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c + void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, int tx_type, int bd); @@ -389,19 +386,21 @@ void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_b void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_quantize_fp av1_quantize_fp_sse2 +void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +RTCD_EXTERN void (*av1_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); -#define av1_quantize_fp_32x32 av1_quantize_fp_32x32_c +void av1_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); void av1_temporal_filter_apply_sse2(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); #define av1_temporal_filter_apply av1_temporal_filter_apply_sse2 -void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); -void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); -void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); -RTCD_EXTERN void (*av1_warp_affine)(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +RTCD_EXTERN void (*av1_warp_affine)(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a, const int16_t *b, int N); void av1_wedge_compute_delta_squares_sse2(int16_t *d, const int16_t *a, const int16_t *b, int N); @@ -495,6 +494,8 @@ static void setup_rtcd_internal(void) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse2; if (flags & HAS_SSSE3) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_ssse3; if (flags & HAS_SSE4_1) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse4_1; + av1_block_error = av1_block_error_c; + if (flags & HAS_AVX2) av1_block_error = av1_block_error_avx2; av1_convolve_horiz = av1_convolve_horiz_c; if (flags & HAS_SSSE3) av1_convolve_horiz = av1_convolve_horiz_ssse3; av1_convolve_vert = av1_convolve_vert_c; @@ -522,10 +523,9 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) av1_highbd_convolve_init = av1_highbd_convolve_init_sse4_1; av1_highbd_convolve_vert = av1_highbd_convolve_vert_c; if (flags & HAS_SSE4_1) av1_highbd_convolve_vert = av1_highbd_convolve_vert_sse4_1; - av1_highbd_fht4x4 = av1_highbd_fht4x4_c; - if (flags & HAS_SSE4_1) av1_highbd_fht4x4 = av1_highbd_fht4x4_sse4_1; av1_highbd_quantize_fp = av1_highbd_quantize_fp_c; if (flags & HAS_SSE4_1) av1_highbd_quantize_fp = av1_highbd_quantize_fp_sse4_1; + if (flags & HAS_AVX2) av1_highbd_quantize_fp = av1_highbd_quantize_fp_avx2; av1_highbd_warp_affine = av1_highbd_warp_affine_c; if (flags & HAS_SSSE3) av1_highbd_warp_affine = av1_highbd_warp_affine_ssse3; av1_iht16x16_256_add = av1_iht16x16_256_add_sse2; @@ -540,6 +540,10 @@ static void setup_rtcd_internal(void) if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_sse4_1; av1_lowbd_convolve_init = av1_lowbd_convolve_init_c; if (flags & HAS_SSSE3) av1_lowbd_convolve_init = av1_lowbd_convolve_init_ssse3; + av1_quantize_fp = av1_quantize_fp_sse2; + if (flags & HAS_AVX2) av1_quantize_fp = av1_quantize_fp_avx2; + av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c; + if (flags & HAS_AVX2) av1_quantize_fp_32x32 = av1_quantize_fp_32x32_avx2; av1_warp_affine = av1_warp_affine_sse2; if (flags & HAS_SSSE3) av1_warp_affine = av1_warp_affine_ssse3; compute_cross_correlation = compute_cross_correlation_c; |