diff options
author | trav90 <travawine@palemoon.org> | 2018-10-15 21:46:29 -0500 |
---|---|---|
committer | trav90 <travawine@palemoon.org> | 2018-10-15 21:46:29 -0500 |
commit | a66e91651a81382ce117398303a220d75e8bc02e (patch) | |
tree | ead3e3f3b1669070232d0071d272d4eb5ad892d7 /media/libaom/config/win/x64 | |
parent | 68569dee1416593955c1570d638b3d9250b33012 (diff) | |
download | UXP-a66e91651a81382ce117398303a220d75e8bc02e.tar UXP-a66e91651a81382ce117398303a220d75e8bc02e.tar.gz UXP-a66e91651a81382ce117398303a220d75e8bc02e.tar.lz UXP-a66e91651a81382ce117398303a220d75e8bc02e.tar.xz UXP-a66e91651a81382ce117398303a220d75e8bc02e.zip |
Generate build description for libaom
Diffstat (limited to 'media/libaom/config/win/x64')
-rw-r--r-- | media/libaom/config/win/x64/aom_config.asm | 151 | ||||
-rw-r--r-- | media/libaom/config/win/x64/aom_config.c | 11 | ||||
-rw-r--r-- | media/libaom/config/win/x64/aom_config.h | 167 | ||||
-rw-r--r-- | media/libaom/config/win/x64/aom_dsp_rtcd.h | 1434 | ||||
-rw-r--r-- | media/libaom/config/win/x64/aom_scale_rtcd.h | 78 | ||||
-rw-r--r-- | media/libaom/config/win/x64/av1_rtcd.h | 364 |
6 files changed, 2205 insertions, 0 deletions
diff --git a/media/libaom/config/win/x64/aom_config.asm b/media/libaom/config/win/x64/aom_config.asm new file mode 100644 index 000000000..9cb036ce3 --- /dev/null +++ b/media/libaom/config/win/x64/aom_config.asm @@ -0,0 +1,151 @@ +%define ARCH_ARM 0 +%define ARCH_MIPS 0 +%define ARCH_X86 0 +%define ARCH_X86_64 1 +%define HAVE_EDSP 0 +%define HAVE_MEDIA 0 +%define HAVE_NEON 0 +%define HAVE_NEON_ASM 0 +%define HAVE_MIPS32 0 +%define HAVE_DSPR2 0 +%define HAVE_MSA 0 +%define HAVE_MIPS64 0 +%define HAVE_MMX 1 +%define HAVE_SSE 1 +%define HAVE_SSE2 1 +%define HAVE_SSE3 1 +%define HAVE_SSSE3 1 +%define HAVE_SSE4_1 1 +%define HAVE_AVX 1 +%define HAVE_AVX2 1 +%define HAVE_AOM_PORTS 1 +%define HAVE_FEXCEPT 1 +%define HAVE_PTHREAD_H 0 +%define HAVE_WXWIDGETS 0 +%define CONFIG_DEPENDENCY_TRACKING 1 +%define CONFIG_EXTERNAL_BUILD 1 +%define CONFIG_INSTALL_DOCS 1 +%define CONFIG_INSTALL_BINS 1 +%define CONFIG_INSTALL_LIBS 1 +%define CONFIG_INSTALL_SRCS 0 +%define CONFIG_DEBUG 0 +%define CONFIG_GPROF 0 +%define CONFIG_GCOV 0 +%define CONFIG_RVCT 0 +%define CONFIG_GCC 0 +%define CONFIG_MSVS 1 +%define CONFIG_PIC 1 +%define CONFIG_BIG_ENDIAN 0 +%define CONFIG_CODEC_SRCS 0 +%define CONFIG_DEBUG_LIBS 0 +%define CONFIG_RUNTIME_CPU_DETECT 1 +%define CONFIG_POSTPROC 1 +%define CONFIG_MULTITHREAD 1 +%define CONFIG_INTERNAL_STATS 0 +%define CONFIG_AV1_ENCODER 1 +%define CONFIG_AV1_DECODER 1 +%define CONFIG_AV1 1 +%define CONFIG_ENCODERS 1 +%define CONFIG_DECODERS 1 +%define CONFIG_STATIC_MSVCRT 0 +%define CONFIG_SPATIAL_RESAMPLING 1 +%define CONFIG_REALTIME_ONLY 0 +%define CONFIG_ONTHEFLY_BITPACKING 0 +%define CONFIG_ERROR_CONCEALMENT 0 +%define CONFIG_SHARED 0 +%define CONFIG_STATIC 1 +%define CONFIG_SMALL 0 +%define CONFIG_POSTPROC_VISUALIZER 0 +%define CONFIG_OS_SUPPORT 1 +%define CONFIG_UNIT_TESTS 0 +%define CONFIG_WEBM_IO 1 +%define CONFIG_LIBYUV 1 +%define CONFIG_ACCOUNTING 0 +%define CONFIG_INSPECTION 0 +%define CONFIG_DECODE_PERF_TESTS 0 +%define CONFIG_ENCODE_PERF_TESTS 0 +%define CONFIG_COEFFICIENT_RANGE_CHECKING 0 +%define CONFIG_LOWBITDEPTH 1 +%define CONFIG_HIGHBITDEPTH 0 +%define CONFIG_EXPERIMENTAL 0 +%define CONFIG_SIZE_LIMIT 1 +%define CONFIG_FP_MB_STATS 0 +%define CONFIG_CDEF 1 +%define CONFIG_VAR_TX 0 +%define CONFIG_RECT_TX 1 +%define CONFIG_REF_MV 1 +%define CONFIG_TPL_MV 0 +%define CONFIG_DUAL_FILTER 1 +%define CONFIG_CONVOLVE_ROUND 0 +%define CONFIG_COMPOUND_ROUND 0 +%define CONFIG_EXT_TX 0 +%define CONFIG_TX64X64 0 +%define CONFIG_SUB8X8_MC 0 +%define CONFIG_EXT_INTRA 1 +%define CONFIG_INTRA_INTERP 0 +%define CONFIG_FILTER_INTRA 0 +%define CONFIG_INTRABC 0 +%define CONFIG_EXT_INTER 0 +%define CONFIG_INTERINTRA 0 +%define CONFIG_WEDGE 0 +%define CONFIG_COMPOUND_SEGMENT 0 +%define CONFIG_EXT_REFS 1 +%define CONFIG_GLOBAL_MOTION 1 +%define CONFIG_NEW_QUANT 0 +%define CONFIG_SUPERTX 0 +%define CONFIG_ANS 0 +%define CONFIG_EC_MULTISYMBOL 1 +%define CONFIG_NEW_TOKENSET 1 +%define CONFIG_LOOP_RESTORATION 0 +%define CONFIG_EXT_PARTITION 0 +%define CONFIG_EXT_PARTITION_TYPES 0 +%define CONFIG_UNPOISON_PARTITION_CTX 0 +%define CONFIG_EXT_TILE 0 +%define CONFIG_MOTION_VAR 1 +%define CONFIG_NCOBMC 0 +%define CONFIG_WARPED_MOTION 1 +%define CONFIG_Q_ADAPT_PROBS 0 +%define CONFIG_SUBFRAME_PROB_UPDATE 0 +%define CONFIG_BITSTREAM_DEBUG 0 +%define CONFIG_ALT_INTRA 1 +%define CONFIG_PALETTE 1 +%define CONFIG_PALETTE_DELTA_ENCODING 0 +%define CONFIG_DAALA_EC 1 +%define CONFIG_RAWBITS 0 +%define CONFIG_EC_SMALLMUL 0 +%define CONFIG_PVQ 0 +%define CONFIG_CFL 0 +%define CONFIG_XIPHRC 0 +%define CONFIG_CB4X4 1 +%define CONFIG_CHROMA_2X2 0 +%define CONFIG_CHROMA_SUB8X8 0 +%define CONFIG_FRAME_SIZE 0 +%define CONFIG_DELTA_Q 1 +%define CONFIG_EXT_DELTA_Q 1 +%define CONFIG_ADAPT_SCAN 0 +%define CONFIG_FILTER_7BIT 1 +%define CONFIG_PARALLEL_DEBLOCKING 0 +%define CONFIG_PARALLEL_DEBLOCKING_15TAP 0 +%define CONFIG_LOOPFILTERING_ACROSS_TILES 0 +%define CONFIG_TILE_GROUPS 1 +%define CONFIG_EC_ADAPT 1 +%define CONFIG_TEMPMV_SIGNALING 0 +%define CONFIG_RD_DEBUG 0 +%define CONFIG_REFERENCE_BUFFER 1 +%define CONFIG_COEF_INTERLEAVE 0 +%define CONFIG_ENTROPY_STATS 0 +%define CONFIG_MASKED_TX 0 +%define CONFIG_DEPENDENT_HORZTILES 0 +%define CONFIG_DAALA_DIST 0 +%define CONFIG_TRIPRED 0 +%define CONFIG_PALETTE_THROUGHPUT 1 +%define CONFIG_REF_ADAPT 0 +%define CONFIG_LV_MAP 0 +%define CONFIG_TXK_SEL 0 +%define CONFIG_MV_COMPRESS 1 +%define CONFIG_FRAME_SUPERRES 0 +%define CONFIG_NEW_MULTISYMBOL 0 +%define CONFIG_COMPOUND_SINGLEREF 0 +%define CONFIG_AOM_QM 0 +%define CONFIG_LOWDELAY_COMPOUND 0 +%define CONFIG_ANALYZER 0 diff --git a/media/libaom/config/win/x64/aom_config.c b/media/libaom/config/win/x64/aom_config.c new file mode 100644 index 000000000..f90bfac5c --- /dev/null +++ b/media/libaom/config/win/x64/aom_config.c @@ -0,0 +1,11 @@ +/* Copyright (c) 2016, Alliance for Open Media. All rights reserved. */ +/* */ +/* This source code is subject to the terms of the BSD 2 Clause License and */ +/* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License */ +/* was not distributed with this source code in the LICENSE file, you can */ +/* obtain it at www.aomedia.org/license/software. If the Alliance for Open */ +/* Media Patent License 1.0 was not distributed with this source code in the */ +/* PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ +#include "aom/aom_codec.h" +static const char* const cfg = "--target=x86_64-win64-vs12 --enable-external-build --disable-examples --disable-docs --disable-unit-tests --size-limit=8192x4608 --enable-pic --enable-postproc --as=yasm"; +const char *aom_codec_build_config(void) {return cfg;} diff --git a/media/libaom/config/win/x64/aom_config.h b/media/libaom/config/win/x64/aom_config.h new file mode 100644 index 000000000..92aee324f --- /dev/null +++ b/media/libaom/config/win/x64/aom_config.h @@ -0,0 +1,167 @@ +/* Copyright (c) 2016, Alliance for Open Media. All rights reserved. */ +/* */ +/* This source code is subject to the terms of the BSD 2 Clause License and */ +/* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License */ +/* was not distributed with this source code in the LICENSE file, you can */ +/* obtain it at www.aomedia.org/license/software. If the Alliance for Open */ +/* Media Patent License 1.0 was not distributed with this source code in the */ +/* PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ +/* This file automatically generated by configure. Do not edit! */ +#ifndef AOM_CONFIG_H +#define AOM_CONFIG_H +#define RESTRICT +#define INLINE __forceinline +#define ARCH_ARM 0 +#define ARCH_MIPS 0 +#define ARCH_X86 0 +#define ARCH_X86_64 1 +#define HAVE_EDSP 0 +#define HAVE_MEDIA 0 +#define HAVE_NEON 0 +#define HAVE_NEON_ASM 0 +#define HAVE_MIPS32 0 +#define HAVE_DSPR2 0 +#define HAVE_MSA 0 +#define HAVE_MIPS64 0 +#define HAVE_MMX 1 +#define HAVE_SSE 1 +#define HAVE_SSE2 1 +#define HAVE_SSE3 1 +#define HAVE_SSSE3 1 +#define HAVE_SSE4_1 1 +#define HAVE_AVX 1 +#define HAVE_AVX2 1 +#define HAVE_AOM_PORTS 1 +#define HAVE_FEXCEPT 1 +#define HAVE_PTHREAD_H 0 +#define HAVE_WXWIDGETS 0 +#define CONFIG_DEPENDENCY_TRACKING 1 +#define CONFIG_EXTERNAL_BUILD 1 +#define CONFIG_INSTALL_DOCS 1 +#define CONFIG_INSTALL_BINS 1 +#define CONFIG_INSTALL_LIBS 1 +#define CONFIG_INSTALL_SRCS 0 +#define CONFIG_DEBUG 0 +#define CONFIG_GPROF 0 +#define CONFIG_GCOV 0 +#define CONFIG_RVCT 0 +#define CONFIG_GCC 0 +#define CONFIG_MSVS 1 +#define CONFIG_PIC 1 +#define CONFIG_BIG_ENDIAN 0 +#define CONFIG_CODEC_SRCS 0 +#define CONFIG_DEBUG_LIBS 0 +#define CONFIG_RUNTIME_CPU_DETECT 1 +#define CONFIG_POSTPROC 1 +#define CONFIG_MULTITHREAD 1 +#define CONFIG_INTERNAL_STATS 0 +#define CONFIG_AV1_ENCODER 1 +#define CONFIG_AV1_DECODER 1 +#define CONFIG_AV1 1 +#define CONFIG_ENCODERS 1 +#define CONFIG_DECODERS 1 +#define CONFIG_STATIC_MSVCRT 0 +#define CONFIG_SPATIAL_RESAMPLING 1 +#define CONFIG_REALTIME_ONLY 0 +#define CONFIG_ONTHEFLY_BITPACKING 0 +#define CONFIG_ERROR_CONCEALMENT 0 +#define CONFIG_SHARED 0 +#define CONFIG_STATIC 1 +#define CONFIG_SMALL 0 +#define CONFIG_POSTPROC_VISUALIZER 0 +#define CONFIG_OS_SUPPORT 1 +#define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 +#define CONFIG_LIBYUV 1 +#define CONFIG_ACCOUNTING 0 +#define CONFIG_INSPECTION 0 +#define CONFIG_DECODE_PERF_TESTS 0 +#define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_COEFFICIENT_RANGE_CHECKING 0 +#define CONFIG_LOWBITDEPTH 1 +#define CONFIG_HIGHBITDEPTH 0 +#define CONFIG_EXPERIMENTAL 0 +#define CONFIG_SIZE_LIMIT 1 +#define CONFIG_FP_MB_STATS 0 +#define CONFIG_CDEF 1 +#define CONFIG_VAR_TX 0 +#define CONFIG_RECT_TX 1 +#define CONFIG_REF_MV 1 +#define CONFIG_TPL_MV 0 +#define CONFIG_DUAL_FILTER 1 +#define CONFIG_CONVOLVE_ROUND 0 +#define CONFIG_COMPOUND_ROUND 0 +#define CONFIG_EXT_TX 0 +#define CONFIG_TX64X64 0 +#define CONFIG_SUB8X8_MC 0 +#define CONFIG_EXT_INTRA 1 +#define CONFIG_INTRA_INTERP 0 +#define CONFIG_FILTER_INTRA 0 +#define CONFIG_INTRABC 0 +#define CONFIG_EXT_INTER 0 +#define CONFIG_INTERINTRA 0 +#define CONFIG_WEDGE 0 +#define CONFIG_COMPOUND_SEGMENT 0 +#define CONFIG_EXT_REFS 1 +#define CONFIG_GLOBAL_MOTION 1 +#define CONFIG_NEW_QUANT 0 +#define CONFIG_SUPERTX 0 +#define CONFIG_ANS 0 +#define CONFIG_EC_MULTISYMBOL 1 +#define CONFIG_NEW_TOKENSET 1 +#define CONFIG_LOOP_RESTORATION 0 +#define CONFIG_EXT_PARTITION 0 +#define CONFIG_EXT_PARTITION_TYPES 0 +#define CONFIG_UNPOISON_PARTITION_CTX 0 +#define CONFIG_EXT_TILE 0 +#define CONFIG_MOTION_VAR 1 +#define CONFIG_NCOBMC 0 +#define CONFIG_WARPED_MOTION 1 +#define CONFIG_Q_ADAPT_PROBS 0 +#define CONFIG_SUBFRAME_PROB_UPDATE 0 +#define CONFIG_BITSTREAM_DEBUG 0 +#define CONFIG_ALT_INTRA 1 +#define CONFIG_PALETTE 1 +#define CONFIG_PALETTE_DELTA_ENCODING 0 +#define CONFIG_DAALA_EC 1 +#define CONFIG_RAWBITS 0 +#define CONFIG_EC_SMALLMUL 0 +#define CONFIG_PVQ 0 +#define CONFIG_CFL 0 +#define CONFIG_XIPHRC 0 +#define CONFIG_CB4X4 1 +#define CONFIG_CHROMA_2X2 0 +#define CONFIG_CHROMA_SUB8X8 0 +#define CONFIG_FRAME_SIZE 0 +#define CONFIG_DELTA_Q 1 +#define CONFIG_EXT_DELTA_Q 1 +#define CONFIG_ADAPT_SCAN 0 +#define CONFIG_FILTER_7BIT 1 +#define CONFIG_PARALLEL_DEBLOCKING 0 +#define CONFIG_PARALLEL_DEBLOCKING_15TAP 0 +#define CONFIG_LOOPFILTERING_ACROSS_TILES 0 +#define CONFIG_TILE_GROUPS 1 +#define CONFIG_EC_ADAPT 1 +#define CONFIG_TEMPMV_SIGNALING 0 +#define CONFIG_RD_DEBUG 0 +#define CONFIG_REFERENCE_BUFFER 1 +#define CONFIG_COEF_INTERLEAVE 0 +#define CONFIG_ENTROPY_STATS 0 +#define CONFIG_MASKED_TX 0 +#define CONFIG_DEPENDENT_HORZTILES 0 +#define CONFIG_DAALA_DIST 0 +#define CONFIG_TRIPRED 0 +#define CONFIG_PALETTE_THROUGHPUT 1 +#define CONFIG_REF_ADAPT 0 +#define CONFIG_LV_MAP 0 +#define CONFIG_TXK_SEL 0 +#define CONFIG_MV_COMPRESS 1 +#define CONFIG_FRAME_SUPERRES 0 +#define CONFIG_NEW_MULTISYMBOL 0 +#define CONFIG_COMPOUND_SINGLEREF 0 +#define CONFIG_AOM_QM 0 +#define CONFIG_LOWDELAY_COMPOUND 0 +#define CONFIG_ANALYZER 0 +#define DECODE_WIDTH_LIMIT 8192 +#define DECODE_HEIGHT_LIMIT 4608 +#endif /* AOM_CONFIG_H */ diff --git a/media/libaom/config/win/x64/aom_dsp_rtcd.h b/media/libaom/config/win/x64/aom_dsp_rtcd.h new file mode 100644 index 000000000..6fda772d8 --- /dev/null +++ b/media/libaom/config/win/x64/aom_dsp_rtcd.h @@ -0,0 +1,1434 @@ +#ifndef AOM_DSP_RTCD_H_ +#define AOM_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "aom/aom_integer.h" +#include "aom_dsp/aom_dsp_common.h" +#include "av1/common/enums.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned int aom_avg_4x4_c(const uint8_t *, int p); +unsigned int aom_avg_4x4_sse2(const uint8_t *, int p); +#define aom_avg_4x4 aom_avg_4x4_sse2 + +unsigned int aom_avg_8x8_c(const uint8_t *, int p); +unsigned int aom_avg_8x8_sse2(const uint8_t *, int p); +#define aom_avg_8x8 aom_avg_8x8_sse2 + +void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); +void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); +RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); + +void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +void aom_blend_a64_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); +RTCD_EXTERN void (*aom_blend_a64_mask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx); + +void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); +void aom_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); +RTCD_EXTERN void (*aom_blend_a64_vmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w); + +void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define aom_comp_avg_pred aom_comp_avg_pred_c + +void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +void aom_comp_avg_upsampled_pred_sse2(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); +#define aom_comp_avg_upsampled_pred aom_comp_avg_upsampled_pred_sse2 + +void aom_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve_avg aom_convolve_avg_sse2 + +void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_convolve_copy aom_convolve_copy_sse2 + +void aom_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d117_predictor_16x16 aom_d117_predictor_16x16_c + +void aom_d117_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d117_predictor_2x2 aom_d117_predictor_2x2_c + +void aom_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d117_predictor_32x32 aom_d117_predictor_32x32_c + +void aom_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d117_predictor_4x4 aom_d117_predictor_4x4_c + +void aom_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d117_predictor_8x8 aom_d117_predictor_8x8_c + +void aom_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d135_predictor_16x16 aom_d135_predictor_16x16_c + +void aom_d135_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d135_predictor_2x2 aom_d135_predictor_2x2_c + +void aom_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d135_predictor_32x32 aom_d135_predictor_32x32_c + +void aom_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d135_predictor_4x4 aom_d135_predictor_4x4_c + +void aom_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d135_predictor_8x8 aom_d135_predictor_8x8_c + +void aom_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void aom_d153_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d153_predictor_2x2 aom_d153_predictor_2x2_c + +void aom_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void aom_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_d153_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void aom_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_d153_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_d153_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void aom_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d207e_predictor_16x16 aom_d207e_predictor_16x16_c + +void aom_d207e_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d207e_predictor_2x2 aom_d207e_predictor_2x2_c + +void aom_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d207e_predictor_32x32 aom_d207e_predictor_32x32_c + +void aom_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d207e_predictor_4x4 aom_d207e_predictor_4x4_c + +void aom_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d207e_predictor_8x8 aom_d207e_predictor_8x8_c + +void aom_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d45e_predictor_16x16 aom_d45e_predictor_16x16_c + +void aom_d45e_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d45e_predictor_2x2 aom_d45e_predictor_2x2_c + +void aom_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d45e_predictor_32x32 aom_d45e_predictor_32x32_c + +void aom_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d45e_predictor_4x4 aom_d45e_predictor_4x4_c + +void aom_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d45e_predictor_8x8 aom_d45e_predictor_8x8_c + +void aom_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d63e_predictor_16x16 aom_d63e_predictor_16x16_c + +void aom_d63e_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d63e_predictor_2x2 aom_d63e_predictor_2x2_c + +void aom_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d63e_predictor_32x32 aom_d63e_predictor_32x32_c + +void aom_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_d63e_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*aom_d63e_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void aom_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_d63e_predictor_8x8 aom_d63e_predictor_8x8_c + +void aom_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_16x16 aom_dc_128_predictor_16x16_sse2 + +void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c + +void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_32x32 aom_dc_128_predictor_32x32_sse2 + +void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_4x4 aom_dc_128_predictor_4x4_sse2 + +void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_128_predictor_8x8 aom_dc_128_predictor_8x8_sse2 + +void aom_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_16x16 aom_dc_left_predictor_16x16_sse2 + +void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c + +void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_32x32 aom_dc_left_predictor_32x32_sse2 + +void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_4x4 aom_dc_left_predictor_4x4_sse2 + +void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_left_predictor_8x8 aom_dc_left_predictor_8x8_sse2 + +void aom_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_16x16 aom_dc_predictor_16x16_sse2 + +void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c + +void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_32x32 aom_dc_predictor_32x32_sse2 + +void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_4x4 aom_dc_predictor_4x4_sse2 + +void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_predictor_8x8 aom_dc_predictor_8x8_sse2 + +void aom_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_16x16 aom_dc_top_predictor_16x16_sse2 + +void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c + +void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_32x32 aom_dc_top_predictor_32x32_sse2 + +void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_4x4 aom_dc_top_predictor_4x4_sse2 + +void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_dc_top_predictor_8x8 aom_dc_top_predictor_8x8_sse2 + +void aom_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride); +#define aom_fdct16x16 aom_fdct16x16_sse2 + +void aom_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct16x16_1_avx2(const int16_t *input, tran_low_t *output, int stride); +RTCD_EXTERN void (*aom_fdct16x16_1)(const int16_t *input, tran_low_t *output, int stride); + +void aom_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct32x32_sse2(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct32x32_avx2(const int16_t *input, tran_low_t *output, int stride); +RTCD_EXTERN void (*aom_fdct32x32)(const int16_t *input, tran_low_t *output, int stride); + +void aom_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct32x32_1_avx2(const int16_t *input, tran_low_t *output, int stride); +RTCD_EXTERN void (*aom_fdct32x32_1)(const int16_t *input, tran_low_t *output, int stride); + +void aom_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct32x32_rd_sse2(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct32x32_rd_avx2(const int16_t *input, tran_low_t *output, int stride); +RTCD_EXTERN void (*aom_fdct32x32_rd)(const int16_t *input, tran_low_t *output, int stride); + +void aom_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct4x4_sse2(const int16_t *input, tran_low_t *output, int stride); +#define aom_fdct4x4 aom_fdct4x4_sse2 + +void aom_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride); +#define aom_fdct4x4_1 aom_fdct4x4_1_sse2 + +void aom_fdct8x8_c(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct8x8_ssse3(const int16_t *input, tran_low_t *output, int stride); +RTCD_EXTERN void (*aom_fdct8x8)(const int16_t *input, tran_low_t *output, int stride); + +void aom_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride); +void aom_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride); +#define aom_fdct8x8_1 aom_fdct8x8_1_sse2 + +void aom_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void aom_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void aom_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*aom_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + +unsigned int aom_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride); +#define aom_get4x4sse_cs aom_get4x4sse_cs_c + +void aom_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void aom_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define aom_get8x8var aom_get8x8var_sse2 + +unsigned int aom_get_mb_ss_c(const int16_t *); +unsigned int aom_get_mb_ss_sse2(const int16_t *); +#define aom_get_mb_ss aom_get_mb_ss_sse2 + +void aom_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_16x16 aom_h_predictor_16x16_sse2 + +void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_2x2 aom_h_predictor_2x2_c + +void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_32x32 aom_h_predictor_32x32_sse2 + +void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_4x4 aom_h_predictor_4x4_sse2 + +void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_h_predictor_8x8 aom_h_predictor_8x8_sse2 + +void aom_hadamard_16x16_c(const int16_t *src_diff, int src_stride, int16_t *coeff); +void aom_hadamard_16x16_sse2(const int16_t *src_diff, int src_stride, int16_t *coeff); +#define aom_hadamard_16x16 aom_hadamard_16x16_sse2 + +void aom_hadamard_8x8_c(const int16_t *src_diff, int src_stride, int16_t *coeff); +void aom_hadamard_8x8_sse2(const int16_t *src_diff, int src_stride, int16_t *coeff); +void aom_hadamard_8x8_ssse3(const int16_t *src_diff, int src_stride, int16_t *coeff); +RTCD_EXTERN void (*aom_hadamard_8x8)(const int16_t *src_diff, int src_stride, int16_t *coeff); + +void aom_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_idct16x16_10_add aom_idct16x16_10_add_sse2 + +void aom_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_idct16x16_1_add aom_idct16x16_1_add_sse2 + +void aom_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_idct16x16_256_add aom_idct16x16_256_add_sse2 + +void aom_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_idct16x16_38_add aom_idct16x16_38_add_c + +void aom_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_1024_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void aom_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_135_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void aom_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_idct32x32_1_add aom_idct32x32_1_add_sse2 + +void aom_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct32x32_34_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void aom_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_idct4x4_16_add aom_idct4x4_16_add_sse2 + +void aom_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_idct4x4_1_add aom_idct4x4_1_add_sse2 + +void aom_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct8x8_12_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void aom_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_idct8x8_1_add aom_idct8x8_1_add_sse2 + +void aom_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_idct8x8_64_add_ssse3(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*aom_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +int16_t aom_int_pro_col_c(const uint8_t *ref, int width); +int16_t aom_int_pro_col_sse2(const uint8_t *ref, int width); +#define aom_int_pro_col aom_int_pro_col_sse2 + +void aom_int_pro_row_c(int16_t *hbuf, const uint8_t *ref, int ref_stride, int height); +void aom_int_pro_row_sse2(int16_t *hbuf, const uint8_t *ref, int ref_stride, int height); +#define aom_int_pro_row aom_int_pro_row_sse2 + +void aom_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void aom_iwht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_iwht4x4_16_add aom_iwht4x4_16_add_sse2 + +void aom_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define aom_iwht4x4_1_add aom_iwht4x4_1_add_c + +void aom_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_4 aom_lpf_horizontal_4_sse2 + +void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_sse2 + +void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_horizontal_8 aom_lpf_horizontal_8_sse2 + +void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_sse2 + +void aom_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*aom_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void aom_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*aom_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void aom_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_16 aom_lpf_vertical_16_sse2 + +void aom_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_16_dual aom_lpf_vertical_16_dual_sse2 + +void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_4 aom_lpf_vertical_4_sse2 + +void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_sse2 + +void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define aom_lpf_vertical_8 aom_lpf_vertical_8_sse2 + +void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_sse2 + +void aom_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +void aom_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max); +#define aom_minmax_8x8 aom_minmax_8x8_sse2 + +unsigned int aom_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); + +unsigned int aom_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_mse16x8 aom_mse16x8_sse2 + +unsigned int aom_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_mse8x16 aom_mse8x16_sse2 + +unsigned int aom_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +unsigned int aom_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse); +#define aom_mse8x8 aom_mse8x8_sse2 + +unsigned int aom_obmc_sad16x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad16x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad16x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad16x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad16x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad16x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad16x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad16x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad16x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad32x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad32x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad32x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad32x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad32x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad32x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad32x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad32x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad32x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad4x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad4x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad4x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad4x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad4x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad4x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad64x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad64x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad64x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad64x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad64x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad64x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad8x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad8x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad8x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad8x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad8x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad8x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sad8x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +unsigned int aom_obmc_sad8x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); +RTCD_EXTERN unsigned int (*aom_obmc_sad8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask); + +unsigned int aom_obmc_sub_pixel_variance16x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance16x16 aom_obmc_sub_pixel_variance16x16_c + +unsigned int aom_obmc_sub_pixel_variance16x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance16x32 aom_obmc_sub_pixel_variance16x32_c + +unsigned int aom_obmc_sub_pixel_variance16x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance16x8 aom_obmc_sub_pixel_variance16x8_c + +unsigned int aom_obmc_sub_pixel_variance32x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance32x16 aom_obmc_sub_pixel_variance32x16_c + +unsigned int aom_obmc_sub_pixel_variance32x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance32x32 aom_obmc_sub_pixel_variance32x32_c + +unsigned int aom_obmc_sub_pixel_variance32x64_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance32x64 aom_obmc_sub_pixel_variance32x64_c + +unsigned int aom_obmc_sub_pixel_variance4x4_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance4x4 aom_obmc_sub_pixel_variance4x4_c + +unsigned int aom_obmc_sub_pixel_variance4x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance4x8 aom_obmc_sub_pixel_variance4x8_c + +unsigned int aom_obmc_sub_pixel_variance64x32_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance64x32 aom_obmc_sub_pixel_variance64x32_c + +unsigned int aom_obmc_sub_pixel_variance64x64_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance64x64 aom_obmc_sub_pixel_variance64x64_c + +unsigned int aom_obmc_sub_pixel_variance8x16_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance8x16 aom_obmc_sub_pixel_variance8x16_c + +unsigned int aom_obmc_sub_pixel_variance8x4_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance8x4 aom_obmc_sub_pixel_variance8x4_c + +unsigned int aom_obmc_sub_pixel_variance8x8_c(const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +#define aom_obmc_sub_pixel_variance8x8 aom_obmc_sub_pixel_variance8x8_c + +unsigned int aom_obmc_variance16x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance16x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance16x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance16x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance16x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance16x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance16x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance16x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance16x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance32x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance32x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance32x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance32x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance32x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance32x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance32x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance32x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance32x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance4x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance4x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance4x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance4x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance4x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance4x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance64x32_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance64x32_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance64x32)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance64x64_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance64x64_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance64x64)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance8x16_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance8x16_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance8x16)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance8x4_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance8x4_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance8x4)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +unsigned int aom_obmc_variance8x8_c(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +unsigned int aom_obmc_variance8x8_sse4_1(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_obmc_variance8x8)(const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse); + +void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c + +void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c + +void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c + +void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c + +void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c + +void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void aom_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void aom_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +RTCD_EXTERN void (*aom_quantize_b)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); + +void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void aom_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void aom_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); + +void aom_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +#define aom_quantize_b_64x64 aom_quantize_b_64x64_c + +unsigned int aom_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_sad16x16 aom_sad16x16_sse2 + +unsigned int aom_sad16x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad16x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_sad16x16_avg aom_sad16x16_avg_sse2 + +void aom_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad16x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad16x16x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad16x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void aom_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_sad16x16x4d aom_sad16x16x4d_sse2 + +void aom_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad16x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad16x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int aom_sad16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_sad16x32 aom_sad16x32_sse2 + +unsigned int aom_sad16x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad16x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_sad16x32_avg aom_sad16x32_avg_sse2 + +void aom_sad16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_sad16x32x4d aom_sad16x32x4d_sse2 + +unsigned int aom_sad16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_sad16x8 aom_sad16x8_sse2 + +unsigned int aom_sad16x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad16x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_sad16x8_avg aom_sad16x8_avg_sse2 + +void aom_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad16x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad16x8x3_ssse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad16x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void aom_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_sad16x8x4d aom_sad16x8x4d_sse2 + +void aom_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad16x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad16x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int aom_sad32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_sad32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_sad32x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad32x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad32x16_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_sad32x16_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_sad32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_sad32x16x4d aom_sad32x16x4d_sse2 + +unsigned int aom_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_sad32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_sad32x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad32x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad32x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_sad32x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_sad32x32x3 aom_sad32x32x3_c + +void aom_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void aom_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_sad32x32x8 aom_sad32x32x8_c + +unsigned int aom_sad32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_sad32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_sad32x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad32x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad32x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_sad32x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_sad32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad32x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int aom_sad4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad4x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_sad4x4 aom_sad4x4_sse2 + +unsigned int aom_sad4x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad4x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_sad4x4_avg aom_sad4x4_avg_sse2 + +void aom_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad4x4x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad4x4x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void aom_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad4x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_sad4x4x4d aom_sad4x4x4d_sse2 + +void aom_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad4x4x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad4x4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int aom_sad4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad4x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_sad4x8 aom_sad4x8_sse2 + +unsigned int aom_sad4x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad4x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_sad4x8_avg aom_sad4x8_avg_sse2 + +void aom_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad4x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_sad4x8x4d aom_sad4x8x4d_sse2 + +void aom_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_sad4x8x8 aom_sad4x8x8_c + +unsigned int aom_sad64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_sad64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_sad64x32_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad64x32_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad64x32_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_sad64x32_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_sad64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad64x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +unsigned int aom_sad64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +RTCD_EXTERN unsigned int (*aom_sad64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); + +unsigned int aom_sad64x64_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad64x64_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad64x64_avg_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +RTCD_EXTERN unsigned int (*aom_sad64x64_avg)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); + +void aom_sad64x64x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_sad64x64x3 aom_sad64x64x3_c + +void aom_sad64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); + +void aom_sad64x64x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_sad64x64x8 aom_sad64x64x8_c + +unsigned int aom_sad8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_sad8x16 aom_sad8x16_sse2 + +unsigned int aom_sad8x16_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad8x16_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_sad8x16_avg aom_sad8x16_avg_sse2 + +void aom_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad8x16x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad8x16x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void aom_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_sad8x16x4d aom_sad8x16x4d_sse2 + +void aom_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad8x16x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad8x16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +unsigned int aom_sad8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_sad8x4 aom_sad8x4_sse2 + +unsigned int aom_sad8x4_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad8x4_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_sad8x4_avg aom_sad8x4_avg_sse2 + +void aom_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad8x4x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_sad8x4x4d aom_sad8x4x4d_sse2 + +void aom_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +#define aom_sad8x4x8 aom_sad8x4x8_c + +unsigned int aom_sad8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +unsigned int aom_sad8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); +#define aom_sad8x8 aom_sad8x8_sse2 + +unsigned int aom_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +unsigned int aom_sad8x8_avg_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); +#define aom_sad8x8_avg aom_sad8x8_avg_sse2 + +void aom_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad8x8x3_sse3(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad8x8x3)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +void aom_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +void aom_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array); +#define aom_sad8x8x4d aom_sad8x8x4d_sse2 + +void aom_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +void aom_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); +RTCD_EXTERN void (*aom_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array); + +int aom_satd_c(const int16_t *coeff, int length); +int aom_satd_sse2(const int16_t *coeff, int length); +#define aom_satd aom_satd_sse2 + +void aom_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void aom_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*aom_scaled_2d)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void aom_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_scaled_avg_2d aom_scaled_avg_2d_c + +void aom_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_scaled_avg_horiz aom_scaled_avg_horiz_c + +void aom_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_scaled_avg_vert aom_scaled_avg_vert_c + +void aom_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_scaled_horiz aom_scaled_horiz_c + +void aom_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define aom_scaled_vert aom_scaled_vert_c + +void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_smooth_predictor_16x16 aom_smooth_predictor_16x16_c + +void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c + +void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_smooth_predictor_32x32 aom_smooth_predictor_32x32_c + +void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_smooth_predictor_4x4 aom_smooth_predictor_4x4_c + +void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_smooth_predictor_8x8 aom_smooth_predictor_8x8_c + +uint32_t aom_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance16x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance16x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance16x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance16x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance16x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance16x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance4x4_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance4x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance4x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance4x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance8x4_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance8x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +uint32_t aom_sub_pixel_avg_variance8x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); +RTCD_EXTERN uint32_t (*aom_sub_pixel_avg_variance8x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t aom_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance16x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance16x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance16x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance16x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance16x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance16x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance32x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance32x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance32x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance32x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance32x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance32x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance4x4_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance4x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance4x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance4x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance64x32_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance64x32)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance64x64_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance64x64)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance8x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance8x16)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance8x4_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance8x4)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t aom_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_sub_pixel_variance8x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); +RTCD_EXTERN uint32_t (*aom_sub_pixel_variance8x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +void aom_subtract_block_c(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride); +void aom_subtract_block_sse2(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride); +#define aom_subtract_block aom_subtract_block_sse2 + +uint64_t aom_sum_squares_2d_i16_c(const int16_t *src, int stride, int width, int height); +uint64_t aom_sum_squares_2d_i16_sse2(const int16_t *src, int stride, int width, int height); +#define aom_sum_squares_2d_i16 aom_sum_squares_2d_i16_sse2 + +uint64_t aom_sum_squares_i16_c(const int16_t *src, uint32_t N); +uint64_t aom_sum_squares_i16_sse2(const int16_t *src, uint32_t N); +#define aom_sum_squares_i16 aom_sum_squares_i16_sse2 + +void aom_upsampled_pred_c(uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride); +void aom_upsampled_pred_sse2(uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride); +#define aom_upsampled_pred aom_upsampled_pred_sse2 + +void aom_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_16x16 aom_v_predictor_16x16_sse2 + +void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_2x2 aom_v_predictor_2x2_c + +void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_32x32 aom_v_predictor_32x32_sse2 + +void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_4x4 aom_v_predictor_4x4_sse2 + +void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define aom_v_predictor_8x8 aom_v_predictor_8x8_sse2 + +unsigned int aom_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int aom_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance16x32 aom_variance16x32_sse2 + +unsigned int aom_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance16x8 aom_variance16x8_sse2 + +unsigned int aom_variance2x2_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance2x2 aom_variance2x2_c + +unsigned int aom_variance2x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance2x4 aom_variance2x4_c + +unsigned int aom_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int aom_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int aom_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance32x64 aom_variance32x64_sse2 + +unsigned int aom_variance4x2_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance4x2 aom_variance4x2_c + +unsigned int aom_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance4x4 aom_variance4x4_sse2 + +unsigned int aom_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance4x8 aom_variance4x8_sse2 + +unsigned int aom_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int aom_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +RTCD_EXTERN unsigned int (*aom_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int aom_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance8x16 aom_variance8x16_sse2 + +unsigned int aom_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance8x4 aom_variance8x4_sse2 + +unsigned int aom_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +unsigned int aom_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); +#define aom_variance8x8 aom_variance8x8_sse2 + +uint32_t aom_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_variance_halfpixvar16x16_h_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_variance_halfpixvar16x16_h aom_variance_halfpixvar16x16_h_sse2 + +uint32_t aom_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_variance_halfpixvar16x16_hv_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_variance_halfpixvar16x16_hv aom_variance_halfpixvar16x16_hv_sse2 + +uint32_t aom_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +uint32_t aom_variance_halfpixvar16x16_v_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse); +#define aom_variance_halfpixvar16x16_v aom_variance_halfpixvar16x16_v_sse2 + +int aom_vector_var_c(const int16_t *ref, const int16_t *src, int bwl); +int aom_vector_var_sse2(const int16_t *ref, const int16_t *src, int bwl); +#define aom_vector_var aom_vector_var_sse2 + +void aom_dsp_rtcd(void); + +#ifdef RTCD_C +#include "aom_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + + aom_blend_a64_hmask = aom_blend_a64_hmask_c; + if (flags & HAS_SSE4_1) aom_blend_a64_hmask = aom_blend_a64_hmask_sse4_1; + aom_blend_a64_mask = aom_blend_a64_mask_c; + if (flags & HAS_SSE4_1) aom_blend_a64_mask = aom_blend_a64_mask_sse4_1; + aom_blend_a64_vmask = aom_blend_a64_vmask_c; + if (flags & HAS_SSE4_1) aom_blend_a64_vmask = aom_blend_a64_vmask_sse4_1; + aom_convolve8 = aom_convolve8_sse2; + if (flags & HAS_SSSE3) aom_convolve8 = aom_convolve8_ssse3; + if (flags & HAS_AVX2) aom_convolve8 = aom_convolve8_avx2; + aom_convolve8_avg = aom_convolve8_avg_sse2; + if (flags & HAS_SSSE3) aom_convolve8_avg = aom_convolve8_avg_ssse3; + aom_convolve8_avg_horiz = aom_convolve8_avg_horiz_sse2; + if (flags & HAS_SSSE3) aom_convolve8_avg_horiz = aom_convolve8_avg_horiz_ssse3; + aom_convolve8_avg_vert = aom_convolve8_avg_vert_sse2; + if (flags & HAS_SSSE3) aom_convolve8_avg_vert = aom_convolve8_avg_vert_ssse3; + aom_convolve8_horiz = aom_convolve8_horiz_sse2; + if (flags & HAS_SSSE3) aom_convolve8_horiz = aom_convolve8_horiz_ssse3; + if (flags & HAS_AVX2) aom_convolve8_horiz = aom_convolve8_horiz_avx2; + aom_convolve8_vert = aom_convolve8_vert_sse2; + if (flags & HAS_SSSE3) aom_convolve8_vert = aom_convolve8_vert_ssse3; + if (flags & HAS_AVX2) aom_convolve8_vert = aom_convolve8_vert_avx2; + aom_d153_predictor_16x16 = aom_d153_predictor_16x16_c; + if (flags & HAS_SSSE3) aom_d153_predictor_16x16 = aom_d153_predictor_16x16_ssse3; + aom_d153_predictor_32x32 = aom_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) aom_d153_predictor_32x32 = aom_d153_predictor_32x32_ssse3; + aom_d153_predictor_4x4 = aom_d153_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_d153_predictor_4x4 = aom_d153_predictor_4x4_ssse3; + aom_d153_predictor_8x8 = aom_d153_predictor_8x8_c; + if (flags & HAS_SSSE3) aom_d153_predictor_8x8 = aom_d153_predictor_8x8_ssse3; + aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_c; + if (flags & HAS_SSSE3) aom_d63e_predictor_4x4 = aom_d63e_predictor_4x4_ssse3; + aom_fdct16x16_1 = aom_fdct16x16_1_sse2; + if (flags & HAS_AVX2) aom_fdct16x16_1 = aom_fdct16x16_1_avx2; + aom_fdct32x32 = aom_fdct32x32_sse2; + if (flags & HAS_AVX2) aom_fdct32x32 = aom_fdct32x32_avx2; + aom_fdct32x32_1 = aom_fdct32x32_1_sse2; + if (flags & HAS_AVX2) aom_fdct32x32_1 = aom_fdct32x32_1_avx2; + aom_fdct32x32_rd = aom_fdct32x32_rd_sse2; + if (flags & HAS_AVX2) aom_fdct32x32_rd = aom_fdct32x32_rd_avx2; + aom_fdct8x8 = aom_fdct8x8_sse2; + if (flags & HAS_SSSE3) aom_fdct8x8 = aom_fdct8x8_ssse3; + aom_get16x16var = aom_get16x16var_sse2; + if (flags & HAS_AVX2) aom_get16x16var = aom_get16x16var_avx2; + aom_hadamard_8x8 = aom_hadamard_8x8_sse2; + if (flags & HAS_SSSE3) aom_hadamard_8x8 = aom_hadamard_8x8_ssse3; + aom_idct32x32_1024_add = aom_idct32x32_1024_add_sse2; + if (flags & HAS_SSSE3) aom_idct32x32_1024_add = aom_idct32x32_1024_add_ssse3; + aom_idct32x32_135_add = aom_idct32x32_1024_add_sse2; + if (flags & HAS_SSSE3) aom_idct32x32_135_add = aom_idct32x32_135_add_ssse3; + aom_idct32x32_34_add = aom_idct32x32_34_add_sse2; + if (flags & HAS_SSSE3) aom_idct32x32_34_add = aom_idct32x32_34_add_ssse3; + aom_idct8x8_12_add = aom_idct8x8_12_add_sse2; + if (flags & HAS_SSSE3) aom_idct8x8_12_add = aom_idct8x8_12_add_ssse3; + aom_idct8x8_64_add = aom_idct8x8_64_add_sse2; + if (flags & HAS_SSSE3) aom_idct8x8_64_add = aom_idct8x8_64_add_ssse3; + aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_sse2; + if (flags & HAS_AVX2) aom_lpf_horizontal_edge_16 = aom_lpf_horizontal_edge_16_avx2; + aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_sse2; + if (flags & HAS_AVX2) aom_lpf_horizontal_edge_8 = aom_lpf_horizontal_edge_8_avx2; + aom_mse16x16 = aom_mse16x16_sse2; + if (flags & HAS_AVX2) aom_mse16x16 = aom_mse16x16_avx2; + aom_obmc_sad16x16 = aom_obmc_sad16x16_c; + if (flags & HAS_SSE4_1) aom_obmc_sad16x16 = aom_obmc_sad16x16_sse4_1; + aom_obmc_sad16x32 = aom_obmc_sad16x32_c; + if (flags & HAS_SSE4_1) aom_obmc_sad16x32 = aom_obmc_sad16x32_sse4_1; + aom_obmc_sad16x8 = aom_obmc_sad16x8_c; + if (flags & HAS_SSE4_1) aom_obmc_sad16x8 = aom_obmc_sad16x8_sse4_1; + aom_obmc_sad32x16 = aom_obmc_sad32x16_c; + if (flags & HAS_SSE4_1) aom_obmc_sad32x16 = aom_obmc_sad32x16_sse4_1; + aom_obmc_sad32x32 = aom_obmc_sad32x32_c; + if (flags & HAS_SSE4_1) aom_obmc_sad32x32 = aom_obmc_sad32x32_sse4_1; + aom_obmc_sad32x64 = aom_obmc_sad32x64_c; + if (flags & HAS_SSE4_1) aom_obmc_sad32x64 = aom_obmc_sad32x64_sse4_1; + aom_obmc_sad4x4 = aom_obmc_sad4x4_c; + if (flags & HAS_SSE4_1) aom_obmc_sad4x4 = aom_obmc_sad4x4_sse4_1; + aom_obmc_sad4x8 = aom_obmc_sad4x8_c; + if (flags & HAS_SSE4_1) aom_obmc_sad4x8 = aom_obmc_sad4x8_sse4_1; + aom_obmc_sad64x32 = aom_obmc_sad64x32_c; + if (flags & HAS_SSE4_1) aom_obmc_sad64x32 = aom_obmc_sad64x32_sse4_1; + aom_obmc_sad64x64 = aom_obmc_sad64x64_c; + if (flags & HAS_SSE4_1) aom_obmc_sad64x64 = aom_obmc_sad64x64_sse4_1; + aom_obmc_sad8x16 = aom_obmc_sad8x16_c; + if (flags & HAS_SSE4_1) aom_obmc_sad8x16 = aom_obmc_sad8x16_sse4_1; + aom_obmc_sad8x4 = aom_obmc_sad8x4_c; + if (flags & HAS_SSE4_1) aom_obmc_sad8x4 = aom_obmc_sad8x4_sse4_1; + aom_obmc_sad8x8 = aom_obmc_sad8x8_c; + if (flags & HAS_SSE4_1) aom_obmc_sad8x8 = aom_obmc_sad8x8_sse4_1; + aom_obmc_variance16x16 = aom_obmc_variance16x16_c; + if (flags & HAS_SSE4_1) aom_obmc_variance16x16 = aom_obmc_variance16x16_sse4_1; + aom_obmc_variance16x32 = aom_obmc_variance16x32_c; + if (flags & HAS_SSE4_1) aom_obmc_variance16x32 = aom_obmc_variance16x32_sse4_1; + aom_obmc_variance16x8 = aom_obmc_variance16x8_c; + if (flags & HAS_SSE4_1) aom_obmc_variance16x8 = aom_obmc_variance16x8_sse4_1; + aom_obmc_variance32x16 = aom_obmc_variance32x16_c; + if (flags & HAS_SSE4_1) aom_obmc_variance32x16 = aom_obmc_variance32x16_sse4_1; + aom_obmc_variance32x32 = aom_obmc_variance32x32_c; + if (flags & HAS_SSE4_1) aom_obmc_variance32x32 = aom_obmc_variance32x32_sse4_1; + aom_obmc_variance32x64 = aom_obmc_variance32x64_c; + if (flags & HAS_SSE4_1) aom_obmc_variance32x64 = aom_obmc_variance32x64_sse4_1; + aom_obmc_variance4x4 = aom_obmc_variance4x4_c; + if (flags & HAS_SSE4_1) aom_obmc_variance4x4 = aom_obmc_variance4x4_sse4_1; + aom_obmc_variance4x8 = aom_obmc_variance4x8_c; + if (flags & HAS_SSE4_1) aom_obmc_variance4x8 = aom_obmc_variance4x8_sse4_1; + aom_obmc_variance64x32 = aom_obmc_variance64x32_c; + if (flags & HAS_SSE4_1) aom_obmc_variance64x32 = aom_obmc_variance64x32_sse4_1; + aom_obmc_variance64x64 = aom_obmc_variance64x64_c; + if (flags & HAS_SSE4_1) aom_obmc_variance64x64 = aom_obmc_variance64x64_sse4_1; + aom_obmc_variance8x16 = aom_obmc_variance8x16_c; + if (flags & HAS_SSE4_1) aom_obmc_variance8x16 = aom_obmc_variance8x16_sse4_1; + aom_obmc_variance8x4 = aom_obmc_variance8x4_c; + if (flags & HAS_SSE4_1) aom_obmc_variance8x4 = aom_obmc_variance8x4_sse4_1; + aom_obmc_variance8x8 = aom_obmc_variance8x8_c; + if (flags & HAS_SSE4_1) aom_obmc_variance8x8 = aom_obmc_variance8x8_sse4_1; + aom_quantize_b = aom_quantize_b_sse2; + if (flags & HAS_SSSE3) aom_quantize_b = aom_quantize_b_ssse3; + if (flags & HAS_AVX) aom_quantize_b = aom_quantize_b_avx; + aom_quantize_b_32x32 = aom_quantize_b_32x32_c; + if (flags & HAS_SSSE3) aom_quantize_b_32x32 = aom_quantize_b_32x32_ssse3; + if (flags & HAS_AVX) aom_quantize_b_32x32 = aom_quantize_b_32x32_avx; + aom_sad16x16x3 = aom_sad16x16x3_c; + if (flags & HAS_SSE3) aom_sad16x16x3 = aom_sad16x16x3_sse3; + if (flags & HAS_SSSE3) aom_sad16x16x3 = aom_sad16x16x3_ssse3; + aom_sad16x16x8 = aom_sad16x16x8_c; + if (flags & HAS_SSE4_1) aom_sad16x16x8 = aom_sad16x16x8_sse4_1; + aom_sad16x8x3 = aom_sad16x8x3_c; + if (flags & HAS_SSE3) aom_sad16x8x3 = aom_sad16x8x3_sse3; + if (flags & HAS_SSSE3) aom_sad16x8x3 = aom_sad16x8x3_ssse3; + aom_sad16x8x8 = aom_sad16x8x8_c; + if (flags & HAS_SSE4_1) aom_sad16x8x8 = aom_sad16x8x8_sse4_1; + aom_sad32x16 = aom_sad32x16_sse2; + if (flags & HAS_AVX2) aom_sad32x16 = aom_sad32x16_avx2; + aom_sad32x16_avg = aom_sad32x16_avg_sse2; + if (flags & HAS_AVX2) aom_sad32x16_avg = aom_sad32x16_avg_avx2; + aom_sad32x32 = aom_sad32x32_sse2; + if (flags & HAS_AVX2) aom_sad32x32 = aom_sad32x32_avx2; + aom_sad32x32_avg = aom_sad32x32_avg_sse2; + if (flags & HAS_AVX2) aom_sad32x32_avg = aom_sad32x32_avg_avx2; + aom_sad32x32x4d = aom_sad32x32x4d_sse2; + if (flags & HAS_AVX2) aom_sad32x32x4d = aom_sad32x32x4d_avx2; + aom_sad32x64 = aom_sad32x64_sse2; + if (flags & HAS_AVX2) aom_sad32x64 = aom_sad32x64_avx2; + aom_sad32x64_avg = aom_sad32x64_avg_sse2; + if (flags & HAS_AVX2) aom_sad32x64_avg = aom_sad32x64_avg_avx2; + aom_sad32x64x4d = aom_sad32x64x4d_sse2; + if (flags & HAS_AVX2) aom_sad32x64x4d = aom_sad32x64x4d_avx2; + aom_sad4x4x3 = aom_sad4x4x3_c; + if (flags & HAS_SSE3) aom_sad4x4x3 = aom_sad4x4x3_sse3; + aom_sad4x4x8 = aom_sad4x4x8_c; + if (flags & HAS_SSE4_1) aom_sad4x4x8 = aom_sad4x4x8_sse4_1; + aom_sad64x32 = aom_sad64x32_sse2; + if (flags & HAS_AVX2) aom_sad64x32 = aom_sad64x32_avx2; + aom_sad64x32_avg = aom_sad64x32_avg_sse2; + if (flags & HAS_AVX2) aom_sad64x32_avg = aom_sad64x32_avg_avx2; + aom_sad64x32x4d = aom_sad64x32x4d_sse2; + if (flags & HAS_AVX2) aom_sad64x32x4d = aom_sad64x32x4d_avx2; + aom_sad64x64 = aom_sad64x64_sse2; + if (flags & HAS_AVX2) aom_sad64x64 = aom_sad64x64_avx2; + aom_sad64x64_avg = aom_sad64x64_avg_sse2; + if (flags & HAS_AVX2) aom_sad64x64_avg = aom_sad64x64_avg_avx2; + aom_sad64x64x4d = aom_sad64x64x4d_sse2; + if (flags & HAS_AVX2) aom_sad64x64x4d = aom_sad64x64x4d_avx2; + aom_sad8x16x3 = aom_sad8x16x3_c; + if (flags & HAS_SSE3) aom_sad8x16x3 = aom_sad8x16x3_sse3; + aom_sad8x16x8 = aom_sad8x16x8_c; + if (flags & HAS_SSE4_1) aom_sad8x16x8 = aom_sad8x16x8_sse4_1; + aom_sad8x8x3 = aom_sad8x8x3_c; + if (flags & HAS_SSE3) aom_sad8x8x3 = aom_sad8x8x3_sse3; + aom_sad8x8x8 = aom_sad8x8x8_c; + if (flags & HAS_SSE4_1) aom_sad8x8x8 = aom_sad8x8x8_sse4_1; + aom_scaled_2d = aom_scaled_2d_c; + if (flags & HAS_SSSE3) aom_scaled_2d = aom_scaled_2d_ssse3; + aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x16 = aom_sub_pixel_avg_variance16x16_ssse3; + aom_sub_pixel_avg_variance16x32 = aom_sub_pixel_avg_variance16x32_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x32 = aom_sub_pixel_avg_variance16x32_ssse3; + aom_sub_pixel_avg_variance16x8 = aom_sub_pixel_avg_variance16x8_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance16x8 = aom_sub_pixel_avg_variance16x8_ssse3; + aom_sub_pixel_avg_variance32x16 = aom_sub_pixel_avg_variance32x16_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance32x16 = aom_sub_pixel_avg_variance32x16_ssse3; + aom_sub_pixel_avg_variance32x32 = aom_sub_pixel_avg_variance32x32_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance32x32 = aom_sub_pixel_avg_variance32x32_ssse3; + if (flags & HAS_AVX2) aom_sub_pixel_avg_variance32x32 = aom_sub_pixel_avg_variance32x32_avx2; + aom_sub_pixel_avg_variance32x64 = aom_sub_pixel_avg_variance32x64_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance32x64 = aom_sub_pixel_avg_variance32x64_ssse3; + aom_sub_pixel_avg_variance4x4 = aom_sub_pixel_avg_variance4x4_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance4x4 = aom_sub_pixel_avg_variance4x4_ssse3; + aom_sub_pixel_avg_variance4x8 = aom_sub_pixel_avg_variance4x8_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance4x8 = aom_sub_pixel_avg_variance4x8_ssse3; + aom_sub_pixel_avg_variance64x32 = aom_sub_pixel_avg_variance64x32_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance64x32 = aom_sub_pixel_avg_variance64x32_ssse3; + aom_sub_pixel_avg_variance64x64 = aom_sub_pixel_avg_variance64x64_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance64x64 = aom_sub_pixel_avg_variance64x64_ssse3; + if (flags & HAS_AVX2) aom_sub_pixel_avg_variance64x64 = aom_sub_pixel_avg_variance64x64_avx2; + aom_sub_pixel_avg_variance8x16 = aom_sub_pixel_avg_variance8x16_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance8x16 = aom_sub_pixel_avg_variance8x16_ssse3; + aom_sub_pixel_avg_variance8x4 = aom_sub_pixel_avg_variance8x4_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance8x4 = aom_sub_pixel_avg_variance8x4_ssse3; + aom_sub_pixel_avg_variance8x8 = aom_sub_pixel_avg_variance8x8_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_avg_variance8x8 = aom_sub_pixel_avg_variance8x8_ssse3; + aom_sub_pixel_variance16x16 = aom_sub_pixel_variance16x16_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance16x16 = aom_sub_pixel_variance16x16_ssse3; + aom_sub_pixel_variance16x32 = aom_sub_pixel_variance16x32_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance16x32 = aom_sub_pixel_variance16x32_ssse3; + aom_sub_pixel_variance16x8 = aom_sub_pixel_variance16x8_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance16x8 = aom_sub_pixel_variance16x8_ssse3; + aom_sub_pixel_variance32x16 = aom_sub_pixel_variance32x16_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance32x16 = aom_sub_pixel_variance32x16_ssse3; + aom_sub_pixel_variance32x32 = aom_sub_pixel_variance32x32_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance32x32 = aom_sub_pixel_variance32x32_ssse3; + if (flags & HAS_AVX2) aom_sub_pixel_variance32x32 = aom_sub_pixel_variance32x32_avx2; + aom_sub_pixel_variance32x64 = aom_sub_pixel_variance32x64_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance32x64 = aom_sub_pixel_variance32x64_ssse3; + aom_sub_pixel_variance4x4 = aom_sub_pixel_variance4x4_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance4x4 = aom_sub_pixel_variance4x4_ssse3; + aom_sub_pixel_variance4x8 = aom_sub_pixel_variance4x8_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance4x8 = aom_sub_pixel_variance4x8_ssse3; + aom_sub_pixel_variance64x32 = aom_sub_pixel_variance64x32_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance64x32 = aom_sub_pixel_variance64x32_ssse3; + aom_sub_pixel_variance64x64 = aom_sub_pixel_variance64x64_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance64x64 = aom_sub_pixel_variance64x64_ssse3; + if (flags & HAS_AVX2) aom_sub_pixel_variance64x64 = aom_sub_pixel_variance64x64_avx2; + aom_sub_pixel_variance8x16 = aom_sub_pixel_variance8x16_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance8x16 = aom_sub_pixel_variance8x16_ssse3; + aom_sub_pixel_variance8x4 = aom_sub_pixel_variance8x4_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance8x4 = aom_sub_pixel_variance8x4_ssse3; + aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_sse2; + if (flags & HAS_SSSE3) aom_sub_pixel_variance8x8 = aom_sub_pixel_variance8x8_ssse3; + aom_variance16x16 = aom_variance16x16_sse2; + if (flags & HAS_AVX2) aom_variance16x16 = aom_variance16x16_avx2; + aom_variance32x16 = aom_variance32x16_sse2; + if (flags & HAS_AVX2) aom_variance32x16 = aom_variance32x16_avx2; + aom_variance32x32 = aom_variance32x32_sse2; + if (flags & HAS_AVX2) aom_variance32x32 = aom_variance32x32_avx2; + aom_variance64x32 = aom_variance64x32_sse2; + if (flags & HAS_AVX2) aom_variance64x32 = aom_variance64x32_avx2; + aom_variance64x64 = aom_variance64x64_sse2; + if (flags & HAS_AVX2) aom_variance64x64 = aom_variance64x64_avx2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libaom/config/win/x64/aom_scale_rtcd.h b/media/libaom/config/win/x64/aom_scale_rtcd.h new file mode 100644 index 000000000..62f079065 --- /dev/null +++ b/media/libaom/config/win/x64/aom_scale_rtcd.h @@ -0,0 +1,78 @@ +#ifndef AOM_SCALE_RTCD_H_ +#define AOM_SCALE_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +struct yv12_buffer_config; + +#ifdef __cplusplus +extern "C" { +#endif + +void aom_extend_frame_borders_c(struct yv12_buffer_config *ybf); +#define aom_extend_frame_borders aom_extend_frame_borders_c + +void aom_extend_frame_borders_y_c(struct yv12_buffer_config *ybf); +#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c + +void aom_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf); +#define aom_extend_frame_inner_borders aom_extend_frame_inner_borders_c + +void aom_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); +#define aom_horizontal_line_2_1_scale aom_horizontal_line_2_1_scale_c + +void aom_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); +#define aom_horizontal_line_5_3_scale aom_horizontal_line_5_3_scale_c + +void aom_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); +#define aom_horizontal_line_5_4_scale aom_horizontal_line_5_4_scale_c + +void aom_vertical_band_2_1_scale_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width); +#define aom_vertical_band_2_1_scale aom_vertical_band_2_1_scale_c + +void aom_vertical_band_2_1_scale_i_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width); +#define aom_vertical_band_2_1_scale_i aom_vertical_band_2_1_scale_i_c + +void aom_vertical_band_5_3_scale_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width); +#define aom_vertical_band_5_3_scale aom_vertical_band_5_3_scale_c + +void aom_vertical_band_5_4_scale_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width); +#define aom_vertical_band_5_4_scale aom_vertical_band_5_4_scale_c + +void aom_yv12_copy_frame_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc); +#define aom_yv12_copy_frame aom_yv12_copy_frame_c + +void aom_yv12_copy_u_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc); +#define aom_yv12_copy_u aom_yv12_copy_u_c + +void aom_yv12_copy_v_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc); +#define aom_yv12_copy_v aom_yv12_copy_v_c + +void aom_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); +#define aom_yv12_copy_y aom_yv12_copy_y_c + +void aom_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf); +#define aom_yv12_extend_frame_borders aom_yv12_extend_frame_borders_c + +void aom_scale_rtcd(void); + +#ifdef RTCD_C +#include "aom_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/libaom/config/win/x64/av1_rtcd.h b/media/libaom/config/win/x64/av1_rtcd.h new file mode 100644 index 000000000..eacb5a555 --- /dev/null +++ b/media/libaom/config/win/x64/av1_rtcd.h @@ -0,0 +1,364 @@ +#ifndef AOM_RTCD_H_ +#define AOM_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * AV1 + */ + +#include "aom/aom_integer.h" +#include "av1/common/common.h" +#include "av1/common/enums.h" +#include "av1/common/quant_common.h" +#include "av1/common/filter.h" +#include "av1/common/convolve.h" +#include "av1/common/av1_txfm.h" +#include "av1/common/odintrin.h" + +struct macroblockd; + +/* Encoder forward decls */ +struct macroblock; +struct aom_variance_vtable; +struct search_site_config; +struct mv; +union int_mv; +struct yv12_buffer_config; +typedef uint16_t od_dering_in; + +#ifdef __cplusplus +extern "C" { +#endif + +void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_block_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_block_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_block_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +RTCD_EXTERN void (*aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); + +void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_block_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_block_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_block_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +RTCD_EXTERN void (*aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); + +void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_hblock_sse2(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_hblock_ssse3(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_hblock_sse4_1(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +RTCD_EXTERN void (*aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); + +void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_hblock_hbd_sse2(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_hblock_hbd_ssse3(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +void aom_clpf_hblock_hbd_sse4_1(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); +RTCD_EXTERN void (*aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd); + +int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t av1_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); +RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); + +int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +int64_t av1_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff, int block_size); +#define av1_block_error_fp av1_block_error_fp_sse2 + +void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_horiz)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); + +void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); +RTCD_EXTERN void (*av1_convolve_vert)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params); + +int av1_diamond_search_sad_c(struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); +#define av1_diamond_search_sad av1_diamond_search_sad_c + +void av1_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_fdct8x8_quant_sse2(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_fdct8x8_quant_ssse3(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +RTCD_EXTERN void (*av1_fdct8x8_quant)(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); + +void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +RTCD_EXTERN void (*av1_fht16x16)(const int16_t *input, tran_low_t *output, int stride, int tx_type); + +void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht16x32 av1_fht16x32_sse2 + +void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht16x4 av1_fht16x4_c + +void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht16x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht16x8 av1_fht16x8_sse2 + +void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht32x16 av1_fht32x16_sse2 + +void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht32x32_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +RTCD_EXTERN void (*av1_fht32x32)(const int16_t *input, tran_low_t *output, int stride, int tx_type); + +void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht32x8 av1_fht32x8_c + +void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht4x16 av1_fht4x16_c + +void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht4x4 av1_fht4x4_sse2 + +void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht4x8 av1_fht4x8_sse2 + +void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht8x16 av1_fht8x16_sse2 + +void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht8x32 av1_fht8x32_c + +void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht8x4_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht8x4 av1_fht8x4_sse2 + +void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type); +void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type); +#define av1_fht8x8 av1_fht8x8_sse2 + +int av1_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv); +#define av1_full_range_search av1_full_range_search_c + +int av1_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); +int av1_full_search_sadx3(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); +int av1_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); +RTCD_EXTERN int (*av1_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv); + +void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type); +#define av1_fwd_idtx av1_fwd_idtx_c + +void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride); +#define av1_fwht4x4 av1_fwht4x4_c + +void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +RTCD_EXTERN void (*av1_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); + +void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht16x32_512_add av1_iht16x32_512_add_sse2 + +void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht16x4_64_add av1_iht16x4_64_add_c + +void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht16x8_128_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht16x8_128_add av1_iht16x8_128_add_sse2 + +void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht32x16_512_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht32x16_512_add av1_iht32x16_512_add_sse2 + +void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +#define av1_iht32x32_1024_add av1_iht32x32_1024_add_c + +void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht32x8_256_add av1_iht32x8_256_add_c + +void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht4x16_64_add av1_iht4x16_64_add_c + +void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht4x4_16_add av1_iht4x4_16_add_sse2 + +void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht4x8_32_add av1_iht4x8_32_add_sse2 + +void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht8x16_128_add av1_iht8x16_128_add_sse2 + +void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht8x32_256_add av1_iht8x32_256_add_c + +void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht8x4_32_add av1_iht8x4_32_add_sse2 + +void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define av1_iht8x8_64_add av1_iht8x8_64_add_sse2 + +void av1_lowbd_convolve_init_c(void); +void av1_lowbd_convolve_init_ssse3(void); +RTCD_EXTERN void (*av1_lowbd_convolve_init)(void); + +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale); +#define av1_quantize_b av1_quantize_b_c + +void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_quantize_fp_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +RTCD_EXTERN void (*av1_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); + +void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +void av1_quantize_fp_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); +RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan); + +void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); +void av1_temporal_filter_apply_sse2(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count); +#define av1_temporal_filter_apply av1_temporal_filter_apply_sse2 + +void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta); +#define av1_warp_affine av1_warp_affine_sse2 + +void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_4x4_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_4x4_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_4x4_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); +RTCD_EXTERN void (*copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); + +void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_4x4_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_4x4_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_4x4_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +RTCD_EXTERN void (*copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); + +void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_8x8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_8x8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_8x8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride); +RTCD_EXTERN void (*copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride); + +void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_8x8_16bit_to_8bit_sse2(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_8x8_16bit_to_8bit_ssse3(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +void copy_8x8_16bit_to_8bit_sse4_1(uint8_t *dst, int dstride, const uint16_t *src, int sstride); +RTCD_EXTERN void (*copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride, const uint16_t *src, int sstride); + +void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); +RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h); + +void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); +RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h); + +int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); +int od_dir_find8_sse2(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); +int od_dir_find8_ssse3(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); +int od_dir_find8_sse4_1(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); +RTCD_EXTERN int (*od_dir_find8)(const od_dering_in *img, int stride, int32_t *var, int coeff_shift); + +void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); +void od_filter_dering_direction_4x4_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); +void od_filter_dering_direction_4x4_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); +void od_filter_dering_direction_4x4_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); +RTCD_EXTERN void (*od_filter_dering_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); + +void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); +void od_filter_dering_direction_8x8_sse2(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); +void od_filter_dering_direction_8x8_ssse3(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); +void od_filter_dering_direction_8x8_sse4_1(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); +RTCD_EXTERN void (*od_filter_dering_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping); + +void aom_rtcd(void); + +#ifdef RTCD_C +#include "aom_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + + aom_clpf_block = aom_clpf_block_sse2; + if (flags & HAS_SSSE3) aom_clpf_block = aom_clpf_block_ssse3; + if (flags & HAS_SSE4_1) aom_clpf_block = aom_clpf_block_sse4_1; + aom_clpf_block_hbd = aom_clpf_block_hbd_sse2; + if (flags & HAS_SSSE3) aom_clpf_block_hbd = aom_clpf_block_hbd_ssse3; + if (flags & HAS_SSE4_1) aom_clpf_block_hbd = aom_clpf_block_hbd_sse4_1; + aom_clpf_hblock = aom_clpf_hblock_sse2; + if (flags & HAS_SSSE3) aom_clpf_hblock = aom_clpf_hblock_ssse3; + if (flags & HAS_SSE4_1) aom_clpf_hblock = aom_clpf_hblock_sse4_1; + aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse2; + if (flags & HAS_SSSE3) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_ssse3; + if (flags & HAS_SSE4_1) aom_clpf_hblock_hbd = aom_clpf_hblock_hbd_sse4_1; + av1_block_error = av1_block_error_sse2; + if (flags & HAS_AVX2) av1_block_error = av1_block_error_avx2; + av1_convolve_horiz = av1_convolve_horiz_c; + if (flags & HAS_SSSE3) av1_convolve_horiz = av1_convolve_horiz_ssse3; + av1_convolve_vert = av1_convolve_vert_c; + if (flags & HAS_SSSE3) av1_convolve_vert = av1_convolve_vert_ssse3; + av1_fdct8x8_quant = av1_fdct8x8_quant_sse2; + if (flags & HAS_SSSE3) av1_fdct8x8_quant = av1_fdct8x8_quant_ssse3; + av1_fht16x16 = av1_fht16x16_sse2; + if (flags & HAS_AVX2) av1_fht16x16 = av1_fht16x16_avx2; + av1_fht32x32 = av1_fht32x32_sse2; + if (flags & HAS_AVX2) av1_fht32x32 = av1_fht32x32_avx2; + av1_full_search_sad = av1_full_search_sad_c; + if (flags & HAS_SSE3) av1_full_search_sad = av1_full_search_sadx3; + if (flags & HAS_SSE4_1) av1_full_search_sad = av1_full_search_sadx8; + av1_iht16x16_256_add = av1_iht16x16_256_add_sse2; + if (flags & HAS_AVX2) av1_iht16x16_256_add = av1_iht16x16_256_add_avx2; + av1_lowbd_convolve_init = av1_lowbd_convolve_init_c; + if (flags & HAS_SSSE3) av1_lowbd_convolve_init = av1_lowbd_convolve_init_ssse3; + av1_quantize_fp = av1_quantize_fp_sse2; + if (flags & HAS_SSSE3) av1_quantize_fp = av1_quantize_fp_ssse3; + av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c; + if (flags & HAS_SSSE3) av1_quantize_fp_32x32 = av1_quantize_fp_32x32_ssse3; + copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse2; + if (flags & HAS_SSSE3) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_ssse3; + if (flags & HAS_SSE4_1) copy_4x4_16bit_to_16bit = copy_4x4_16bit_to_16bit_sse4_1; + copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse2; + if (flags & HAS_SSSE3) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_ssse3; + if (flags & HAS_SSE4_1) copy_4x4_16bit_to_8bit = copy_4x4_16bit_to_8bit_sse4_1; + copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse2; + if (flags & HAS_SSSE3) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_ssse3; + if (flags & HAS_SSE4_1) copy_8x8_16bit_to_16bit = copy_8x8_16bit_to_16bit_sse4_1; + copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse2; + if (flags & HAS_SSSE3) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_ssse3; + if (flags & HAS_SSE4_1) copy_8x8_16bit_to_8bit = copy_8x8_16bit_to_8bit_sse4_1; + copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2; + if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3; + if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1; + copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2; + if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3; + if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1; + od_dir_find8 = od_dir_find8_sse2; + if (flags & HAS_SSSE3) od_dir_find8 = od_dir_find8_ssse3; + if (flags & HAS_SSE4_1) od_dir_find8 = od_dir_find8_sse4_1; + od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse2; + if (flags & HAS_SSSE3) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_ssse3; + if (flags & HAS_SSE4_1) od_filter_dering_direction_4x4 = od_filter_dering_direction_4x4_sse4_1; + od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse2; + if (flags & HAS_SSSE3) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_ssse3; + if (flags & HAS_SSE4_1) od_filter_dering_direction_8x8 = od_filter_dering_direction_8x8_sse4_1; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif |