diff options
Diffstat (limited to 'third_party/aom/aom_dsp/x86/obmc_variance_sse4.c')
-rw-r--r-- | third_party/aom/aom_dsp/x86/obmc_variance_sse4.c | 69 |
1 files changed, 29 insertions, 40 deletions
diff --git a/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c b/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c index 392616af3..571aa770b 100644 --- a/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c +++ b/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c @@ -12,7 +12,8 @@ #include <assert.h> #include <immintrin.h> -#include "./aom_config.h" +#include "config/aom_config.h" + #include "aom_ports/mem.h" #include "aom/aom_integer.h" @@ -128,11 +129,9 @@ static INLINE void obmc_variance_w8n(const uint8_t *pre, const int pre_stride, return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \ } -#if CONFIG_EXT_PARTITION OBMCVARWXH(128, 128) OBMCVARWXH(128, 64) OBMCVARWXH(64, 128) -#endif // CONFIG_EXT_PARTITION OBMCVARWXH(64, 64) OBMCVARWXH(64, 32) OBMCVARWXH(32, 64) @@ -146,24 +145,17 @@ OBMCVARWXH(8, 8) OBMCVARWXH(8, 4) OBMCVARWXH(4, 8) OBMCVARWXH(4, 4) -#if CONFIG_EXT_PARTITION_TYPES OBMCVARWXH(4, 16) OBMCVARWXH(16, 4) OBMCVARWXH(8, 32) OBMCVARWXH(32, 8) OBMCVARWXH(16, 64) OBMCVARWXH(64, 16) -#if CONFIG_EXT_PARTITION -OBMCVARWXH(32, 128) -OBMCVARWXH(128, 32) -#endif // CONFIG_EXT_PARTITION -#endif // CONFIG_EXT_PARTITION_TYPES //////////////////////////////////////////////////////////////////////////////// // High bit-depth //////////////////////////////////////////////////////////////////////////////// -#if CONFIG_HIGHBITDEPTH static INLINE void hbd_obmc_variance_w4( const uint8_t *pre8, const int pre_stride, const int32_t *wsrc, const int32_t *mask, uint64_t *const sse, int64_t *const sum, const int h) { @@ -278,8 +270,19 @@ static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride, uint64_t sse64 = 0; if (w == 4) { hbd_obmc_variance_w4(pre8, pre_stride, wsrc, mask, &sse64, &sum64, h); - } else { + } else if (w < 128 || h < 128) { hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w, h); + } else { + assert(w == 128 && h == 128); + + do { + hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w, + 64); + pre8 += 64 * pre_stride; + wsrc += 64 * w; + mask += 64 * w; + h -= 64; + } while (h > 0); } *sum = (int)ROUND_POWER_OF_TWO(sum64, 2); *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4); @@ -291,28 +294,23 @@ static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride, unsigned int *sse, int *sum) { int64_t sum64 = 0; uint64_t sse64 = 0; - if (w == 128) { - do { - hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, 128, - 32); - pre8 += 32 * pre_stride; - wsrc += 32 * 128; - mask += 32 * 128; - h -= 32; - } while (h > 0); - } else if (w == 64 && h >= 128) { - do { - hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, 64, - 64); - pre8 += 64 * pre_stride; - wsrc += 64 * 64; - mask += 64 * 64; - h -= 64; - } while (h > 0); - } else if (w == 4) { + int max_pel_allowed_per_ovf = 512; + if (w == 4) { hbd_obmc_variance_w4(pre8, pre_stride, wsrc, mask, &sse64, &sum64, h); - } else { + } else if (w * h <= max_pel_allowed_per_ovf) { hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w, h); + } else { + int h_per_ovf = max_pel_allowed_per_ovf / w; + + assert(max_pel_allowed_per_ovf % w == 0); + do { + hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w, + h_per_ovf); + pre8 += h_per_ovf * pre_stride; + wsrc += h_per_ovf * w; + mask += h_per_ovf * w; + h -= h_per_ovf; + } while (h > 0); } *sum = (int)ROUND_POWER_OF_TWO(sum64, 4); *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8); @@ -347,11 +345,9 @@ static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride, return (var >= 0) ? (uint32_t)var : 0; \ } -#if CONFIG_EXT_PARTITION HBD_OBMCVARWXH(128, 128) HBD_OBMCVARWXH(128, 64) HBD_OBMCVARWXH(64, 128) -#endif // CONFIG_EXT_PARTITION HBD_OBMCVARWXH(64, 64) HBD_OBMCVARWXH(64, 32) HBD_OBMCVARWXH(32, 64) @@ -365,16 +361,9 @@ HBD_OBMCVARWXH(8, 8) HBD_OBMCVARWXH(8, 4) HBD_OBMCVARWXH(4, 8) HBD_OBMCVARWXH(4, 4) -#if CONFIG_EXT_PARTITION_TYPES HBD_OBMCVARWXH(4, 16) HBD_OBMCVARWXH(16, 4) HBD_OBMCVARWXH(8, 32) HBD_OBMCVARWXH(32, 8) HBD_OBMCVARWXH(16, 64) HBD_OBMCVARWXH(64, 16) -#if CONFIG_EXT_PARTITION -HBD_OBMCVARWXH(32, 128) -HBD_OBMCVARWXH(128, 32) -#endif // CONFIG_EXT_PARTITION -#endif // CONFIG_EXT_PARTITION_TYPES -#endif // CONFIG_HIGHBITDEPTH |