summaryrefslogtreecommitdiffstats
path: root/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
diff options
context:
space:
mode:
authortrav90 <travawine@palemoon.org>2018-10-19 21:52:15 -0500
committertrav90 <travawine@palemoon.org>2018-10-19 21:52:20 -0500
commitbbcc64772580c8a979288791afa02d30bc476d2e (patch)
tree437ce94c3fdd7497508e5b55de06c6d011678597 /third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
parent14805f6ddbfb173c327768fff9f81f40ce5e81b0 (diff)
downloadUXP-bbcc64772580c8a979288791afa02d30bc476d2e.tar
UXP-bbcc64772580c8a979288791afa02d30bc476d2e.tar.gz
UXP-bbcc64772580c8a979288791afa02d30bc476d2e.tar.lz
UXP-bbcc64772580c8a979288791afa02d30bc476d2e.tar.xz
UXP-bbcc64772580c8a979288791afa02d30bc476d2e.zip
Update aom to v1.0.0
Update aom to commit id d14c5bb4f336ef1842046089849dee4a301fbbf0.
Diffstat (limited to 'third_party/aom/aom_dsp/x86/obmc_variance_sse4.c')
-rw-r--r--third_party/aom/aom_dsp/x86/obmc_variance_sse4.c69
1 files changed, 29 insertions, 40 deletions
diff --git a/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c b/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
index 392616af3..571aa770b 100644
--- a/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
+++ b/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
@@ -12,7 +12,8 @@
#include <assert.h>
#include <immintrin.h>
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#include "aom_ports/mem.h"
#include "aom/aom_integer.h"
@@ -128,11 +129,9 @@ static INLINE void obmc_variance_w8n(const uint8_t *pre, const int pre_stride,
return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
}
-#if CONFIG_EXT_PARTITION
OBMCVARWXH(128, 128)
OBMCVARWXH(128, 64)
OBMCVARWXH(64, 128)
-#endif // CONFIG_EXT_PARTITION
OBMCVARWXH(64, 64)
OBMCVARWXH(64, 32)
OBMCVARWXH(32, 64)
@@ -146,24 +145,17 @@ OBMCVARWXH(8, 8)
OBMCVARWXH(8, 4)
OBMCVARWXH(4, 8)
OBMCVARWXH(4, 4)
-#if CONFIG_EXT_PARTITION_TYPES
OBMCVARWXH(4, 16)
OBMCVARWXH(16, 4)
OBMCVARWXH(8, 32)
OBMCVARWXH(32, 8)
OBMCVARWXH(16, 64)
OBMCVARWXH(64, 16)
-#if CONFIG_EXT_PARTITION
-OBMCVARWXH(32, 128)
-OBMCVARWXH(128, 32)
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_EXT_PARTITION_TYPES
////////////////////////////////////////////////////////////////////////////////
// High bit-depth
////////////////////////////////////////////////////////////////////////////////
-#if CONFIG_HIGHBITDEPTH
static INLINE void hbd_obmc_variance_w4(
const uint8_t *pre8, const int pre_stride, const int32_t *wsrc,
const int32_t *mask, uint64_t *const sse, int64_t *const sum, const int h) {
@@ -278,8 +270,19 @@ static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
uint64_t sse64 = 0;
if (w == 4) {
hbd_obmc_variance_w4(pre8, pre_stride, wsrc, mask, &sse64, &sum64, h);
- } else {
+ } else if (w < 128 || h < 128) {
hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w, h);
+ } else {
+ assert(w == 128 && h == 128);
+
+ do {
+ hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w,
+ 64);
+ pre8 += 64 * pre_stride;
+ wsrc += 64 * w;
+ mask += 64 * w;
+ h -= 64;
+ } while (h > 0);
}
*sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
*sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
@@ -291,28 +294,23 @@ static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
unsigned int *sse, int *sum) {
int64_t sum64 = 0;
uint64_t sse64 = 0;
- if (w == 128) {
- do {
- hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, 128,
- 32);
- pre8 += 32 * pre_stride;
- wsrc += 32 * 128;
- mask += 32 * 128;
- h -= 32;
- } while (h > 0);
- } else if (w == 64 && h >= 128) {
- do {
- hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, 64,
- 64);
- pre8 += 64 * pre_stride;
- wsrc += 64 * 64;
- mask += 64 * 64;
- h -= 64;
- } while (h > 0);
- } else if (w == 4) {
+ int max_pel_allowed_per_ovf = 512;
+ if (w == 4) {
hbd_obmc_variance_w4(pre8, pre_stride, wsrc, mask, &sse64, &sum64, h);
- } else {
+ } else if (w * h <= max_pel_allowed_per_ovf) {
hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w, h);
+ } else {
+ int h_per_ovf = max_pel_allowed_per_ovf / w;
+
+ assert(max_pel_allowed_per_ovf % w == 0);
+ do {
+ hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w,
+ h_per_ovf);
+ pre8 += h_per_ovf * pre_stride;
+ wsrc += h_per_ovf * w;
+ mask += h_per_ovf * w;
+ h -= h_per_ovf;
+ } while (h > 0);
}
*sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
*sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
@@ -347,11 +345,9 @@ static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
return (var >= 0) ? (uint32_t)var : 0; \
}
-#if CONFIG_EXT_PARTITION
HBD_OBMCVARWXH(128, 128)
HBD_OBMCVARWXH(128, 64)
HBD_OBMCVARWXH(64, 128)
-#endif // CONFIG_EXT_PARTITION
HBD_OBMCVARWXH(64, 64)
HBD_OBMCVARWXH(64, 32)
HBD_OBMCVARWXH(32, 64)
@@ -365,16 +361,9 @@ HBD_OBMCVARWXH(8, 8)
HBD_OBMCVARWXH(8, 4)
HBD_OBMCVARWXH(4, 8)
HBD_OBMCVARWXH(4, 4)
-#if CONFIG_EXT_PARTITION_TYPES
HBD_OBMCVARWXH(4, 16)
HBD_OBMCVARWXH(16, 4)
HBD_OBMCVARWXH(8, 32)
HBD_OBMCVARWXH(32, 8)
HBD_OBMCVARWXH(16, 64)
HBD_OBMCVARWXH(64, 16)
-#if CONFIG_EXT_PARTITION
-HBD_OBMCVARWXH(32, 128)
-HBD_OBMCVARWXH(128, 32)
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_EXT_PARTITION_TYPES
-#endif // CONFIG_HIGHBITDEPTH