summaryrefslogtreecommitdiffstats
path: root/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c')
-rw-r--r--third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c28
1 files changed, 24 insertions, 4 deletions
diff --git a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c b/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
index be9d437d2..24e7ed1c6 100644
--- a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
@@ -126,6 +126,12 @@ MASK_SUBPIX_VAR8XH_SSSE3(8)
MASK_SUBPIX_VAR8XH_SSSE3(4)
MASK_SUBPIX_VAR4XH_SSSE3(8)
MASK_SUBPIX_VAR4XH_SSSE3(4)
+#if CONFIG_EXT_PARTITION_TYPES
+MASK_SUBPIX_VAR4XH_SSSE3(16)
+MASK_SUBPIX_VAR_SSSE3(16, 4)
+MASK_SUBPIX_VAR8XH_SSSE3(32)
+MASK_SUBPIX_VAR_SSSE3(32, 8)
+#endif
static INLINE __m128i filter_block(const __m128i a, const __m128i b,
const __m128i filter) {
@@ -564,6 +570,7 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
uint64_t sse64; \
int sum; \
+ int64_t var; \
uint16_t temp[(H + 1) * W]; \
const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
@@ -579,7 +586,8 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
msk_stride, W, H, &sse64, &sum); \
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 4); \
sum = ROUND_POWER_OF_TWO(sum, 2); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
} \
unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_ssse3( \
const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
@@ -587,6 +595,7 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
uint64_t sse64; \
int sum; \
+ int64_t var; \
uint16_t temp[(H + 1) * W]; \
const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
@@ -602,7 +611,8 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
msk_stride, W, H, &sse64, &sum); \
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 8); \
sum = ROUND_POWER_OF_TWO(sum, 4); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
}
#define HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(H) \
@@ -634,6 +644,7 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
int sse_; \
int sum; \
+ int64_t var; \
uint16_t temp[(H + 1) * 4]; \
const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
@@ -649,7 +660,8 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
msk_stride, H, &sse_, &sum); \
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse_, 4); \
sum = ROUND_POWER_OF_TWO(sum, 2); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (4 * H)); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (4 * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
} \
unsigned int aom_highbd_12_masked_sub_pixel_variance4x##H##_ssse3( \
const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
@@ -657,6 +669,7 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
int sse_; \
int sum; \
+ int64_t var; \
uint16_t temp[(H + 1) * 4]; \
const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
@@ -672,7 +685,8 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
msk_stride, H, &sse_, &sum); \
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse_, 8); \
sum = ROUND_POWER_OF_TWO(sum, 4); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (4 * H)); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (4 * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
}
#if CONFIG_EXT_PARTITION
@@ -693,6 +707,12 @@ HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 8)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 4)
HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(8)
HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(4)
+#if CONFIG_EXT_PARTITION_TYPES
+HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(16)
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 4)
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 32)
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 8)
+#endif
static INLINE __m128i highbd_filter_block(const __m128i a, const __m128i b,
const __m128i filter) {