diff options
author | trav90 <travawine@palemoon.org> | 2018-10-18 06:04:57 -0500 |
---|---|---|
committer | trav90 <travawine@palemoon.org> | 2018-10-18 06:04:57 -0500 |
commit | 7369c7d7a5eed32963d8af37658286617919f91c (patch) | |
tree | 5397ce7ee9bca1641118fdc3187bd9e2b24fdc9c /third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c | |
parent | 77887af9c4ad1420bbdb33984af4f74b55ca59db (diff) | |
download | UXP-7369c7d7a5eed32963d8af37658286617919f91c.tar UXP-7369c7d7a5eed32963d8af37658286617919f91c.tar.gz UXP-7369c7d7a5eed32963d8af37658286617919f91c.tar.lz UXP-7369c7d7a5eed32963d8af37658286617919f91c.tar.xz UXP-7369c7d7a5eed32963d8af37658286617919f91c.zip |
Update aom to commit id f5bdeac22930ff4c6b219be49c843db35970b918
Diffstat (limited to 'third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c')
-rw-r--r-- | third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c | 28 |
1 files changed, 24 insertions, 4 deletions
```diff
diff --git a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c b/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
index be9d437d2..24e7ed1c6 100644
--- a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
@@ -126,6 +126,12 @@ MASK_SUBPIX_VAR8XH_SSSE3(8)
 MASK_SUBPIX_VAR8XH_SSSE3(4)
 MASK_SUBPIX_VAR4XH_SSSE3(8)
 MASK_SUBPIX_VAR4XH_SSSE3(4)
+#if CONFIG_EXT_PARTITION_TYPES
+MASK_SUBPIX_VAR4XH_SSSE3(16)
+MASK_SUBPIX_VAR_SSSE3(16, 4)
+MASK_SUBPIX_VAR8XH_SSSE3(32)
+MASK_SUBPIX_VAR_SSSE3(32, 8)
+#endif
 
 static INLINE __m128i filter_block(const __m128i a, const __m128i b,
                                    const __m128i filter) {
@@ -564,6 +570,7 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
       const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
     uint64_t sse64; \
     int sum; \
+    int64_t var; \
     uint16_t temp[(H + 1) * W]; \
     const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
     const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
@@ -579,7 +586,8 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
                            msk_stride, W, H, &sse64, &sum); \
     *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 4); \
     sum = ROUND_POWER_OF_TWO(sum, 2); \
-    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+    return (var >= 0) ? (uint32_t)var : 0; \
   } \
   unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_ssse3( \
       const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
@@ -587,6 +595,7 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
       const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
     uint64_t sse64; \
     int sum; \
+    int64_t var; \
     uint16_t temp[(H + 1) * W]; \
     const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
     const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
@@ -602,7 +611,8 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
                            msk_stride, W, H, &sse64, &sum); \
     *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 8); \
     sum = ROUND_POWER_OF_TWO(sum, 4); \
-    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+    return (var >= 0) ? (uint32_t)var : 0; \
   }
 
 #define HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(H) \
@@ -634,6 +644,7 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
       const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
     int sse_; \
     int sum; \
+    int64_t var; \
     uint16_t temp[(H + 1) * 4]; \
     const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
     const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
@@ -649,7 +660,8 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
                               msk_stride, H, &sse_, &sum); \
     *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_, 4); \
     sum = ROUND_POWER_OF_TWO(sum, 2); \
-    return *sse - (uint32_t)(((int64_t)sum * sum) / (4 * H)); \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (4 * H)); \
+    return (var >= 0) ? (uint32_t)var : 0; \
   } \
   unsigned int aom_highbd_12_masked_sub_pixel_variance4x##H##_ssse3( \
       const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
@@ -657,6 +669,7 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
       const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
     int sse_; \
     int sum; \
+    int64_t var; \
     uint16_t temp[(H + 1) * 4]; \
     const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
     const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
@@ -672,7 +685,8 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
                               msk_stride, H, &sse_, &sum); \
     *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_, 8); \
     sum = ROUND_POWER_OF_TWO(sum, 4); \
-    return *sse - (uint32_t)(((int64_t)sum * sum) / (4 * H)); \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (4 * H)); \
+    return (var >= 0) ? (uint32_t)var : 0; \
   }
 
 #if CONFIG_EXT_PARTITION
@@ -693,6 +707,12 @@ HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 8)
 HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 4)
 HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(8)
 HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(4)
+#if CONFIG_EXT_PARTITION_TYPES
+HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(16)
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 4)
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 32)
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 8)
+#endif
 
 static INLINE __m128i highbd_filter_block(const __m128i a, const __m128i b,
                                           const __m128i filter) {
```