author    trav90 <travawine@palemoon.org>  2018-10-17 05:59:08 -0500
committer trav90 <travawine@palemoon.org>  2018-10-17 05:59:08 -0500
commit    df9477dfa60ebb5d31bc142e58ce46535c17abce (patch)
tree      c4fdd5d1b09d08c0514f208246260fc87372cb56 /third_party/aom/aom_dsp/variance.c
parent    0cc51bc106250988cc3b89cb5d743a5af52cd35a (diff)
Update aom to slightly newer commit ID
Diffstat (limited to 'third_party/aom/aom_dsp/variance.c')
-rw-r--r--  third_party/aom/aom_dsp/variance.c  365
1 file changed, 142 insertions(+), 223 deletions(-)
diff --git a/third_party/aom/aom_dsp/variance.c b/third_party/aom/aom_dsp/variance.c
index 9fc0db783..79677c92f 100644
--- a/third_party/aom/aom_dsp/variance.c
+++ b/third_party/aom/aom_dsp/variance.c
@@ -18,6 +18,7 @@
#include "aom_dsp/variance.h"
#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/blend.h"
uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride) {
@@ -672,297 +673,215 @@ void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_AV1 && CONFIG_EXT_INTER
-void masked_variance(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, const uint8_t *m, int m_stride, int w, int h,
- unsigned int *sse, int *sum) {
+void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
+ int height, const uint8_t *ref, int ref_stride,
+ const uint8_t *mask, int mask_stride,
+ int invert_mask) {
int i, j;
- int64_t sum64 = 0;
- uint64_t sse64 = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = (a[j] - b[j]) * (m[j]);
- sum64 += diff;
- sse64 += diff * diff;
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ if (!invert_mask)
+ comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
+ else
+ comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
}
-
- a += a_stride;
- b += b_stride;
- m += m_stride;
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ mask += mask_stride;
}
- sum64 = (sum64 >= 0) ? sum64 : -sum64;
- *sum = (int)ROUND_POWER_OF_TWO(sum64, 6);
- *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 12);
}
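
Note: the new predictor leans entirely on AOM_BLEND_A64 from the newly included aom_dsp/blend.h, a 6-bit alpha blend in which the mask weights the first operand at mask/64 and the second at (64 - mask)/64. A minimal self-contained sketch of that arithmetic, mirroring the blend.h definitions (the test values are illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Mirrors ROUND_POWER_OF_TWO and AOM_BLEND_A64 from aom_dsp: mask value a
 * weights v0 at a/64 and v1 at (64 - a)/64, rounding to nearest. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
#define AOM_BLEND_A64_MAX_ALPHA 64
#define AOM_BLEND_A64(a, v0, v1) \
  ROUND_POWER_OF_TWO((a) * (v0) + (AOM_BLEND_A64_MAX_ALPHA - (a)) * (v1), 6)

int main(void) {
  /* 48/64 of ref (200) plus 16/64 of pred (100): 150 + 25 = 175. */
  printf("%d\n", AOM_BLEND_A64(48, 200, 100));
  return 0;
}
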
-#define MASK_VAR(W, H) \
- unsigned int aom_masked_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
- const uint8_t *m, int m_stride, unsigned int *sse) { \
- int sum; \
- masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
- return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
+void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
+ int width, int height, const uint8_t *ref,
+ int ref_stride, const uint8_t *mask,
+ int mask_stride, int invert_mask) {
+ int i, j;
+ int stride = ref_stride << 3;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ if (!invert_mask)
+ comp_pred[j] = AOM_BLEND_A64(mask[j], ref[(j << 3)], pred[j]);
+ else
+ comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[(j << 3)]);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += stride;
+ mask += mask_stride;
}
+}
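
Note: in the upsampled variant the reference buffer has already been interpolated to eight times the horizontal and vertical resolution, so indexing with j << 3 and advancing by ref_stride << 3 visits exactly the integer-pel samples; the high-bit-depth version further down uses the same indexing. A sketch of that address math (upsampled_index is a hypothetical helper, not part of this file):

/* Hypothetical helper: offset of output pixel (row, col) inside an
 * 8x-upsampled reference whose rows are ref_stride << 3 apart. */
static inline int upsampled_index(int row, int col, int ref_stride) {
  return row * (ref_stride << 3) + (col << 3);
}
/* e.g. upsampled_index(1, 2, 80) == 1 * 640 + 16 == 656 */
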
#define MASK_SUBPIX_VAR(W, H) \
unsigned int aom_masked_sub_pixel_variance##W##x##H##_c( \
const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
+ const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+ const uint8_t *msk, int msk_stride, int invert_mask, \
unsigned int *sse) { \
uint16_t fdata3[(H + 1) * W]; \
uint8_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
bilinear_filters_2t[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
bilinear_filters_2t[yoffset]); \
\
- return aom_masked_variance##W##x##H##_c(temp2, W, dst, dst_stride, msk, \
- msk_stride, sse); \
+ aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride, \
+ invert_mask); \
+ return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse); \
}
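
Note: with MASK_VAR gone, each MASK_SUBPIX_VAR instantiation now performs three steps: bilinear interpolation of src with the two-tap filters, a masked blend against second_pred via aom_comp_mask_pred_c, and a plain variance (sse - sum^2 / (W*H)) of the blended block against ref. For reference, the 4x4 instantiation expands to approximately the following (a hand expansion of the macro above, not literal preprocessor output):

unsigned int aom_masked_sub_pixel_variance4x4_c(
    const uint8_t *src, int src_stride, int xoffset, int yoffset,
    const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
    const uint8_t *msk, int msk_stride, int invert_mask,
    unsigned int *sse) {
  uint16_t fdata3[(4 + 1) * 4];
  uint8_t temp2[4 * 4];
  DECLARE_ALIGNED(16, uint8_t, temp3[4 * 4]);

  /* Two-pass bilinear interpolation at the requested subpel offsets. */
  var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1, 4,
                                    bilinear_filters_2t[xoffset]);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
                                     bilinear_filters_2t[yoffset]);

  /* Masked blend, then ordinary variance of the composite block. */
  aom_comp_mask_pred_c(temp3, second_pred, 4, 4, temp2, 4, msk, msk_stride,
                       invert_mask);
  return aom_variance4x4_c(temp3, 4, ref, ref_stride, sse);
}
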
-MASK_VAR(4, 4)
MASK_SUBPIX_VAR(4, 4)
-
-MASK_VAR(4, 8)
MASK_SUBPIX_VAR(4, 8)
-
-MASK_VAR(8, 4)
MASK_SUBPIX_VAR(8, 4)
-
-MASK_VAR(8, 8)
MASK_SUBPIX_VAR(8, 8)
-
-MASK_VAR(8, 16)
MASK_SUBPIX_VAR(8, 16)
-
-MASK_VAR(16, 8)
MASK_SUBPIX_VAR(16, 8)
-
-MASK_VAR(16, 16)
MASK_SUBPIX_VAR(16, 16)
-
-MASK_VAR(16, 32)
MASK_SUBPIX_VAR(16, 32)
-
-MASK_VAR(32, 16)
MASK_SUBPIX_VAR(32, 16)
-
-MASK_VAR(32, 32)
MASK_SUBPIX_VAR(32, 32)
-
-MASK_VAR(32, 64)
MASK_SUBPIX_VAR(32, 64)
-
-MASK_VAR(64, 32)
MASK_SUBPIX_VAR(64, 32)
-
-MASK_VAR(64, 64)
MASK_SUBPIX_VAR(64, 64)
-
#if CONFIG_EXT_PARTITION
-MASK_VAR(64, 128)
MASK_SUBPIX_VAR(64, 128)
-
-MASK_VAR(128, 64)
MASK_SUBPIX_VAR(128, 64)
-
-MASK_VAR(128, 128)
MASK_SUBPIX_VAR(128, 128)
#endif // CONFIG_EXT_PARTITION
#if CONFIG_HIGHBITDEPTH
-void highbd_masked_variance64(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, const uint8_t *m,
- int m_stride, int w, int h, uint64_t *sse,
- int64_t *sum) {
+void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
+ int width, int height, const uint8_t *ref8,
+ int ref_stride, const uint8_t *mask,
+ int mask_stride, int invert_mask) {
int i, j;
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
-
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = (a[j] - b[j]) * (m[j]);
- *sum += (int64_t)diff;
- *sse += (int64_t)diff * diff;
+ uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ if (!invert_mask)
+ comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
+ else
+ comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
}
-
- a += a_stride;
- b += b_stride;
- m += m_stride;
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ mask += mask_stride;
}
- *sum = (*sum >= 0) ? *sum : -*sum;
- *sum = ROUND_POWER_OF_TWO(*sum, 6);
- *sse = ROUND_POWER_OF_TWO(*sse, 12);
}
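
Note: as elsewhere in aom_dsp, high-bit-depth buffers are threaded through uint8_t * parameters and CONVERT_TO_SHORTPTR recovers the underlying uint16_t view. A sketch of that convention as defined in aom_dsp/aom_dsp_common.h at this point in the tree (reproduced from memory, so treat it as an assumption):

#include <stdint.h>

/* Assumed definitions per aom_dsp/aom_dsp_common.h: a highbd buffer is a
 * uint16_t array whose address is carried around halved in a uint8_t *. */
#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
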
-void highbd_masked_variance(const uint8_t *a8, int a_stride, const uint8_t *b8,
- int b_stride, const uint8_t *m, int m_stride, int w,
- int h, unsigned int *sse, int *sum) {
- int64_t sum64;
- uint64_t sse64;
- highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
- &sse64, &sum64);
- *sum = (int)sum64;
- *sse = (unsigned int)sse64;
-}
-
-void highbd_10_masked_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- const uint8_t *m, int m_stride, int w, int h,
- unsigned int *sse, int *sum) {
- int64_t sum64;
- uint64_t sse64;
- highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
- &sse64, &sum64);
- *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
- *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
-}
-
-void highbd_12_masked_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- const uint8_t *m, int m_stride, int w, int h,
- unsigned int *sse, int *sum) {
- int64_t sum64;
- uint64_t sse64;
- highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
- &sse64, &sum64);
- *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
- *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
-}
+void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred,
+ const uint8_t *pred8, int width,
+ int height, const uint8_t *ref8,
+ int ref_stride, const uint8_t *mask,
+ int mask_stride, int invert_mask) {
+ int i, j;
+ int stride = ref_stride << 3;
-#define HIGHBD_MASK_VAR(W, H) \
- unsigned int aom_highbd_masked_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
- const uint8_t *m, int m_stride, unsigned int *sse) { \
- int sum; \
- highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, \
- &sum); \
- return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
- } \
- \
- unsigned int aom_highbd_10_masked_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
- const uint8_t *m, int m_stride, unsigned int *sse) { \
- int sum; \
- int64_t var; \
- highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, \
- sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- \
- unsigned int aom_highbd_12_masked_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
- const uint8_t *m, int m_stride, unsigned int *sse) { \
- int sum; \
- int64_t var; \
- highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, \
- sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
+ uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ if (!invert_mask)
+ comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j << 3], pred[j]);
+ else
+ comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j << 3]);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += stride;
+ mask += mask_stride;
}
+}
-#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
- unsigned int aom_highbd_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_masked_variance##W##x##H##_c( \
- CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
- } \
- \
- unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_10_masked_variance##W##x##H##_c( \
- CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
- } \
- \
- unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_12_masked_variance##W##x##H##_c( \
- CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
+#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
+ unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+ const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+ const uint8_t *msk, int msk_stride, int invert_mask, \
+ unsigned int *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
+ \
+ aom_highbd_var_filter_block2d_bil_first_pass( \
+ src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
+ aom_highbd_var_filter_block2d_bil_second_pass( \
+ fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
+ \
+ aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H, \
+ CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
+ invert_mask); \
+ \
+ return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
+ ref, ref_stride, sse); \
+ } \
+ \
+ unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+ const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+ const uint8_t *msk, int msk_stride, int invert_mask, \
+ unsigned int *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
+ \
+ aom_highbd_var_filter_block2d_bil_first_pass( \
+ src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
+ aom_highbd_var_filter_block2d_bil_second_pass( \
+ fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
+ \
+ aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H, \
+ CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
+ invert_mask); \
+ \
+ return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
+ ref, ref_stride, sse); \
+ } \
+ \
+ unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+ const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+ const uint8_t *msk, int msk_stride, int invert_mask, \
+ unsigned int *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
+ \
+ aom_highbd_var_filter_block2d_bil_first_pass( \
+ src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
+ aom_highbd_var_filter_block2d_bil_second_pass( \
+ fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
+ \
+ aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H, \
+ CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
+ invert_mask); \
+ \
+ return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
+ ref, ref_stride, sse); \
}
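
Note: the removed highbd_10/highbd_12 helpers rescaled the accumulators themselves (sum >> 2 and sse >> 4 at 10 bits, sum >> 4 and sse >> 8 at 12 bits) so variances stay comparable in 8-bit units; the rewritten macro delegates that to the aom_highbd_{8,10,12}_variance kernels and, in passing, renames the 8-bit entry point from aom_highbd_masked_sub_pixel_variance to aom_highbd_8_masked_sub_pixel_variance. A sketch of the sse half of that normalization (normalize_sse is a hypothetical name; the rounding matches the ROUND_POWER_OF_TWO calls deleted above):

#include <stdint.h>

/* Hypothetical helper: rescale a b-bit sse into 8-bit units by
 * 2^(2*(b-8)), rounding to nearest, as the deleted helpers did. */
static inline uint32_t normalize_sse(uint64_t sse, int bit_depth) {
  const int shift = 2 * (bit_depth - 8); /* 0, 4 or 8 */
  if (shift == 0) return (uint32_t)sse;
  return (uint32_t)((sse + ((uint64_t)1 << (shift - 1))) >> shift);
}
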
-HIGHBD_MASK_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 4)
-
-HIGHBD_MASK_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(4, 8)
-
-HIGHBD_MASK_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 4)
-
-HIGHBD_MASK_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 8)
-
-HIGHBD_MASK_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(8, 16)
-
-HIGHBD_MASK_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 8)
-
-HIGHBD_MASK_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 16)
-
-HIGHBD_MASK_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(16, 32)
-
-HIGHBD_MASK_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 16)
-
-HIGHBD_MASK_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 32)
-
-HIGHBD_MASK_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(32, 64)
-
-HIGHBD_MASK_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 32)
-
-HIGHBD_MASK_VAR(64, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 64)
-
#if CONFIG_EXT_PARTITION
-HIGHBD_MASK_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(64, 128)
-
-HIGHBD_MASK_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 64)
-
-HIGHBD_MASK_VAR(128, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 128)
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_HIGHBITDEPTH