Diffstat (limited to 'third_party/aom/aom_dsp/variance.c')
-rw-r--r-- | third_party/aom/aom_dsp/variance.c | 365
1 file changed, 142 insertions(+), 223 deletions(-)
diff --git a/third_party/aom/aom_dsp/variance.c b/third_party/aom/aom_dsp/variance.c
index 9fc0db783..79677c92f 100644
--- a/third_party/aom/aom_dsp/variance.c
+++ b/third_party/aom/aom_dsp/variance.c
@@ -18,6 +18,7 @@
 #include "aom_dsp/variance.h"
 #include "aom_dsp/aom_filter.h"
+#include "aom_dsp/blend.h"
 
 uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                             int b_stride) {
@@ -672,297 +673,215 @@ void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
 #endif  // CONFIG_HIGHBITDEPTH
 
 #if CONFIG_AV1 && CONFIG_EXT_INTER
-void masked_variance(const uint8_t *a, int a_stride, const uint8_t *b,
-                     int b_stride, const uint8_t *m, int m_stride, int w, int h,
-                     unsigned int *sse, int *sum) {
+void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
+                          int height, const uint8_t *ref, int ref_stride,
+                          const uint8_t *mask, int mask_stride,
+                          int invert_mask) {
   int i, j;
-  int64_t sum64 = 0;
-  uint64_t sse64 = 0;
-
-  for (i = 0; i < h; i++) {
-    for (j = 0; j < w; j++) {
-      const int diff = (a[j] - b[j]) * (m[j]);
-      sum64 += diff;
-      sse64 += diff * diff;
+  for (i = 0; i < height; ++i) {
+    for (j = 0; j < width; ++j) {
+      if (!invert_mask)
+        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
+      else
+        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
     }
-
-    a += a_stride;
-    b += b_stride;
-    m += m_stride;
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+    mask += mask_stride;
   }
-  sum64 = (sum64 >= 0) ? sum64 : -sum64;
-  *sum = (int)ROUND_POWER_OF_TWO(sum64, 6);
-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 12);
 }
 
-#define MASK_VAR(W, H) \
-  unsigned int aom_masked_variance##W##x##H##_c( \
-      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
-      const uint8_t *m, int m_stride, unsigned int *sse) { \
-    int sum; \
-    masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
-    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
+void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
+                                    int width, int height, const uint8_t *ref,
+                                    int ref_stride, const uint8_t *mask,
+                                    int mask_stride, int invert_mask) {
+  int i, j;
+  int stride = ref_stride << 3;
+
+  for (i = 0; i < height; i++) {
+    for (j = 0; j < width; j++) {
+      if (!invert_mask)
+        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[(j << 3)], pred[j]);
+      else
+        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[(j << 3)]);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += stride;
+    mask += mask_stride;
   }
+}
 
 #define MASK_SUBPIX_VAR(W, H) \
   unsigned int aom_masked_sub_pixel_variance##W##x##H##_c( \
       const uint8_t *src, int src_stride, int xoffset, int yoffset, \
-      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+      const uint8_t *msk, int msk_stride, int invert_mask, \
       unsigned int *sse) { \
     uint16_t fdata3[(H + 1) * W]; \
     uint8_t temp2[H * W]; \
+    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
 \
     var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
                                       bilinear_filters_2t[xoffset]); \
     var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
                                        bilinear_filters_2t[yoffset]); \
 \
-    return aom_masked_variance##W##x##H##_c(temp2, W, dst, dst_stride, msk, \
-                                            msk_stride, sse); \
+    aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride, \
+                         invert_mask); \
+    return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse); \
   }
 
-MASK_VAR(4, 4)
 MASK_SUBPIX_VAR(4, 4)
-
-MASK_VAR(4, 8)
 MASK_SUBPIX_VAR(4, 8)
-
-MASK_VAR(8, 4)
 MASK_SUBPIX_VAR(8, 4)
-
-MASK_VAR(8, 8)
 MASK_SUBPIX_VAR(8, 8)
-
-MASK_VAR(8, 16)
 MASK_SUBPIX_VAR(8, 16)
-
-MASK_VAR(16, 8)
 MASK_SUBPIX_VAR(16, 8)
-
-MASK_VAR(16, 16)
 MASK_SUBPIX_VAR(16, 16)
-
-MASK_VAR(16, 32)
 MASK_SUBPIX_VAR(16, 32)
-
-MASK_VAR(32, 16)
 MASK_SUBPIX_VAR(32, 16)
-
-MASK_VAR(32, 32)
 MASK_SUBPIX_VAR(32, 32)
-
-MASK_VAR(32, 64)
 MASK_SUBPIX_VAR(32, 64)
-
-MASK_VAR(64, 32)
 MASK_SUBPIX_VAR(64, 32)
-
-MASK_VAR(64, 64)
 MASK_SUBPIX_VAR(64, 64)
-
 #if CONFIG_EXT_PARTITION
-MASK_VAR(64, 128)
 MASK_SUBPIX_VAR(64, 128)
-
-MASK_VAR(128, 64)
 MASK_SUBPIX_VAR(128, 64)
-
-MASK_VAR(128, 128)
 MASK_SUBPIX_VAR(128, 128)
 #endif  // CONFIG_EXT_PARTITION
 
 #if CONFIG_HIGHBITDEPTH
-void highbd_masked_variance64(const uint8_t *a8, int a_stride,
-                              const uint8_t *b8, int b_stride, const uint8_t *m,
-                              int m_stride, int w, int h, uint64_t *sse,
-                              int64_t *sum) {
+void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
+                                 int width, int height, const uint8_t *ref8,
+                                 int ref_stride, const uint8_t *mask,
+                                 int mask_stride, int invert_mask) {
   int i, j;
-  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
-  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
-
-  *sum = 0;
-  *sse = 0;
-
-  for (i = 0; i < h; i++) {
-    for (j = 0; j < w; j++) {
-      const int diff = (a[j] - b[j]) * (m[j]);
-      *sum += (int64_t)diff;
-      *sse += (int64_t)diff * diff;
+  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  for (i = 0; i < height; ++i) {
+    for (j = 0; j < width; ++j) {
+      if (!invert_mask)
+        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
+      else
+        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
     }
-
-    a += a_stride;
-    b += b_stride;
-    m += m_stride;
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+    mask += mask_stride;
   }
-  *sum = (*sum >= 0) ? *sum : -*sum;
-  *sum = ROUND_POWER_OF_TWO(*sum, 6);
-  *sse = ROUND_POWER_OF_TWO(*sse, 12);
 }
 
-void highbd_masked_variance(const uint8_t *a8, int a_stride, const uint8_t *b8,
-                            int b_stride, const uint8_t *m, int m_stride, int w,
-                            int h, unsigned int *sse, int *sum) {
-  int64_t sum64;
-  uint64_t sse64;
-  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
-                           &sse64, &sum64);
-  *sum = (int)sum64;
-  *sse = (unsigned int)sse64;
-}
-
-void highbd_10_masked_variance(const uint8_t *a8, int a_stride,
-                               const uint8_t *b8, int b_stride,
-                               const uint8_t *m, int m_stride, int w, int h,
-                               unsigned int *sse, int *sum) {
-  int64_t sum64;
-  uint64_t sse64;
-  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
-                           &sse64, &sum64);
-  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
-  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
-}
-
-void highbd_12_masked_variance(const uint8_t *a8, int a_stride,
-                               const uint8_t *b8, int b_stride,
-                               const uint8_t *m, int m_stride, int w, int h,
-                               unsigned int *sse, int *sum) {
-  int64_t sum64;
-  uint64_t sse64;
-  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
-                           &sse64, &sum64);
-  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
-  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
-}
+void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred,
+                                           const uint8_t *pred8, int width,
+                                           int height, const uint8_t *ref8,
+                                           int ref_stride, const uint8_t *mask,
+                                           int mask_stride, int invert_mask) {
+  int i, j;
+  int stride = ref_stride << 3;
 
-#define HIGHBD_MASK_VAR(W, H) \
-  unsigned int aom_highbd_masked_variance##W##x##H##_c( \
-      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
-      const uint8_t *m, int m_stride, unsigned int *sse) { \
-    int sum; \
-    highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, \
-                           &sum); \
-    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
-  } \
-\
-  unsigned int aom_highbd_10_masked_variance##W##x##H##_c( \
-      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
-      const uint8_t *m, int m_stride, unsigned int *sse) { \
-    int sum; \
-    int64_t var; \
-    highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, \
-                              sse, &sum); \
-    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
-    return (var >= 0) ? (uint32_t)var : 0; \
-  } \
-\
-  unsigned int aom_highbd_12_masked_variance##W##x##H##_c( \
-      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
-      const uint8_t *m, int m_stride, unsigned int *sse) { \
-    int sum; \
-    int64_t var; \
-    highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, \
-                              sse, &sum); \
-    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
-    return (var >= 0) ? (uint32_t)var : 0; \
+  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  for (i = 0; i < height; ++i) {
+    for (j = 0; j < width; ++j) {
+      if (!invert_mask)
+        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j << 3], pred[j]);
+      else
+        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j << 3]);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += stride;
+    mask += mask_stride;
   }
+}
 
-#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
-  unsigned int aom_highbd_masked_sub_pixel_variance##W##x##H##_c( \
-      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
-      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
-      unsigned int *sse) { \
-    uint16_t fdata3[(H + 1) * W]; \
-    uint16_t temp2[H * W]; \
-\
-    aom_highbd_var_filter_block2d_bil_first_pass( \
-        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
-    aom_highbd_var_filter_block2d_bil_second_pass( \
-        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
-\
-    return aom_highbd_masked_variance##W##x##H##_c( \
-        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
-  } \
-\
-  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
-      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
-      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
-      unsigned int *sse) { \
-    uint16_t fdata3[(H + 1) * W]; \
-    uint16_t temp2[H * W]; \
-\
-    aom_highbd_var_filter_block2d_bil_first_pass( \
-        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
-    aom_highbd_var_filter_block2d_bil_second_pass( \
-        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
-\
-    return aom_highbd_10_masked_variance##W##x##H##_c( \
-        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
-  } \
-\
-  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
-      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
-      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
-      unsigned int *sse) { \
-    uint16_t fdata3[(H + 1) * W]; \
-    uint16_t temp2[H * W]; \
-\
-    aom_highbd_var_filter_block2d_bil_first_pass( \
-        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
-    aom_highbd_var_filter_block2d_bil_second_pass( \
-        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
-\
-    return aom_highbd_12_masked_variance##W##x##H##_c( \
-        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
+#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
+  unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+      const uint8_t *msk, int msk_stride, int invert_mask, \
+      unsigned int *sse) { \
+    uint16_t fdata3[(H + 1) * W]; \
+    uint16_t temp2[H * W]; \
+    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
+\
+    aom_highbd_var_filter_block2d_bil_first_pass( \
+        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
+    aom_highbd_var_filter_block2d_bil_second_pass( \
+        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
+\
+    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H, \
+                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
+                                invert_mask); \
+\
+    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
+                                              ref, ref_stride, sse); \
+  } \
+\
+  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+      const uint8_t *msk, int msk_stride, int invert_mask, \
+      unsigned int *sse) { \
+    uint16_t fdata3[(H + 1) * W]; \
+    uint16_t temp2[H * W]; \
+    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
+\
+    aom_highbd_var_filter_block2d_bil_first_pass( \
+        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
+    aom_highbd_var_filter_block2d_bil_second_pass( \
+        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
+\
+    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H, \
+                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
+                                invert_mask); \
+\
+    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
+                                               ref, ref_stride, sse); \
+  } \
+\
+  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+      const uint8_t *msk, int msk_stride, int invert_mask, \
+      unsigned int *sse) { \
+    uint16_t fdata3[(H + 1) * W]; \
+    uint16_t temp2[H * W]; \
+    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
+\
+    aom_highbd_var_filter_block2d_bil_first_pass( \
+        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
+    aom_highbd_var_filter_block2d_bil_second_pass( \
+        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
+\
+    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H, \
+                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
+                                invert_mask); \
+\
+    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
+                                               ref, ref_stride, sse); \
   }
 
-HIGHBD_MASK_VAR(4, 4)
 HIGHBD_MASK_SUBPIX_VAR(4, 4)
-
-HIGHBD_MASK_VAR(4, 8)
 HIGHBD_MASK_SUBPIX_VAR(4, 8)
-
-HIGHBD_MASK_VAR(8, 4)
 HIGHBD_MASK_SUBPIX_VAR(8, 4)
-
-HIGHBD_MASK_VAR(8, 8)
 HIGHBD_MASK_SUBPIX_VAR(8, 8)
-
-HIGHBD_MASK_VAR(8, 16)
 HIGHBD_MASK_SUBPIX_VAR(8, 16)
-
-HIGHBD_MASK_VAR(16, 8)
 HIGHBD_MASK_SUBPIX_VAR(16, 8)
-
-HIGHBD_MASK_VAR(16, 16)
 HIGHBD_MASK_SUBPIX_VAR(16, 16)
-
-HIGHBD_MASK_VAR(16, 32)
 HIGHBD_MASK_SUBPIX_VAR(16, 32)
-
-HIGHBD_MASK_VAR(32, 16)
 HIGHBD_MASK_SUBPIX_VAR(32, 16)
-
-HIGHBD_MASK_VAR(32, 32)
 HIGHBD_MASK_SUBPIX_VAR(32, 32)
-
-HIGHBD_MASK_VAR(32, 64)
 HIGHBD_MASK_SUBPIX_VAR(32, 64)
-
-HIGHBD_MASK_VAR(64, 32)
 HIGHBD_MASK_SUBPIX_VAR(64, 32)
-
-HIGHBD_MASK_VAR(64, 64)
 HIGHBD_MASK_SUBPIX_VAR(64, 64)
-
 #if CONFIG_EXT_PARTITION
-HIGHBD_MASK_VAR(64, 128)
 HIGHBD_MASK_SUBPIX_VAR(64, 128)
-
-HIGHBD_MASK_VAR(128, 64)
 HIGHBD_MASK_SUBPIX_VAR(128, 64)
-
-HIGHBD_MASK_VAR(128, 128)
 HIGHBD_MASK_SUBPIX_VAR(128, 128)
 #endif  // CONFIG_EXT_PARTITION
 #endif  // CONFIG_HIGHBITDEPTH
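Note on the change: the bespoke masked-variance accumulators are replaced by a two-step pipeline that first builds a composite prediction by blending the (sub-pel filtered) prediction against second_pred under the 64-level mask, then reuses the ordinary variance kernels on the result. A minimal sketch of the blend step, assuming the usual definitions from aom_dsp/blend.h (AOM_BLEND_A64_MAX_ALPHA == 64 with 6-bit rounding; verify against the actual header):

#include <stdint.h>

/* Assumed to mirror aom_dsp/blend.h: a 64-level alpha blend with 6-bit
   rounding. mask value 64 selects the first source entirely, 0 the second. */
#define AOM_BLEND_A64_MAX_ALPHA 64
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
#define AOM_BLEND_A64(a, v0, v1) \
  ROUND_POWER_OF_TWO((a) * (v0) + (AOM_BLEND_A64_MAX_ALPHA - (a)) * (v1), 6)

/* Scalar form of the inner loop of aom_comp_mask_pred_c: with
   invert_mask == 0 the mask weights ref against pred; inverting swaps them. */
static inline uint8_t blend_one(uint8_t m, uint8_t ref, uint8_t pred,
                                int invert_mask) {
  return invert_mask ? (uint8_t)AOM_BLEND_A64(m, pred, ref)
                     : (uint8_t)AOM_BLEND_A64(m, ref, pred);
}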
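The *_upsampled_pred variants read from a reference plane that, judging by the shifts, is upsampled by 8 in each dimension: the integer-pel sample co-located with output pixel (i, j) sits at column j << 3 of every eighth row (stride = ref_stride << 3). A hypothetical helper illustrating that mapping (upsampled_at is not part of the library):

#include <stdint.h>

/* Hypothetical helper, not in libaom: fetch the sample of an 8x-upsampled
   plane corresponding to integer position (row, col) of the output block.
   aom_comp_mask_upsampled_pred_c does the same arithmetic inline via
   ref[j << 3] and ref += ref_stride << 3. */
static inline uint8_t upsampled_at(const uint8_t *ref, int ref_stride,
                                   int row, int col) {
  return ref[(row << 3) * ref_stride + (col << 3)];
}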
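Finally, the MASK_SUBPIX_VAR macros now bottom out in the plain aom_variance kernels, which compute sse - sum^2 / (W * H) over the blended block instead of the old mask-weighted accumulation. A sketch of that final step, assuming the standard structure of the C variance routines (the real aom_variance<W>x<H>_c functions are generated per block size by macro):

#include <stdint.h>

/* Accumulate the sum and sum of squares of (a - b) over a w x h block,
   write the SSE out, and return the variance sse - sum^2 / (w * h). */
static unsigned int variance_sketch(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride, int w,
                                    int h, unsigned int *sse) {
  int64_t sum = 0;
  uint64_t sq = 0;
  for (int i = 0; i < h; ++i) {
    for (int j = 0; j < w; ++j) {
      const int d = a[j] - b[j];
      sum += d;
      sq += (uint64_t)(d * d);
    }
    a += a_stride;
    b += b_stride;
  }
  *sse = (unsigned int)sq;
  return *sse - (unsigned int)(((int64_t)sum * sum) / (w * h));
}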