diff options
Diffstat (limited to 'third_party/aom/aom_dsp/variance.c')
-rw-r--r-- | third_party/aom/aom_dsp/variance.c | 218 |
1 files changed, 171 insertions, 47 deletions
diff --git a/third_party/aom/aom_dsp/variance.c b/third_party/aom/aom_dsp/variance.c index 79677c92f..a4c3616e7 100644 --- a/third_party/aom/aom_dsp/variance.c +++ b/third_party/aom/aom_dsp/variance.c @@ -9,6 +9,8 @@ * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ #include <stdlib.h> +#include <string.h> +#include <assert.h> #include "./aom_config.h" #include "./aom_dsp_rtcd.h" @@ -20,6 +22,9 @@ #include "aom_dsp/aom_filter.h" #include "aom_dsp/blend.h" +#include "./av1_rtcd.h" +#include "av1/common/filter.h" + uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride) { int distortion = 0; @@ -246,6 +251,13 @@ VARIANCES(4, 2) VARIANCES(2, 4) VARIANCES(2, 2) +#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES +VARIANCES(4, 16) +VARIANCES(16, 4) +VARIANCES(8, 32) +VARIANCES(32, 8) +#endif + GET_VAR(16, 16) GET_VAR(8, 8) @@ -271,33 +283,66 @@ void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, // Get pred block from up-sampled reference. void aom_upsampled_pred_c(uint8_t *comp_pred, int width, int height, - const uint8_t *ref, int ref_stride) { - int i, j, k; - int stride = ref_stride << 3; - - for (i = 0; i < height; i++) { - for (j = 0, k = 0; j < width; j++, k += 8) { - comp_pred[j] = ref[k]; + int subpel_x_q3, int subpel_y_q3, const uint8_t *ref, + int ref_stride) { + if (!subpel_x_q3 && !subpel_y_q3) { + int i; + for (i = 0; i < height; i++) { + memcpy(comp_pred, ref, width * sizeof(*comp_pred)); + comp_pred += width; + ref += ref_stride; + } + } else { + InterpFilterParams filter; + filter = av1_get_interp_filter_params(EIGHTTAP_REGULAR); + if (!subpel_y_q3) { + const int16_t *kernel; + kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); + /*Directly call C version to allow this to work for small (2x2) sizes.*/ + aom_convolve8_horiz_c(ref, ref_stride, comp_pred, width, kernel, 16, NULL, + -1, width, height); + } else if (!subpel_x_q3) { + const int16_t *kernel; + kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); + /*Directly call C version to allow this to work for small (2x2) sizes.*/ + aom_convolve8_vert_c(ref, ref_stride, comp_pred, width, NULL, -1, kernel, + 16, width, height); + } else { + DECLARE_ALIGNED(16, uint8_t, + temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]); + const int16_t *kernel_x; + const int16_t *kernel_y; + int intermediate_height; + kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); + kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); + intermediate_height = + (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps; + assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16); + /*Directly call C versions to allow this to work for small (2x2) sizes.*/ + aom_convolve8_horiz_c(ref - ref_stride * ((filter.taps >> 1) - 1), + ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, + -1, width, intermediate_height); + aom_convolve8_vert_c(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1), + MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, + 16, width, height); } - comp_pred += width; - ref += stride; } } void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred, - int width, int height, const uint8_t *ref, + int width, int height, int subpel_x_q3, + int subpel_y_q3, const uint8_t *ref, int ref_stride) { int i, j; - int stride = ref_stride << 3; + aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref, + ref_stride); for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { - const int tmp = ref[(j << 3)] + pred[j]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + comp_pred[j] = ROUND_POWER_OF_TWO(comp_pred[j] + pred[j], 1); } comp_pred += width; pred += width; - ref += stride; } } @@ -611,6 +656,13 @@ HIGHBD_VARIANCES(4, 2) HIGHBD_VARIANCES(2, 4) HIGHBD_VARIANCES(2, 2) +#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES +HIGHBD_VARIANCES(4, 16) +HIGHBD_VARIANCES(16, 4) +HIGHBD_VARIANCES(8, 32) +HIGHBD_VARIANCES(32, 8) +#endif + HIGHBD_GET_VAR(8) HIGHBD_GET_VAR(16) @@ -637,37 +689,74 @@ void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8, } void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height, - const uint8_t *ref8, int ref_stride) { - int i, j; - int stride = ref_stride << 3; - - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - comp_pred[j] = ref[(j << 3)]; + int subpel_x_q3, int subpel_y_q3, + const uint8_t *ref8, int ref_stride, int bd) { + if (!subpel_x_q3 && !subpel_y_q3) { + const uint16_t *ref; + int i; + ref = CONVERT_TO_SHORTPTR(ref8); + for (i = 0; i < height; i++) { + memcpy(comp_pred, ref, width * sizeof(*comp_pred)); + comp_pred += width; + ref += ref_stride; + } + } else { + InterpFilterParams filter; + filter = av1_get_interp_filter_params(EIGHTTAP_REGULAR); + if (!subpel_y_q3) { + const int16_t *kernel; + kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); + /*Directly call C version to allow this to work for small (2x2) sizes.*/ + aom_highbd_convolve8_horiz_c(ref8, ref_stride, + CONVERT_TO_BYTEPTR(comp_pred), width, kernel, + 16, NULL, -1, width, height, bd); + } else if (!subpel_x_q3) { + const int16_t *kernel; + kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); + /*Directly call C version to allow this to work for small (2x2) sizes.*/ + aom_highbd_convolve8_vert_c(ref8, ref_stride, + CONVERT_TO_BYTEPTR(comp_pred), width, NULL, + -1, kernel, 16, width, height, bd); + } else { + DECLARE_ALIGNED(16, uint16_t, + temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]); + const int16_t *kernel_x; + const int16_t *kernel_y; + int intermediate_height; + kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); + kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); + intermediate_height = + (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps; + assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16); + /*Directly call C versions to allow this to work for small (2x2) sizes.*/ + aom_highbd_convolve8_horiz_c(ref8 - ref_stride * ((filter.taps >> 1) - 1), + ref_stride, CONVERT_TO_BYTEPTR(temp), + MAX_SB_SIZE, kernel_x, 16, NULL, -1, width, + intermediate_height, bd); + aom_highbd_convolve8_vert_c( + CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)), + MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y, + 16, width, height, bd); } - comp_pred += width; - ref += stride; } } void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred, const uint8_t *pred8, int width, - int height, const uint8_t *ref8, - int ref_stride) { + int height, int subpel_x_q3, + int subpel_y_q3, const uint8_t *ref8, + int ref_stride, int bd) { int i, j; - int stride = ref_stride << 3; - uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); + aom_highbd_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, + ref8, ref_stride, bd); for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { - const int tmp = pred[j] + ref[(j << 3)]; - comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + comp_pred[j] = ROUND_POWER_OF_TWO(pred[j] + comp_pred[j], 1); } comp_pred += width; pred += width; - ref += stride; } } #endif // CONFIG_HIGHBITDEPTH @@ -694,22 +783,23 @@ void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, } void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred, - int width, int height, const uint8_t *ref, + int width, int height, int subpel_x_q3, + int subpel_y_q3, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask) { int i, j; - int stride = ref_stride << 3; + aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref, + ref_stride); for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { if (!invert_mask) - comp_pred[j] = AOM_BLEND_A64(mask[j], ref[(j << 3)], pred[j]); + comp_pred[j] = AOM_BLEND_A64(mask[j], comp_pred[j], pred[j]); else - comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[(j << 3)]); + comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], comp_pred[j]); } comp_pred += width; pred += width; - ref += stride; mask += mask_stride; } } @@ -753,6 +843,13 @@ MASK_SUBPIX_VAR(128, 64) MASK_SUBPIX_VAR(128, 128) #endif // CONFIG_EXT_PARTITION +#if CONFIG_EXT_PARTITION_TYPES +MASK_SUBPIX_VAR(4, 16) +MASK_SUBPIX_VAR(16, 4) +MASK_SUBPIX_VAR(8, 32) +MASK_SUBPIX_VAR(32, 8) +#endif + #if CONFIG_HIGHBITDEPTH void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, @@ -775,26 +872,24 @@ void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8, } } -void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred, - const uint8_t *pred8, int width, - int height, const uint8_t *ref8, - int ref_stride, const uint8_t *mask, - int mask_stride, int invert_mask) { +void aom_highbd_comp_mask_upsampled_pred_c( + uint16_t *comp_pred, const uint8_t *pred8, int width, int height, + int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, + const uint8_t *mask, int mask_stride, int invert_mask, int bd) { int i, j; - int stride = ref_stride << 3; uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + aom_highbd_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, + ref8, ref_stride, bd); for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { if (!invert_mask) - comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j << 3], pred[j]); + comp_pred[j] = AOM_BLEND_A64(mask[j], comp_pred[j], pred[j]); else - comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j << 3]); + comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], comp_pred[j]); } comp_pred += width; pred += width; - ref += stride; mask += mask_stride; } } @@ -884,6 +979,13 @@ HIGHBD_MASK_SUBPIX_VAR(64, 128) HIGHBD_MASK_SUBPIX_VAR(128, 64) HIGHBD_MASK_SUBPIX_VAR(128, 128) #endif // CONFIG_EXT_PARTITION + +#if CONFIG_EXT_PARTITION_TYPES +HIGHBD_MASK_SUBPIX_VAR(4, 16) +HIGHBD_MASK_SUBPIX_VAR(16, 4) +HIGHBD_MASK_SUBPIX_VAR(8, 32) +HIGHBD_MASK_SUBPIX_VAR(32, 8) +#endif #endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_AV1 && CONFIG_EXT_INTER @@ -983,6 +1085,17 @@ OBMC_VAR(128, 128) OBMC_SUBPIX_VAR(128, 128) #endif // CONFIG_EXT_PARTITION +#if CONFIG_EXT_PARTITION_TYPES +OBMC_VAR(4, 16) +OBMC_SUBPIX_VAR(4, 16) +OBMC_VAR(16, 4) +OBMC_SUBPIX_VAR(16, 4) +OBMC_VAR(8, 32) +OBMC_SUBPIX_VAR(8, 32) +OBMC_VAR(32, 8) +OBMC_SUBPIX_VAR(32, 8) +#endif + #if CONFIG_HIGHBITDEPTH static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride, const int32_t *wsrc, @@ -1164,5 +1277,16 @@ HIGHBD_OBMC_SUBPIX_VAR(128, 64) HIGHBD_OBMC_VAR(128, 128) HIGHBD_OBMC_SUBPIX_VAR(128, 128) #endif // CONFIG_EXT_PARTITION + +#if CONFIG_EXT_PARTITION_TYPES +HIGHBD_OBMC_VAR(4, 16) +HIGHBD_OBMC_SUBPIX_VAR(4, 16) +HIGHBD_OBMC_VAR(16, 4) +HIGHBD_OBMC_SUBPIX_VAR(16, 4) +HIGHBD_OBMC_VAR(8, 32) +HIGHBD_OBMC_SUBPIX_VAR(8, 32) +HIGHBD_OBMC_VAR(32, 8) +HIGHBD_OBMC_SUBPIX_VAR(32, 8) +#endif #endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_AV1 && CONFIG_MOTION_VAR |