diff options
Diffstat (limited to 'third_party/aom/aom_dsp/variance.c')
-rw-r--r-- | third_party/aom/aom_dsp/variance.c | 399 |
1 files changed, 195 insertions, 204 deletions
diff --git a/third_party/aom/aom_dsp/variance.c b/third_party/aom/aom_dsp/variance.c index 817ebe15d..23b715309 100644 --- a/third_party/aom/aom_dsp/variance.c +++ b/third_party/aom/aom_dsp/variance.c @@ -55,24 +55,6 @@ uint32_t aom_get_mb_ss_c(const int16_t *a) { return sum; } -uint32_t aom_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - uint32_t *sse) { - return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse); -} - -uint32_t aom_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - uint32_t *sse) { - return aom_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse); -} - -uint32_t aom_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - uint32_t *sse) { - return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse); -} - static void variance(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int w, int h, uint32_t *sse, int *sum) { int i, j; @@ -302,7 +284,7 @@ void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col, const MV *const mv, uint8_t *comp_pred, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref, - int ref_stride) { + int ref_stride, int subpel_search) { // expect xd == NULL only in tests if (xd != NULL) { const MB_MODE_INFO *mi = xd->mi[0]; @@ -370,7 +352,7 @@ void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm, warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL; // Get convolve parameters. - ConvolveParams conv_params = get_conv_params(ref_num, 0, plane, xd->bd); + ConvolveParams conv_params = get_conv_params(0, plane, xd->bd); const InterpFilters filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR); @@ -387,7 +369,9 @@ void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm, } const InterpFilterParams *filter = - av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8); + (subpel_search == 1) + ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR) + : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8); if (!subpel_x_q3 && !subpel_y_q3) { for (int i = 0; i < height; i++) { @@ -398,13 +382,13 @@ void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm, } else if (!subpel_y_q3) { const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); - aom_convolve8_horiz(ref, ref_stride, comp_pred, width, kernel, 16, NULL, -1, - width, height); + aom_convolve8_horiz_c(ref, ref_stride, comp_pred, width, kernel, 16, NULL, + -1, width, height); } else if (!subpel_x_q3) { const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); - aom_convolve8_vert(ref, ref_stride, comp_pred, width, NULL, -1, kernel, 16, - width, height); + aom_convolve8_vert_c(ref, ref_stride, comp_pred, width, NULL, -1, kernel, + 16, width, height); } else { DECLARE_ALIGNED(16, uint8_t, temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]); @@ -415,12 +399,12 @@ void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm, const int intermediate_height = (((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps; assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16); - aom_convolve8_horiz(ref - ref_stride * ((filter->taps >> 1) - 1), - ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1, - width, intermediate_height); - aom_convolve8_vert(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1), - MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, 16, - width, height); + aom_convolve8_horiz_c(ref - ref_stride * ((filter->taps >> 1) - 1), + ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1, + width, intermediate_height); + aom_convolve8_vert_c(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1), + MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, 16, + width, height); } } @@ -429,11 +413,11 @@ void aom_comp_avg_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm, uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref, - int ref_stride) { + int ref_stride, int subpel_search) { int i, j; aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height, - subpel_x_q3, subpel_y_q3, ref, ref_stride); + subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search); for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { comp_pred[j] = ROUND_POWER_OF_TWO(comp_pred[j] + pred[j], 1); @@ -466,13 +450,14 @@ void aom_jnt_comp_avg_upsampled_pred_c( MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col, const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref, - int ref_stride, const JNT_COMP_PARAMS *jcp_param) { + int ref_stride, const JNT_COMP_PARAMS *jcp_param, int subpel_search) { int i, j; const int fwd_offset = jcp_param->fwd_offset; const int bck_offset = jcp_param->bck_offset; - aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height, - subpel_x_q3, subpel_y_q3, ref, ref_stride); + aom_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width, height, + subpel_x_q3, subpel_y_q3, ref, ref_stride, + subpel_search); for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { @@ -706,125 +691,125 @@ void aom_highbd_var_filter_block2d_bil_second_pass( dst, dst_stride, sse); \ } -#define HIGHBD_SUBPIX_AVG_VAR(W, H) \ - uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ - \ - aom_highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ - aom_highbd_var_filter_block2d_bil_second_pass( \ - fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ - \ - aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W); \ - \ - return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ - dst, dst_stride, sse); \ - } \ - \ - uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ - \ - aom_highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ - aom_highbd_var_filter_block2d_bil_second_pass( \ - fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ - \ - aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W); \ - \ - return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ - dst, dst_stride, sse); \ - } \ - \ - uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ - \ - aom_highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ - aom_highbd_var_filter_block2d_bil_second_pass( \ - fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ - \ - aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W); \ - \ - return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ - dst, dst_stride, sse); \ - } \ - \ - uint32_t aom_highbd_8_jnt_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse, \ - const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ - \ - aom_highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ - aom_highbd_var_filter_block2d_bil_second_pass( \ - fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ - \ - aom_highbd_jnt_comp_avg_pred(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W, jcp_param); \ - \ - return aom_highbd_8_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \ - dst_stride, sse); \ - } \ - \ - uint32_t aom_highbd_10_jnt_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse, \ - const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ - \ - aom_highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ - aom_highbd_var_filter_block2d_bil_second_pass( \ - fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ - \ - aom_highbd_jnt_comp_avg_pred(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W, jcp_param); \ - \ - return aom_highbd_10_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \ - dst_stride, sse); \ - } \ - \ - uint32_t aom_highbd_12_jnt_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse, \ - const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ - \ - aom_highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ - aom_highbd_var_filter_block2d_bil_second_pass( \ - fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ - \ - aom_highbd_jnt_comp_avg_pred(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W, jcp_param); \ - \ - return aom_highbd_12_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \ - dst_stride, sse); \ +#define HIGHBD_SUBPIX_AVG_VAR(W, H) \ + uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ + \ + aom_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ + aom_highbd_var_filter_block2d_bil_second_pass( \ + fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ + \ + aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \ + CONVERT_TO_BYTEPTR(temp2), W); \ + \ + return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ + dst, dst_stride, sse); \ + } \ + \ + uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ + \ + aom_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ + aom_highbd_var_filter_block2d_bil_second_pass( \ + fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ + \ + aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \ + CONVERT_TO_BYTEPTR(temp2), W); \ + \ + return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ + dst, dst_stride, sse); \ + } \ + \ + uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ + \ + aom_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ + aom_highbd_var_filter_block2d_bil_second_pass( \ + fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ + \ + aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \ + CONVERT_TO_BYTEPTR(temp2), W); \ + \ + return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ + dst, dst_stride, sse); \ + } \ + \ + uint32_t aom_highbd_8_jnt_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, uint32_t *sse, \ + const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ + \ + aom_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ + aom_highbd_var_filter_block2d_bil_second_pass( \ + fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ + \ + aom_highbd_jnt_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \ + CONVERT_TO_BYTEPTR(temp2), W, jcp_param); \ + \ + return aom_highbd_8_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \ + dst_stride, sse); \ + } \ + \ + uint32_t aom_highbd_10_jnt_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, uint32_t *sse, \ + const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ + \ + aom_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ + aom_highbd_var_filter_block2d_bil_second_pass( \ + fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ + \ + aom_highbd_jnt_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \ + CONVERT_TO_BYTEPTR(temp2), W, jcp_param); \ + \ + return aom_highbd_10_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \ + dst_stride, sse); \ + } \ + \ + uint32_t aom_highbd_12_jnt_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, uint32_t *sse, \ + const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ + \ + aom_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \ + aom_highbd_var_filter_block2d_bil_second_pass( \ + fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ + \ + aom_highbd_jnt_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \ + CONVERT_TO_BYTEPTR(temp2), W, jcp_param); \ + \ + return aom_highbd_12_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \ + dst_stride, sse); \ } /* All three forms of the variance are available in the same sizes. */ @@ -867,12 +852,13 @@ HIGHBD_MSE(16, 8) HIGHBD_MSE(8, 16) HIGHBD_MSE(8, 8) -void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8, +void aom_highbd_comp_avg_pred_c(uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride) { int i, j; uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8); for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { const int tmp = pred[j] + ref[j]; @@ -887,9 +873,10 @@ void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8, void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, const MV *const mv, - uint16_t *comp_pred, int width, int height, + uint8_t *comp_pred8, int width, int height, int subpel_x_q3, int subpel_y_q3, - const uint8_t *ref8, int ref_stride, int bd) { + const uint8_t *ref8, int ref_stride, int bd, + int subpel_search) { // expect xd == NULL only in tests if (xd != NULL) { const MB_MODE_INFO *mi = xd->mi[0]; @@ -902,8 +889,6 @@ void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd, if (is_scaled) { // Note: This is mostly a copy from the >=8X8 case in // build_inter_predictors() function, with some small tweaks. - uint8_t *comp_pred8 = CONVERT_TO_BYTEPTR(comp_pred); - // Some assumptions. const int plane = 0; @@ -958,7 +943,7 @@ void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd, warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL; // Get convolve parameters. - ConvolveParams conv_params = get_conv_params(ref_num, 0, plane, xd->bd); + ConvolveParams conv_params = get_conv_params(0, plane, xd->bd); const InterpFilters filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR); @@ -975,13 +960,14 @@ void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd, } const InterpFilterParams *filter = - av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8); + (subpel_search == 1) + ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR) + : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8); if (!subpel_x_q3 && !subpel_y_q3) { - const uint16_t *ref; - int i; - ref = CONVERT_TO_SHORTPTR(ref8); - for (i = 0; i < height; i++) { + const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8); + for (int i = 0; i < height; i++) { memcpy(comp_pred, ref, width * sizeof(*comp_pred)); comp_pred += width; ref += ref_stride; @@ -989,13 +975,13 @@ void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd, } else if (!subpel_y_q3) { const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); - aom_highbd_convolve8_horiz(ref8, ref_stride, CONVERT_TO_BYTEPTR(comp_pred), - width, kernel, 16, NULL, -1, width, height, bd); + aom_highbd_convolve8_horiz(ref8, ref_stride, comp_pred8, width, kernel, 16, + NULL, -1, width, height, bd); } else if (!subpel_x_q3) { const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); - aom_highbd_convolve8_vert(ref8, ref_stride, CONVERT_TO_BYTEPTR(comp_pred), - width, NULL, -1, kernel, 16, width, height, bd); + aom_highbd_convolve8_vert(ref8, ref_stride, comp_pred8, width, NULL, -1, + kernel, 16, width, height, bd); } else { DECLARE_ALIGNED(16, uint16_t, temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]); @@ -1012,22 +998,23 @@ void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd, intermediate_height, bd); aom_highbd_convolve8_vert( CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1)), - MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y, - 16, width, height, bd); + MAX_SB_SIZE, comp_pred8, width, NULL, -1, kernel_y, 16, width, height, + bd); } } void aom_highbd_comp_avg_upsampled_pred_c( MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, - const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width, + const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, - int ref_stride, int bd) { + int ref_stride, int bd, int subpel_search) { int i, j; const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, + uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8); + aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width, height, subpel_x_q3, subpel_y_q3, ref8, ref_stride, - bd); + bd, subpel_search); for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { comp_pred[j] = ROUND_POWER_OF_TWO(pred[j] + comp_pred[j], 1); @@ -1037,7 +1024,7 @@ void aom_highbd_comp_avg_upsampled_pred_c( } } -void aom_highbd_jnt_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8, +void aom_highbd_jnt_comp_avg_pred_c(uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const JNT_COMP_PARAMS *jcp_param) { @@ -1046,6 +1033,7 @@ void aom_highbd_jnt_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8, const int bck_offset = jcp_param->bck_offset; uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8); for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { @@ -1061,17 +1049,18 @@ void aom_highbd_jnt_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8, void aom_highbd_jnt_comp_avg_upsampled_pred_c( MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, - const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width, + const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, - int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param) { + int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param, + int subpel_search) { int i, j; const int fwd_offset = jcp_param->fwd_offset; const int bck_offset = jcp_param->bck_offset; const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - - aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, + uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8); + aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width, height, subpel_x_q3, subpel_y_q3, ref8, ref_stride, - bd); + bd, subpel_search); for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { @@ -1104,21 +1093,23 @@ void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, } } -void aom_comp_mask_upsampled_pred(MACROBLOCKD *xd, const AV1_COMMON *const cm, - int mi_row, int mi_col, const MV *const mv, - uint8_t *comp_pred, const uint8_t *pred, - int width, int height, int subpel_x_q3, - int subpel_y_q3, const uint8_t *ref, - int ref_stride, const uint8_t *mask, - int mask_stride, int invert_mask) { +void aom_comp_mask_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm, + int mi_row, int mi_col, const MV *const mv, + uint8_t *comp_pred, const uint8_t *pred, + int width, int height, int subpel_x_q3, + int subpel_y_q3, const uint8_t *ref, + int ref_stride, const uint8_t *mask, + int mask_stride, int invert_mask, + int subpel_search) { if (subpel_x_q3 | subpel_y_q3) { - aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height, - subpel_x_q3, subpel_y_q3, ref, ref_stride); + aom_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width, height, + subpel_x_q3, subpel_y_q3, ref, ref_stride, + subpel_search); ref = comp_pred; ref_stride = width; } - aom_comp_mask_pred(comp_pred, pred, width, height, ref, ref_stride, mask, - mask_stride, invert_mask); + aom_comp_mask_pred_c(comp_pred, pred, width, height, ref, ref_stride, mask, + mask_stride, invert_mask); } #define MASK_SUBPIX_VAR(W, H) \ @@ -1164,13 +1155,14 @@ MASK_SUBPIX_VAR(32, 8) MASK_SUBPIX_VAR(16, 64) MASK_SUBPIX_VAR(64, 16) -void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8, +void aom_highbd_comp_mask_pred_c(uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask) { int i, j; uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8); for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { if (!invert_mask) @@ -1187,16 +1179,15 @@ void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8, void aom_highbd_comp_mask_upsampled_pred( MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, - const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width, + const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask, - int bd) { - aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, + int bd, int subpel_search) { + aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width, height, subpel_x_q3, subpel_y_q3, ref8, ref_stride, - bd); - aom_highbd_comp_mask_pred(comp_pred, pred8, width, height, - CONVERT_TO_BYTEPTR(comp_pred), width, mask, - mask_stride, invert_mask); + bd, subpel_search); + aom_highbd_comp_mask_pred(comp_pred8, pred8, width, height, comp_pred8, width, + mask, mask_stride, invert_mask); } #define HIGHBD_MASK_SUBPIX_VAR(W, H) \ @@ -1214,7 +1205,7 @@ void aom_highbd_comp_mask_upsampled_pred( aom_highbd_var_filter_block2d_bil_second_pass( \ fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ \ - aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H, \ + aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \ invert_mask); \ \ @@ -1236,7 +1227,7 @@ void aom_highbd_comp_mask_upsampled_pred( aom_highbd_var_filter_block2d_bil_second_pass( \ fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ \ - aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H, \ + aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \ invert_mask); \ \ @@ -1258,7 +1249,7 @@ void aom_highbd_comp_mask_upsampled_pred( aom_highbd_var_filter_block2d_bil_second_pass( \ fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \ \ - aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H, \ + aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \ invert_mask); \ \ |