diff options
Diffstat (limited to 'third_party/aom/av1/common/warped_motion.c')
-rw-r--r-- | third_party/aom/av1/common/warped_motion.c | 464 |
1 files changed, 146 insertions, 318 deletions
diff --git a/third_party/aom/av1/common/warped_motion.c b/third_party/aom/av1/common/warped_motion.c index 75ae08723..34374af69 100644 --- a/third_party/aom/av1/common/warped_motion.c +++ b/third_party/aom/av1/common/warped_motion.c @@ -912,8 +912,8 @@ static void highbd_warp_plane_old(const WarpedMotionParams *const wm, in[0] = j; in[1] = i; projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y); - out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4); - out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4); + out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, SCALE_SUBPEL_BITS); + out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, SCALE_SUBPEL_BITS); if (conv_params->do_average) pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO( pred[(j - p_col) + (i - p_row) * p_stride] + @@ -939,136 +939,51 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int16_t beta, int16_t gamma, int16_t delta) { int32_t tmp[15 * 8]; int i, j, k, l, m; - - for (i = p_row; i < p_row + p_height; i += 8) { - for (j = p_col; j < p_col + p_width; j += 8) { - int32_t x4, y4, ix4, sx4, iy4, sy4; - if (subsampling_x) - x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 + - (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) / - 4; - else - x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0]; - - if (subsampling_y) - y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 + - (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) / - 4; - else - y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1]; - - ix4 = x4 >> WARPEDMODEL_PREC_BITS; - sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - iy4 = y4 >> WARPEDMODEL_PREC_BITS; - sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - - sx4 += alpha * (-4) + beta * (-4); - sy4 += gamma * (-4) + delta * (-4); - - sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - - // Horizontal filter - for (k = -7; k < 8; ++k) { - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - - int sx = sx4 + beta * (k + 4); - for (l = -4; l < 4; ++l) { - int ix = ix4 + l - 3; - const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) + - WARPEDPIXEL_PREC_SHIFTS; - assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); - const int16_t *coeffs = warped_filter[offs]; - - int32_t sum = 1 << (bd + WARPEDPIXEL_FILTER_BITS - 1); - for (m = 0; m < 8; ++m) { - int sample_x = ix + m; - if (sample_x < 0) - sample_x = 0; - else if (sample_x > width - 1) - sample_x = width - 1; - sum += ref[iy * stride + sample_x] * coeffs[m]; - } - sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS); - assert(0 <= sum && - sum < (1 << (bd + WARPEDPIXEL_FILTER_BITS + 1 - - HORSHEAR_REDUCE_PREC_BITS))); - tmp[(k + 7) * 8 + (l + 4)] = sum; - sx += alpha; - } - } - - // Vertical filter - for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) { - int sy = sy4 + delta * (k + 4); - for (l = -4; l < 4; ++l) { - uint16_t *p = - &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)]; - const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) + - WARPEDPIXEL_PREC_SHIFTS; - assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); - const int16_t *coeffs = warped_filter[offs]; - - int32_t sum = 1 << (bd + 2 * WARPEDPIXEL_FILTER_BITS - - HORSHEAR_REDUCE_PREC_BITS); - for (m = 0; m < 8; ++m) { - sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m]; - } - sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS); - assert(0 <= sum && sum < (1 << (bd + 2))); - uint16_t px = - clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd); - if (conv_params->do_average) - *p = ROUND_POWER_OF_TWO(*p + px, 1); - else - *p = px; - sy += gamma; - } - } - } - } -} - #if CONFIG_CONVOLVE_ROUND -void av1_highbd_warp_affine_post_round_c( - const int32_t *mat, const uint16_t *ref, int width, int height, int stride, - uint16_t *pred, int p_col, int p_row, int p_width, int p_height, - int p_stride, int subsampling_x, int subsampling_y, int bd, - ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, - int16_t delta) { - (void)pred; - (void)p_stride; - int32_t tmp[15 * 8]; - int i, j, k, l, m; - const int offset_bits_horiz = bd + FILTER_BITS - 1; - const int offset_bits_vert = bd + 2 * FILTER_BITS - conv_params->round_0; + const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND; + const int reduce_bits_horiz = + use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS; + const int max_bits_horiz = + use_conv_params + ? bd + FILTER_BITS + 1 - conv_params->round_0 + : bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS; + const int offset_bits_horiz = + use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1; + const int offset_bits_vert = + use_conv_params + ? bd + 2 * FILTER_BITS - conv_params->round_0 + : bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS; + if (use_conv_params) { + conv_params->do_post_rounding = 1; + } assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS); +#else + const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS; + const int max_bits_horiz = + bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS; + const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1; + const int offset_bits_vert = + bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS; +#endif + (void)max_bits_horiz; for (i = p_row; i < p_row + p_height; i += 8) { for (j = p_col; j < p_col + p_width; j += 8) { - int32_t x4, y4, ix4, sx4, iy4, sy4; - if (subsampling_x) - x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 + - (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) / - 4; - else - x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0]; - - if (subsampling_y) - y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 + - (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) / - 4; - else - y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1]; - - ix4 = x4 >> WARPEDMODEL_PREC_BITS; - sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - iy4 = y4 >> WARPEDMODEL_PREC_BITS; - sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); + // Calculate the center of this 8x8 block, + // project to luma coordinates (if in a subsampled chroma plane), + // apply the affine transformation, + // then convert back to the original coordinates (if necessary) + const int32_t src_x = (j + 4) << subsampling_x; + const int32_t src_y = (i + 4) << subsampling_y; + const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0]; + const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1]; + const int32_t x4 = dst_x >> subsampling_x; + const int32_t y4 = dst_y >> subsampling_y; + + int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS; + int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); + int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS; + int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); sx4 += alpha * (-4) + beta * (-4); sy4 += gamma * (-4) + delta * (-4); @@ -1101,9 +1016,8 @@ void av1_highbd_warp_affine_post_round_c( sample_x = width - 1; sum += ref[iy * stride + sample_x] * coeffs[m]; } - sum = ROUND_POWER_OF_TWO(sum, conv_params->round_0); - assert(0 <= sum && - sum < (1 << (bd + FILTER_BITS + 1 - conv_params->round_0))); + sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz); + assert(0 <= sum && sum < (1 << max_bits_horiz)); tmp[(k + 7) * 8 + (l + 4)] = sum; sx += alpha; } @@ -1112,7 +1026,7 @@ void av1_highbd_warp_affine_post_round_c( // Vertical filter for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) { int sy = sy4 + delta * (k + 4); - for (l = -4; l < 4; ++l) { + for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) { const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) + WARPEDPIXEL_PREC_SHIFTS; assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); @@ -1122,22 +1036,41 @@ void av1_highbd_warp_affine_post_round_c( for (m = 0; m < 8; ++m) { sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m]; } - - sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - - (1 << (offset_bits_horiz + FILTER_BITS - conv_params->round_0 - - conv_params->round_1)) - - (1 << (offset_bits_vert - conv_params->round_1)); - CONV_BUF_TYPE *p = - &conv_params->dst[(i - p_row + k + 4) * conv_params->dst_stride + - (j - p_col + l + 4)]; - *p += sum; +#if CONFIG_CONVOLVE_ROUND + if (use_conv_params) { + CONV_BUF_TYPE *p = + &conv_params + ->dst[(i - p_row + k + 4) * conv_params->dst_stride + + (j - p_col + l + 4)]; + sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - + (1 << (offset_bits_horiz + FILTER_BITS - + conv_params->round_0 - conv_params->round_1)) - + (1 << (offset_bits_vert - conv_params->round_1)); + if (conv_params->do_average) + *p += sum; + else + *p = sum; + } else { +#else + { +#endif + uint16_t *p = + &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)]; + sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS); + assert(0 <= sum && sum < (1 << (bd + 2))); + uint16_t px = + clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd); + if (conv_params->do_average) + *p = ROUND_POWER_OF_TWO(*p + px, 1); + else + *p = px; + } sy += gamma; } } } } } -#endif static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8, int width, int height, int stride, @@ -1160,25 +1093,10 @@ static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8, const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8); uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); -#if CONFIG_CONVOLVE_ROUND - if (conv_params->round == CONVOLVE_OPT_NO_ROUND) { - conv_params->do_post_rounding = 1; - av1_highbd_warp_affine_post_round( - mat, ref, width, height, stride, pred, p_col, p_row, p_width, - p_height, p_stride, subsampling_x, subsampling_y, bd, conv_params, - alpha, beta, gamma, delta); - } else { - av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, - p_row, p_width, p_height, p_stride, subsampling_x, - subsampling_y, bd, conv_params, alpha, beta, gamma, - delta); - } -#else av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width, p_height, p_stride, subsampling_x, subsampling_y, bd, conv_params, alpha, beta, gamma, delta); -#endif } else { highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row, p_width, p_height, p_stride, subsampling_x, @@ -1251,8 +1169,8 @@ static void warp_plane_old(const WarpedMotionParams *const wm, in[0] = j; in[1] = i; projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y); - out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4); - out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4); + out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, SCALE_SUBPEL_BITS); + out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, SCALE_SUBPEL_BITS); if (conv_params->do_average) pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO( pred[(j - p_col) + (i - p_row) * p_stride] + @@ -1359,143 +1277,51 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int32_t tmp[15 * 8]; int i, j, k, l, m; const int bd = 8; - - for (i = p_row; i < p_row + p_height; i += 8) { - for (j = p_col; j < p_col + p_width; j += 8) { - int32_t x4, y4, ix4, sx4, iy4, sy4; - if (subsampling_x) - x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 + - (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) / - 4; - else - x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0]; - - if (subsampling_y) - y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 + - (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) / - 4; - else - y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1]; - - ix4 = x4 >> WARPEDMODEL_PREC_BITS; - sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - iy4 = y4 >> WARPEDMODEL_PREC_BITS; - sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - - sx4 += alpha * (-4) + beta * (-4); - sy4 += gamma * (-4) + delta * (-4); - - sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1); - - // Horizontal filter - for (k = -7; k < 8; ++k) { - // Clamp to top/bottom edge of the frame - int iy = iy4 + k; - if (iy < 0) - iy = 0; - else if (iy > height - 1) - iy = height - 1; - - int sx = sx4 + beta * (k + 4); - - for (l = -4; l < 4; ++l) { - int ix = ix4 + l - 3; - // At this point, sx = sx4 + alpha * l + beta * k - const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) + - WARPEDPIXEL_PREC_SHIFTS; - assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); - const int16_t *coeffs = warped_filter[offs]; - - int32_t sum = 1 << (bd + WARPEDPIXEL_FILTER_BITS - 1); - for (m = 0; m < 8; ++m) { - // Clamp to left/right edge of the frame - int sample_x = ix + m; - if (sample_x < 0) - sample_x = 0; - else if (sample_x > width - 1) - sample_x = width - 1; - - sum += ref[iy * stride + sample_x] * coeffs[m]; - } - sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS); - assert(0 <= sum && - sum < (1 << (bd + WARPEDPIXEL_FILTER_BITS + 1 - - HORSHEAR_REDUCE_PREC_BITS))); - tmp[(k + 7) * 8 + (l + 4)] = sum; - sx += alpha; - } - } - - // Vertical filter - for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) { - int sy = sy4 + delta * (k + 4); - for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) { - uint8_t *p = - &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)]; - // At this point, sy = sy4 + gamma * l + delta * k - const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) + - WARPEDPIXEL_PREC_SHIFTS; - assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3); - const int16_t *coeffs = warped_filter[offs]; - - int32_t sum = 1 << (bd + 2 * WARPEDPIXEL_FILTER_BITS - - HORSHEAR_REDUCE_PREC_BITS); - for (m = 0; m < 8; ++m) { - sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m]; - } - sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS); - assert(0 <= sum && sum < (1 << (bd + 2))); - uint8_t px = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd)); - if (conv_params->do_average) - *p = ROUND_POWER_OF_TWO(*p + px, 1); - else - *p = px; - sy += gamma; - } - } - } - } -} - #if CONFIG_CONVOLVE_ROUND -void av1_warp_affine_post_round_c(const int32_t *mat, const uint8_t *ref, - int width, int height, int stride, - uint8_t *pred, int p_col, int p_row, - int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, - ConvolveParams *conv_params, int16_t alpha, - int16_t beta, int16_t gamma, int16_t delta) { - (void)pred; - (void)p_stride; - int32_t tmp[15 * 8]; - int i, j, k, l, m; - const int bd = 8; - const int offset_bits_horiz = bd + FILTER_BITS - 1; - const int offset_bits_vert = bd + 2 * FILTER_BITS - conv_params->round_0; + const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND; + const int reduce_bits_horiz = + use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS; + const int max_bits_horiz = + use_conv_params + ? bd + FILTER_BITS + 1 - conv_params->round_0 + : bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS; + const int offset_bits_horiz = + use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1; + const int offset_bits_vert = + use_conv_params + ? bd + 2 * FILTER_BITS - conv_params->round_0 + : bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS; + if (use_conv_params) { + conv_params->do_post_rounding = 1; + } assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS); +#else + const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS; + const int max_bits_horiz = + bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS; + const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1; + const int offset_bits_vert = + bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS; +#endif + (void)max_bits_horiz; for (i = p_row; i < p_row + p_height; i += 8) { for (j = p_col; j < p_col + p_width; j += 8) { - int32_t x4, y4, ix4, sx4, iy4, sy4; - if (subsampling_x) - x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 + - (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) / - 4; - else - x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0]; - - if (subsampling_y) - y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 + - (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) / - 4; - else - y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1]; - - ix4 = x4 >> WARPEDMODEL_PREC_BITS; - sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); - iy4 = y4 >> WARPEDMODEL_PREC_BITS; - sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); + // Calculate the center of this 8x8 block, + // project to luma coordinates (if in a subsampled chroma plane), + // apply the affine transformation, + // then convert back to the original coordinates (if necessary) + const int32_t src_x = (j + 4) << subsampling_x; + const int32_t src_y = (i + 4) << subsampling_y; + const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0]; + const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1]; + const int32_t x4 = dst_x >> subsampling_x; + const int32_t y4 = dst_y >> subsampling_y; + + int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS; + int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); + int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS; + int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1); sx4 += alpha * (-4) + beta * (-4); sy4 += gamma * (-4) + delta * (-4); @@ -1533,9 +1359,8 @@ void av1_warp_affine_post_round_c(const int32_t *mat, const uint8_t *ref, sum += ref[iy * stride + sample_x] * coeffs[m]; } - sum = ROUND_POWER_OF_TWO(sum, conv_params->round_0); - assert(0 <= sum && - sum < (1 << (bd + FILTER_BITS + 1 - conv_params->round_0))); + sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz); + assert(0 <= sum && sum < (1 << max_bits_horiz)); tmp[(k + 7) * 8 + (l + 4)] = sum; sx += alpha; } @@ -1552,26 +1377,43 @@ void av1_warp_affine_post_round_c(const int32_t *mat, const uint8_t *ref, const int16_t *coeffs = warped_filter[offs]; int32_t sum = 1 << offset_bits_vert; - for (m = 0; m < 8; ++m) { sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m]; } - - sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - - (1 << (offset_bits_horiz + FILTER_BITS - conv_params->round_0 - - conv_params->round_1)) - - (1 << (offset_bits_vert - conv_params->round_1)); - CONV_BUF_TYPE *p = - &conv_params->dst[(i - p_row + k + 4) * conv_params->dst_stride + - (j - p_col + l + 4)]; - *p += sum; +#if CONFIG_CONVOLVE_ROUND + if (use_conv_params) { + CONV_BUF_TYPE *p = + &conv_params + ->dst[(i - p_row + k + 4) * conv_params->dst_stride + + (j - p_col + l + 4)]; + sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - + (1 << (offset_bits_horiz + FILTER_BITS - + conv_params->round_0 - conv_params->round_1)) - + (1 << (offset_bits_vert - conv_params->round_1)); + if (conv_params->do_average) + *p += sum; + else + *p = sum; + } else { +#else + { +#endif + uint8_t *p = + &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)]; + sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS); + assert(0 <= sum && sum < (1 << (bd + 2))); + uint8_t px = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd)); + if (conv_params->do_average) + *p = ROUND_POWER_OF_TWO(*p + px, 1); + else + *p = px; + } sy += gamma; } } } } } -#endif // CONFIG_CONVOLVE_ROUND static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref, int width, int height, int stride, uint8_t *pred, @@ -1590,23 +1432,9 @@ static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref, const int16_t gamma = wm->gamma; const int16_t delta = wm->delta; -#if CONFIG_CONVOLVE_ROUND - if (conv_params->round == CONVOLVE_OPT_NO_ROUND) { - conv_params->do_post_rounding = 1; - av1_warp_affine_post_round(mat, ref, width, height, stride, pred, p_col, - p_row, p_width, p_height, p_stride, - subsampling_x, subsampling_y, conv_params, - alpha, beta, gamma, delta); - } else { - av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, - p_width, p_height, p_stride, subsampling_x, subsampling_y, - conv_params, alpha, beta, gamma, delta); - } -#else av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width, p_height, p_stride, subsampling_x, subsampling_y, conv_params, alpha, beta, gamma, delta); -#endif } else { warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width, p_height, p_stride, subsampling_x, subsampling_y, x_scale, |