Diffstat (limited to 'third_party/aom/av1/encoder')
59 files changed, 9090 insertions, 5254 deletions
diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.c b/third_party/aom/av1/encoder/aq_cyclicrefresh.c index b2b410617..05aa28c9f 100644 --- a/third_party/aom/av1/encoder/aq_cyclicrefresh.c +++ b/third_party/aom/av1/encoder/aq_cyclicrefresh.c @@ -352,10 +352,7 @@ void av1_cyclic_refresh_check_golden_update(AV1_COMP *const cpi) { // For video conference clips, if the background has high motion in current // frame because of the camera movement, set this frame as the golden frame. // Use 70% and 5% as the thresholds for golden frame refreshing. - // Also, force this frame as a golden update frame if this frame will change - // the resolution (av1_resize_pending != 0). - if (av1_resize_pending(cpi) || - (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1)) { + if (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1) { av1_cyclic_refresh_set_golden_update(cpi); rc->frames_till_gf_update_due = rc->baseline_gf_interval; diff --git a/third_party/aom/av1/encoder/av1_quantize.c b/third_party/aom/av1/encoder/av1_quantize.c index 63727df1f..dd53d4223 100644 --- a/third_party/aom/av1/encoder/av1_quantize.c +++ b/third_party/aom/av1/encoder/av1_quantize.c @@ -845,7 +845,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, } #endif // CONFIG_NEW_QUANT -#if CONFIG_HIGHBITDEPTH void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, @@ -899,14 +898,29 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, switch (qparam->log_scale) { case 0: - aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, - pd->dequant, eob_ptr, sc->scan, sc->iscan + if (LIKELY(n_coeffs >= 8)) { + aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round, p->quant, p->quant_shift, qcoeff_ptr, + dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, + sc->iscan #if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr + , + qm_ptr, iqm_ptr #endif - ); + ); + } else { + // TODO(luoyi): Need SIMD (e.g. 
sse2) for smaller block size + // quantization + aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round, p->quant, p->quant_shift, qcoeff_ptr, + dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, + sc->iscan +#if CONFIG_AOM_QM + , + qm_ptr, iqm_ptr +#endif + ); + } break; case 1: aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, @@ -936,7 +950,6 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, } } -#if CONFIG_HIGHBITDEPTH static INLINE void highbd_quantize_dc( const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, @@ -958,14 +971,13 @@ static INLINE void highbd_quantize_dc( const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp = abs_coeff + round_ptr[0]; - const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> (16 - log_scale)); + const int abs_qcoeff = (int)((tmp * quant) >> (16 - log_scale)); qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / (1 << log_scale); if (abs_qcoeff) eob = 0; } *eob_ptr = eob + 1; } -#endif // CONFIG_HIGHBITDEPTH void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, @@ -1504,9 +1516,7 @@ void av1_highbd_quantize_dc_nuq_facade( } } #endif // CONFIG_NEW_QUANT -#endif // CONFIG_HIGHBITDEPTH -#if CONFIG_HIGHBITDEPTH void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, @@ -1547,15 +1557,14 @@ void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, #endif const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + round_ptr[rc != 0]; + const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale); #if CONFIG_AOM_QM - const uint32_t abs_qcoeff = - (uint32_t)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS)); + const int abs_qcoeff = + (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS)); qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale; #else - const uint32_t abs_qcoeff = - (uint32_t)((tmp * quant_ptr[rc != 0]) >> shift); + const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> shift); qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale; #endif @@ -1565,8 +1574,6 @@ void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, *eob_ptr = eob + 1; } -#endif // CONFIG_HIGHBITDEPTH - static void invert_quant(int16_t *quant, int16_t *shift, int d) { uint32_t t; int l, m; diff --git a/third_party/aom/av1/encoder/av1_quantize.h b/third_party/aom/av1/encoder/av1_quantize.h index 4bc9cccc2..e5fc8b528 100644 --- a/third_party/aom/av1/encoder/av1_quantize.h +++ b/third_party/aom/av1/encoder/av1_quantize.h @@ -146,7 +146,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const QUANT_PARAM *qparam); #endif // CONFIG_NEW_QUANT -#if CONFIG_HIGHBITDEPTH void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, @@ -190,7 +189,6 @@ void av1_highbd_quantize_dc_nuq_facade( tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const SCAN_ORDER *sc, const QUANT_PARAM *qparam); #endif // CONFIG_NEW_QUANT -#endif // 
CONFIG_HIGHBITDEPTH #ifdef __cplusplus } // extern "C" diff --git a/third_party/aom/av1/encoder/bgsprite.c b/third_party/aom/av1/encoder/bgsprite.c new file mode 100644 index 000000000..64deade06 --- /dev/null +++ b/third_party/aom/av1/encoder/bgsprite.c @@ -0,0 +1,748 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#define _POSIX_C_SOURCE 200112L // rand_r() +#include <assert.h> +#include <float.h> +#include <limits.h> +#include <math.h> +#include <stdlib.h> +#include <time.h> + +#include "av1/encoder/bgsprite.h" + +#include "aom_mem/aom_mem.h" +#include "./aom_scale_rtcd.h" +#include "av1/common/mv.h" +#include "av1/common/warped_motion.h" +#include "av1/encoder/encoder.h" +#include "av1/encoder/global_motion.h" +#include "av1/encoder/mathutils.h" +#include "av1/encoder/temporal_filter.h" + +/* Blending Modes: + * 0 = Median + * 1 = Mean + */ +#define BGSPRITE_BLENDING_MODE 1 + +/* Interpolation for panorama alignment sampling: + * 0 = Nearest neighbor + * 1 = Bilinear + */ +#define BGSPRITE_INTERPOLATION 0 + +#define TRANSFORM_MAT_DIM 3 + +typedef struct { +#if CONFIG_HIGHBITDEPTH + uint16_t y; + uint16_t u; + uint16_t v; +#else + uint8_t y; + uint8_t u; + uint8_t v; +#endif // CONFIG_HIGHBITDEPTH +} YuvPixel; + +// Maps to convert from matrix form to param vector form. +static const int params_to_matrix_map[] = { 2, 3, 0, 4, 5, 1, 6, 7 }; +static const int matrix_to_params_map[] = { 2, 5, 0, 1, 3, 4, 6, 7 }; + +// Convert the parameter array to a 3x3 matrix form. +static void params_to_matrix(const double *const params, double *target) { + for (int i = 0; i < MAX_PARAMDIM - 1; i++) { + assert(params_to_matrix_map[i] < MAX_PARAMDIM - 1); + target[i] = params[params_to_matrix_map[i]]; + } + target[8] = 1; +} + +// Convert a 3x3 matrix to a parameter array form. +static void matrix_to_params(const double *const matrix, double *target) { + for (int i = 0; i < MAX_PARAMDIM - 1; i++) { + assert(matrix_to_params_map[i] < MAX_PARAMDIM - 1); + target[i] = matrix[matrix_to_params_map[i]]; + } +} + +// Do matrix multiplication on params. +static void multiply_params(double *const m1, double *const m2, + double *target) { + double m1_matrix[MAX_PARAMDIM]; + double m2_matrix[MAX_PARAMDIM]; + double result[MAX_PARAMDIM]; + + params_to_matrix(m1, m1_matrix); + params_to_matrix(m2, m2_matrix); + multiply_mat(m2_matrix, m1_matrix, result, TRANSFORM_MAT_DIM, + TRANSFORM_MAT_DIM, TRANSFORM_MAT_DIM); + matrix_to_params(result, target); +} + +// Finds x and y limits of a single transformed image. +// Width and height are the size of the input video. +static void find_frame_limit(int width, int height, + const double *const transform, int *x_min, + int *x_max, int *y_min, int *y_max) { + double transform_matrix[MAX_PARAMDIM]; + double xy_matrix[3] = { 0, 0, 1 }; + double uv_matrix[3] = { 0 }; +// Macro used to update frame limits based on transformed coordinates. 
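For reference, a minimal standalone sketch of the parameter layout that params_to_matrix()/matrix_to_params() above assume: params are ordered { tx, ty, a, b, c, d, e, f } and fill the row-major 3x3 matrix [a b tx; c d ty; e f 1] (the identity_params table later in this file follows the same layout). PARAMDIM, p2m and m2p are local stand-ins for MAX_PARAMDIM (assumed to be 9) and the two map tables copied from the patch.

/* Hedged sketch, not part of the patch: round-trips the 8-parameter form
 * through the 3x3 matrix form and back. */
#include <assert.h>
#include <stdio.h>

#define PARAMDIM 9 /* assumption: mirrors MAX_PARAMDIM */

static const int p2m[] = { 2, 3, 0, 4, 5, 1, 6, 7 }; /* params_to_matrix_map */
static const int m2p[] = { 2, 5, 0, 1, 3, 4, 6, 7 }; /* matrix_to_params_map */

int main(void) {
  /* Layout: { tx, ty, a, b, c, d, e, f } ->
   *   [ a  b  tx ]
   *   [ c  d  ty ]
   *   [ e  f  1  ]   (row-major)                                     */
  const double params[PARAMDIM - 1] = { 4.0, -2.0, 1.0, 0.1, 0.0, 1.0, 0.0, 0.0 };
  double mat[PARAMDIM], back[PARAMDIM - 1];
  for (int i = 0; i < PARAMDIM - 1; ++i) mat[i] = params[p2m[i]];
  mat[8] = 1;
  for (int i = 0; i < PARAMDIM - 1; ++i) back[i] = mat[m2p[i]];
  for (int i = 0; i < PARAMDIM - 1; ++i) assert(back[i] == params[i]);
  printf("row 0: %f %f %f\n", mat[0], mat[1], mat[2]); /* a b tx */
  return 0;
}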
+#define UPDATELIMITS(u, v, x_min, x_max, y_min, y_max) \ + { \ + if ((int)ceil(u) > *x_max) { \ + *x_max = (int)ceil(u); \ + } \ + if ((int)floor(u) < *x_min) { \ + *x_min = (int)floor(u); \ + } \ + if ((int)ceil(v) > *y_max) { \ + *y_max = (int)ceil(v); \ + } \ + if ((int)floor(v) < *y_min) { \ + *y_min = (int)floor(v); \ + } \ + } + + params_to_matrix(transform, transform_matrix); + xy_matrix[0] = 0; + xy_matrix[1] = 0; + multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM, + TRANSFORM_MAT_DIM, 1); + *x_max = (int)ceil(uv_matrix[0]); + *x_min = (int)floor(uv_matrix[0]); + *y_max = (int)ceil(uv_matrix[1]); + *y_min = (int)floor(uv_matrix[1]); + + xy_matrix[0] = width; + xy_matrix[1] = 0; + multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM, + TRANSFORM_MAT_DIM, 1); + UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max); + + xy_matrix[0] = width; + xy_matrix[1] = height; + multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM, + TRANSFORM_MAT_DIM, 1); + UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max); + + xy_matrix[0] = 0; + xy_matrix[1] = height; + multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM, + TRANSFORM_MAT_DIM, 1); + UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max); + +#undef UPDATELIMITS +} + +// Finds x and y limits for arrays. Also finds the overall max and minimums +static void find_limits(int width, int height, const double **const params, + int num_frames, int *x_min, int *x_max, int *y_min, + int *y_max, int *pano_x_min, int *pano_x_max, + int *pano_y_min, int *pano_y_max) { + *pano_x_max = INT_MIN; + *pano_x_min = INT_MAX; + *pano_y_max = INT_MIN; + *pano_y_min = INT_MAX; + for (int i = 0; i < num_frames; ++i) { + find_frame_limit(width, height, (const double *const)params[i], &x_min[i], + &x_max[i], &y_min[i], &y_max[i]); + if (x_max[i] > *pano_x_max) { + *pano_x_max = x_max[i]; + } + if (x_min[i] < *pano_x_min) { + *pano_x_min = x_min[i]; + } + if (y_max[i] > *pano_y_max) { + *pano_y_max = y_max[i]; + } + if (y_min[i] < *pano_y_min) { + *pano_y_min = y_min[i]; + } + } +} + +// Inverts a 3x3 matrix that is in the parameter form. +static void invert_params(const double *const params, double *target) { + double temp[MAX_PARAMDIM] = { 0 }; + params_to_matrix(params, temp); + + // Find determinant of matrix (expansion by minors). + const double det = temp[0] * ((temp[4] * temp[8]) - (temp[5] * temp[7])) - + temp[1] * ((temp[3] * temp[8]) - (temp[5] * temp[6])) + + temp[2] * ((temp[3] * temp[7]) - (temp[4] * temp[6])); + assert(det != 0); + + // inverse is transpose of cofactor * 1/det. + double inverse[MAX_PARAMDIM] = { 0 }; + inverse[0] = (temp[4] * temp[8] - temp[7] * temp[5]) / det; + inverse[1] = (temp[2] * temp[7] - temp[1] * temp[8]) / det; + inverse[2] = (temp[1] * temp[5] - temp[2] * temp[4]) / det; + inverse[3] = (temp[5] * temp[6] - temp[3] * temp[8]) / det; + inverse[4] = (temp[0] * temp[8] - temp[2] * temp[6]) / det; + inverse[5] = (temp[3] * temp[2] - temp[0] * temp[5]) / det; + inverse[6] = (temp[3] * temp[7] - temp[6] * temp[4]) / det; + inverse[7] = (temp[6] * temp[1] - temp[0] * temp[7]) / det; + inverse[8] = (temp[0] * temp[4] - temp[3] * temp[1]) / det; + + matrix_to_params(inverse, target); +} + +#if BGSPRITE_BLENDING_MODE == 0 +// swaps two YuvPixels. +static void swap_yuv(YuvPixel *a, YuvPixel *b) { + const YuvPixel temp = *b; + *b = *a; + *a = temp; +} + +// Partitions array to find pivot index in qselect. 
+static int partition(YuvPixel arr[], int left, int right, int pivot_idx) { + YuvPixel pivot = arr[pivot_idx]; + + // Move pivot to the end. + swap_yuv(&arr[pivot_idx], &arr[right]); + + int p_idx = left; + for (int i = left; i < right; ++i) { + if (arr[i].y <= pivot.y) { + swap_yuv(&arr[i], &arr[p_idx]); + p_idx++; + } + } + + swap_yuv(&arr[p_idx], &arr[right]); + + return p_idx; +} + +// Returns the kth element in array, partially sorted in place (quickselect). +static YuvPixel qselect(YuvPixel arr[], int left, int right, int k) { + if (left >= right) { + return arr[left]; + } + unsigned int seed = (int)time(NULL); + int pivot_idx = left + rand_r(&seed) % (right - left + 1); + pivot_idx = partition(arr, left, right, pivot_idx); + + if (k == pivot_idx) { + return arr[k]; + } else if (k < pivot_idx) { + return qselect(arr, left, pivot_idx - 1, k); + } else { + return qselect(arr, pivot_idx + 1, right, k); + } +} +#endif // BGSPRITE_BLENDING_MODE == 0 + +// Stitches images together to create ARF and stores it in 'panorama'. +static void stitch_images(YV12_BUFFER_CONFIG **const frames, + const int num_frames, const int center_idx, + const double **const params, const int *const x_min, + const int *const x_max, const int *const y_min, + const int *const y_max, int pano_x_min, + int pano_x_max, int pano_y_min, int pano_y_max, + YV12_BUFFER_CONFIG *panorama) { + const int width = pano_x_max - pano_x_min + 1; + const int height = pano_y_max - pano_y_min + 1; + + // Create temp_pano[y][x][num_frames] stack of pixel values + YuvPixel ***temp_pano = aom_malloc(height * sizeof(*temp_pano)); + for (int i = 0; i < height; ++i) { + temp_pano[i] = aom_malloc(width * sizeof(**temp_pano)); + for (int j = 0; j < width; ++j) { + temp_pano[i][j] = aom_malloc(num_frames * sizeof(***temp_pano)); + } + } + // Create count[y][x] to count how many values in stack for median filtering + int **count = aom_malloc(height * sizeof(*count)); + for (int i = 0; i < height; ++i) { + count[i] = aom_calloc(width, sizeof(**count)); // counts initialized to 0 + } + + // Re-sample images onto panorama (pre-median filtering). + const int x_offset = -pano_x_min; + const int y_offset = -pano_y_min; + const int frame_width = frames[0]->y_width; + const int frame_height = frames[0]->y_height; + for (int i = 0; i < num_frames; ++i) { + // Find transforms from panorama coordinate system back to single image + // coordinate system for sampling. + int transformed_width = x_max[i] - x_min[i] + 1; + int transformed_height = y_max[i] - y_min[i] + 1; + + double transform_matrix[MAX_PARAMDIM]; + double transform_params[MAX_PARAMDIM - 1]; + invert_params(params[i], transform_params); + params_to_matrix(transform_params, transform_matrix); + +#if CONFIG_HIGHBITDEPTH + const uint16_t *y_buffer16 = CONVERT_TO_SHORTPTR(frames[i]->y_buffer); + const uint16_t *u_buffer16 = CONVERT_TO_SHORTPTR(frames[i]->u_buffer); + const uint16_t *v_buffer16 = CONVERT_TO_SHORTPTR(frames[i]->v_buffer); +#endif // CONFIG_HIGHBITDEPTH + + for (int y = 0; y < transformed_height; ++y) { + for (int x = 0; x < transformed_width; ++x) { + // Do transform. + double xy_matrix[3] = { x + x_min[i], y + y_min[i], 1 }; + double uv_matrix[3] = { 0 }; + multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM, + TRANSFORM_MAT_DIM, 1); + + // Coordinates used for nearest neighbor interpolation. 
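A minimal sketch of the quickselect-based median used when BGSPRITE_BLENDING_MODE == 0, written here on plain ints rather than YuvPixel; partition_int and qselect_int are illustrative names, not the patch's functions.

/* Hedged sketch: Lomuto partition + quickselect for the median of a small
 * stack of samples at one panorama pixel. */
#include <stdio.h>

static void swap_int(int *a, int *b) {
  const int t = *a;
  *a = *b;
  *b = t;
}

/* Move everything <= the pivot to the left of the returned index. */
static int partition_int(int arr[], int left, int right, int pivot_idx) {
  const int pivot = arr[pivot_idx];
  swap_int(&arr[pivot_idx], &arr[right]);
  int p = left;
  for (int i = left; i < right; ++i) {
    if (arr[i] <= pivot) swap_int(&arr[i], &arr[p++]);
  }
  swap_int(&arr[p], &arr[right]);
  return p;
}

/* Returns the k-th smallest element; arr is partially sorted in place. */
static int qselect_int(int arr[], int left, int right, int k) {
  while (left < right) {
    const int p = partition_int(arr, left, right, left); /* leftmost pivot */
    if (k == p) return arr[k];
    if (k < p) right = p - 1; else left = p + 1;
  }
  return arr[left];
}

int main(void) {
  int stack[5] = { 90, 10, 40, 70, 20 }; /* e.g. five Y samples at one pixel */
  printf("median = %d\n", qselect_int(stack, 0, 4, 5 / 2)); /* prints 40 */
  return 0;
}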
+ int image_x = (int)round(uv_matrix[0]); + int image_y = (int)round(uv_matrix[1]); + + // Temporary values for bilinear interpolation + double interpolated_yvalue = 0.0; + double interpolated_uvalue = 0.0; + double interpolated_vvalue = 0.0; + double interpolated_fraction = 0.0; + int interpolation_count = 0; + +#if BGSPRITE_INTERPOLATION == 1 + // Coordintes used for bilinear interpolation. + double x_base; + double y_base; + double x_decimal = modf(uv_matrix[0], &x_base); + double y_decimal = modf(uv_matrix[1], &y_base); + + if ((x_decimal > 0.2 && x_decimal < 0.8) || + (y_decimal > 0.2 && y_decimal < 0.8)) { + for (int u = 0; u < 2; ++u) { + for (int v = 0; v < 2; ++v) { + int interp_x = (int)x_base + u; + int interp_y = (int)y_base + v; + if (interp_x >= 0 && interp_x < frame_width && interp_y >= 0 && + interp_y < frame_height) { + interpolation_count++; + + interpolated_fraction += + fabs(u - x_decimal) * fabs(v - y_decimal); + int ychannel_idx = interp_y * frames[i]->y_stride + interp_x; + int uvchannel_idx = (interp_y >> frames[i]->subsampling_y) * + frames[i]->uv_stride + + (interp_x >> frames[i]->subsampling_x); +#if CONFIG_HIGHBITDEPTH + if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) { + interpolated_yvalue += (1 - fabs(u - x_decimal)) * + (1 - fabs(v - y_decimal)) * + y_buffer16[ychannel_idx]; + interpolated_uvalue += (1 - fabs(u - x_decimal)) * + (1 - fabs(v - y_decimal)) * + u_buffer16[uvchannel_idx]; + interpolated_vvalue += (1 - fabs(u - x_decimal)) * + (1 - fabs(v - y_decimal)) * + v_buffer16[uvchannel_idx]; + } else { +#endif // CONFIG_HIGHBITDEPTH + interpolated_yvalue += (1 - fabs(u - x_decimal)) * + (1 - fabs(v - y_decimal)) * + frames[i]->y_buffer[ychannel_idx]; + interpolated_uvalue += (1 - fabs(u - x_decimal)) * + (1 - fabs(v - y_decimal)) * + frames[i]->u_buffer[uvchannel_idx]; + interpolated_vvalue += (1 - fabs(u - x_decimal)) * + (1 - fabs(v - y_decimal)) * + frames[i]->v_buffer[uvchannel_idx]; +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + } + } + } + } +#endif // BGSPRITE_INTERPOLATION == 1 + + if (BGSPRITE_INTERPOLATION && interpolation_count > 2) { + if (interpolation_count != 4) { + interpolated_yvalue /= interpolated_fraction; + interpolated_uvalue /= interpolated_fraction; + interpolated_vvalue /= interpolated_fraction; + } + int pano_x = x + x_min[i] + x_offset; + int pano_y = y + y_min[i] + y_offset; + +#if CONFIG_HIGHBITDEPTH + if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) { + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y = + (uint16_t)interpolated_yvalue; + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u = + (uint16_t)interpolated_uvalue; + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v = + (uint16_t)interpolated_vvalue; + } else { +#endif // CONFIG_HIGHBITDEPTH + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y = + (uint8_t)interpolated_yvalue; + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u = + (uint8_t)interpolated_uvalue; + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v = + (uint8_t)interpolated_vvalue; +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + ++count[pano_y][pano_x]; + } else if (image_x >= 0 && image_x < frame_width && image_y >= 0 && + image_y < frame_height) { + // Place in panorama stack. 
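A minimal sketch of the bilinear weighting applied above, assuming the usual convention that neighbor (u, v) in {0,1}^2 contributes with weight (1 - |u - xd|) * (1 - |v - yd|), where xd and yd are the fractional parts of the transformed coordinate; bilinear_sample is an illustrative helper, not part of the patch.

/* Hedged sketch: interpolate one value from a 2x2 neighborhood. */
#include <math.h>
#include <stdio.h>

static double bilinear_sample(const double px[2][2], double x, double y) {
  double x_base, y_base;
  const double xd = modf(x, &x_base); /* fractional parts, as in the patch */
  const double yd = modf(y, &y_base);
  double value = 0.0;
  for (int u = 0; u < 2; ++u) {
    for (int v = 0; v < 2; ++v) {
      value += (1 - fabs(u - xd)) * (1 - fabs(v - yd)) * px[v][u];
    }
  }
  return value; /* the four weights sum to 1 */
}

int main(void) {
  /* px[row][col]: 2x2 neighborhood of pixel values. */
  const double px[2][2] = { { 10.0, 20.0 }, { 30.0, 40.0 } };
  printf("%.2f\n", bilinear_sample(px, 0.25, 0.5)); /* 22.50 */
  return 0;
}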
+ int pano_x = x + x_min[i] + x_offset; + int pano_y = y + y_min[i] + y_offset; + + int ychannel_idx = image_y * frames[i]->y_stride + image_x; + int uvchannel_idx = + (image_y >> frames[i]->subsampling_y) * frames[i]->uv_stride + + (image_x >> frames[i]->subsampling_x); +#if CONFIG_HIGHBITDEPTH + if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) { + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y = + y_buffer16[ychannel_idx]; + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u = + u_buffer16[uvchannel_idx]; + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v = + v_buffer16[uvchannel_idx]; + } else { +#endif // CONFIG_HIGHBITDEPTH + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y = + frames[i]->y_buffer[ychannel_idx]; + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u = + frames[i]->u_buffer[uvchannel_idx]; + temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v = + frames[i]->v_buffer[uvchannel_idx]; +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + ++count[pano_y][pano_x]; + } + } + } + } + +#if BGSPRITE_BLENDING_MODE == 1 + // Apply mean filtering and store result in temp_pano[y][x][0]. + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + if (count[y][x] == 0) { + // Just make the pixel black. + // TODO(toddnguyen): Color the pixel with nearest neighbor + } else { + // Find + uint32_t y_sum = 0; + uint32_t u_sum = 0; + uint32_t v_sum = 0; + for (int i = 0; i < count[y][x]; ++i) { + y_sum += temp_pano[y][x][i].y; + u_sum += temp_pano[y][x][i].u; + v_sum += temp_pano[y][x][i].v; + } + + const uint32_t unsigned_count = (uint32_t)count[y][x]; + +#if CONFIG_HIGHBITDEPTH + if (panorama->flags & YV12_FLAG_HIGHBITDEPTH) { + temp_pano[y][x][0].y = (uint16_t)OD_DIVU(y_sum, unsigned_count); + temp_pano[y][x][0].u = (uint16_t)OD_DIVU(u_sum, unsigned_count); + temp_pano[y][x][0].v = (uint16_t)OD_DIVU(v_sum, unsigned_count); + } else { +#endif // CONFIG_HIGHBITDEPTH + temp_pano[y][x][0].y = (uint8_t)OD_DIVU(y_sum, unsigned_count); + temp_pano[y][x][0].u = (uint8_t)OD_DIVU(u_sum, unsigned_count); + temp_pano[y][x][0].v = (uint8_t)OD_DIVU(v_sum, unsigned_count); +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + } + } + } +#else + // Apply median filtering using quickselect. + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + if (count[y][x] == 0) { + // Just make the pixel black. + // TODO(toddnguyen): Color the pixel with nearest neighbor + } else { + // Find + const int median_idx = (int)floor(count[y][x] / 2); + YuvPixel median = + qselect(temp_pano[y][x], 0, count[y][x] - 1, median_idx); + + // Make the median value the 0th index for UV subsampling later + temp_pano[y][x][0] = median; + assert(median.y == temp_pano[y][x][0].y && + median.u == temp_pano[y][x][0].u && + median.v == temp_pano[y][x][0].v); + } + } + } +#endif // BGSPRITE_BLENDING_MODE == 1 + + // NOTE(toddnguyen): Right now the ARF in the cpi struct is fixed size at + // the same size as the frames. For now, we crop the generated panorama. + // assert(panorama->y_width < width && panorama->y_height < height); + const int crop_x_offset = x_min[center_idx] + x_offset; + const int crop_y_offset = y_min[center_idx] + y_offset; + +#if CONFIG_HIGHBITDEPTH + if (panorama->flags & YV12_FLAG_HIGHBITDEPTH) { + // Use median Y value. 
+ uint16_t *pano_y_buffer16 = CONVERT_TO_SHORTPTR(panorama->y_buffer); + for (int y = 0; y < panorama->y_height; ++y) { + for (int x = 0; x < panorama->y_width; ++x) { + const int ychannel_idx = y * panorama->y_stride + x; + if (count[y + crop_y_offset][x + crop_x_offset] > 0) { + pano_y_buffer16[ychannel_idx] = + temp_pano[y + crop_y_offset][x + crop_x_offset][0].y; + } else { + pano_y_buffer16[ychannel_idx] = 0; + } + } + } + + // UV subsampling with median UV values + uint16_t *pano_u_buffer16 = CONVERT_TO_SHORTPTR(panorama->u_buffer); + uint16_t *pano_v_buffer16 = CONVERT_TO_SHORTPTR(panorama->v_buffer); + + for (int y = 0; y < panorama->uv_height; ++y) { + for (int x = 0; x < panorama->uv_width; ++x) { + uint32_t avg_count = 0; + uint32_t u_sum = 0; + uint32_t v_sum = 0; + + // Look at surrounding pixels for subsampling + for (int s_x = 0; s_x < panorama->subsampling_x + 1; ++s_x) { + for (int s_y = 0; s_y < panorama->subsampling_y + 1; ++s_y) { + int y_sample = crop_y_offset + (y << panorama->subsampling_y) + s_y; + int x_sample = crop_x_offset + (x << panorama->subsampling_x) + s_x; + if (y_sample > 0 && y_sample < height && x_sample > 0 && + x_sample < width && count[y_sample][x_sample] > 0) { + u_sum += temp_pano[y_sample][x_sample][0].u; + v_sum += temp_pano[y_sample][x_sample][0].v; + avg_count++; + } + } + } + + const int uvchannel_idx = y * panorama->uv_stride + x; + if (avg_count != 0) { + pano_u_buffer16[uvchannel_idx] = (uint16_t)OD_DIVU(u_sum, avg_count); + pano_v_buffer16[uvchannel_idx] = (uint16_t)OD_DIVU(v_sum, avg_count); + } else { + pano_u_buffer16[uvchannel_idx] = 0; + pano_v_buffer16[uvchannel_idx] = 0; + } + } + } + } else { +#endif // CONFIG_HIGHBITDEPTH + // Use median Y value. + for (int y = 0; y < panorama->y_height; ++y) { + for (int x = 0; x < panorama->y_width; ++x) { + const int ychannel_idx = y * panorama->y_stride + x; + if (count[y + crop_y_offset][x + crop_x_offset] > 0) { + panorama->y_buffer[ychannel_idx] = + temp_pano[y + crop_y_offset][x + crop_x_offset][0].y; + } else { + panorama->y_buffer[ychannel_idx] = 0; + } + } + } + + // UV subsampling with median UV values + for (int y = 0; y < panorama->uv_height; ++y) { + for (int x = 0; x < panorama->uv_width; ++x) { + uint16_t avg_count = 0; + uint16_t u_sum = 0; + uint16_t v_sum = 0; + + // Look at surrounding pixels for subsampling + for (int s_x = 0; s_x < panorama->subsampling_x + 1; ++s_x) { + for (int s_y = 0; s_y < panorama->subsampling_y + 1; ++s_y) { + int y_sample = crop_y_offset + (y << panorama->subsampling_y) + s_y; + int x_sample = crop_x_offset + (x << panorama->subsampling_x) + s_x; + if (y_sample > 0 && y_sample < height && x_sample > 0 && + x_sample < width && count[y_sample][x_sample] > 0) { + u_sum += temp_pano[y_sample][x_sample][0].u; + v_sum += temp_pano[y_sample][x_sample][0].v; + avg_count++; + } + } + } + + const int uvchannel_idx = y * panorama->uv_stride + x; + if (avg_count != 0) { + panorama->u_buffer[uvchannel_idx] = + (uint8_t)OD_DIVU(u_sum, avg_count); + panorama->v_buffer[uvchannel_idx] = + (uint8_t)OD_DIVU(v_sum, avg_count); + } else { + panorama->u_buffer[uvchannel_idx] = 0; + panorama->v_buffer[uvchannel_idx] = 0; + } + } + } +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + + for (int i = 0; i < height; ++i) { + for (int j = 0; j < width; ++j) { + aom_free(temp_pano[i][j]); + } + aom_free(temp_pano[i]); + aom_free(count[i]); + } + aom_free(count); + aom_free(temp_pano); +} + +int av1_background_sprite(AV1_COMP *cpi, int distance) { + YV12_BUFFER_CONFIG 
*frames[MAX_LAG_BUFFERS] = { NULL }; + static const double identity_params[MAX_PARAMDIM - 1] = { + 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0 + }; + + const int frames_after_arf = + av1_lookahead_depth(cpi->lookahead) - distance - 1; + int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1; + int frames_bwd; + + // Define the forward and backwards filter limits for this arnr group. + if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf; + if (frames_fwd > distance) frames_fwd = distance; + frames_bwd = frames_fwd; + +#if CONFIG_EXT_REFS + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW) { + cpi->alt_ref_buffer = av1_lookahead_peek(cpi->lookahead, distance)->img; + cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 1; + frames_fwd = 0; + frames_bwd = 0; + } else { + cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 0; + } +#endif // CONFIG_EXT_REFS + + const int start_frame = distance + frames_fwd; + const int frames_to_stitch = frames_bwd + 1 + frames_fwd; + + // Get frames to be included in background sprite. + for (int frame = 0; frame < frames_to_stitch; ++frame) { + const int which_buffer = start_frame - frame; + struct lookahead_entry *buf = + av1_lookahead_peek(cpi->lookahead, which_buffer); + frames[frames_to_stitch - 1 - frame] = &buf->img; + } + + YV12_BUFFER_CONFIG temp_bg; + memset(&temp_bg, 0, sizeof(temp_bg)); + aom_alloc_frame_buffer(&temp_bg, frames[0]->y_width, frames[0]->y_height, + frames[0]->subsampling_x, frames[0]->subsampling_y, +#if CONFIG_HIGHBITDEPTH + frames[0]->flags & YV12_FLAG_HIGHBITDEPTH, +#endif + frames[0]->border, 0); + aom_yv12_copy_frame(frames[0], &temp_bg); + temp_bg.bit_depth = frames[0]->bit_depth; + + // Allocate empty arrays for parameters between frames. + double **params = aom_malloc(frames_to_stitch * sizeof(*params)); + for (int i = 0; i < frames_to_stitch; ++i) { + params[i] = aom_malloc(sizeof(identity_params)); + memcpy(params[i], identity_params, sizeof(identity_params)); + } + + // Use global motion to find affine transformations between frames. + // params[i] will have the transform from frame[i] to frame[i-1]. + // params[0] will have the identity matrix because it has no previous frame. + TransformationType model = AFFINE; + int inliers_by_motion[RANSAC_NUM_MOTIONS]; + for (int frame = 0; frame < frames_to_stitch - 1; ++frame) { + const int global_motion_ret = compute_global_motion_feature_based( + model, frames[frame + 1], frames[frame], +#if CONFIG_HIGHBITDEPTH + cpi->common.bit_depth, +#endif // CONFIG_HIGHBITDEPTH + inliers_by_motion, params[frame + 1], RANSAC_NUM_MOTIONS); + + // Quit if global motion had an error. + if (global_motion_ret == 0) { + for (int i = 0; i < frames_to_stitch; ++i) { + aom_free(params[i]); + } + aom_free(params); + return 1; + } + } + + // Compound the transformation parameters. + for (int i = 1; i < frames_to_stitch; ++i) { + multiply_params(params[i - 1], params[i], params[i]); + } + + // Compute frame limits for final stitched images. 
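A minimal sketch of the compounding step above on plain 3x3 matrices: each per-pair transform is folded into the accumulated transform of the previous frame so that every frame ends up expressed relative to a common anchor frame (the patch does the same on the 8-parameter form via multiply_params(), and later re-centers on the ARF by multiplying in the inverse of the center frame's transform). Multiplication order and direction conventions here are illustrative, not asserted to match the patch exactly.

/* Hedged sketch: chain pure-translation transforms frame-by-frame. */
#include <stdio.h>

#define DIM 3

static void mat_mul(const double a[DIM * DIM], const double b[DIM * DIM],
                    double out[DIM * DIM]) {
  for (int r = 0; r < DIM; ++r)
    for (int c = 0; c < DIM; ++c) {
      out[r * DIM + c] = 0.0;
      for (int k = 0; k < DIM; ++k)
        out[r * DIM + c] += a[r * DIM + k] * b[k * DIM + c];
    }
}

int main(void) {
  /* Pure translations for illustration: frame 1 is shifted +2 px in x
   * relative to frame 0, frame 2 is shifted +3 px relative to frame 1. */
  double xform[3][DIM * DIM] = {
    { 1, 0, 0, 0, 1, 0, 0, 0, 1 }, /* frame 0: identity */
    { 1, 0, 2, 0, 1, 0, 0, 0, 1 }, /* frame 1 relative to frame 0 */
    { 1, 0, 3, 0, 1, 0, 0, 0, 1 }, /* frame 2 relative to frame 1 */
  };
  for (int i = 1; i < 3; ++i) {
    double tmp[DIM * DIM];
    mat_mul(xform[i - 1], xform[i], tmp); /* compound onto the anchor */
    for (int k = 0; k < DIM * DIM; ++k) xform[i][k] = tmp[k];
  }
  printf("frame 2 -> frame 0 x-shift: %g\n", xform[2][2]); /* 5 */
  return 0;
}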
+ int pano_x_max = INT_MIN; + int pano_x_min = INT_MAX; + int pano_y_max = INT_MIN; + int pano_y_min = INT_MAX; + int *x_max = aom_malloc(frames_to_stitch * sizeof(*x_max)); + int *x_min = aom_malloc(frames_to_stitch * sizeof(*x_min)); + int *y_max = aom_malloc(frames_to_stitch * sizeof(*y_max)); + int *y_min = aom_malloc(frames_to_stitch * sizeof(*y_min)); + + find_limits(cpi->initial_width, cpi->initial_height, + (const double **const)params, frames_to_stitch, x_min, x_max, + y_min, y_max, &pano_x_min, &pano_x_max, &pano_y_min, &pano_y_max); + + // Center panorama on the ARF. + const int center_idx = frames_bwd; + assert(center_idx >= 0 && center_idx < frames_to_stitch); + + // Recompute transformations to adjust to center image. + // Invert center image's transform. + double inverse[MAX_PARAMDIM - 1] = { 0 }; + invert_params(params[center_idx], inverse); + + // Multiply the inverse to all transformation parameters. + for (int i = 0; i < frames_to_stitch; ++i) { + multiply_params(inverse, params[i], params[i]); + } + + // Recompute frame limits for new adjusted center. + find_limits(cpi->initial_width, cpi->initial_height, + (const double **const)params, frames_to_stitch, x_min, x_max, + y_min, y_max, &pano_x_min, &pano_x_max, &pano_y_min, &pano_y_max); + + // Stitch Images. + stitch_images(frames, frames_to_stitch, center_idx, + (const double **const)params, x_min, x_max, y_min, y_max, + pano_x_min, pano_x_max, pano_y_min, pano_y_max, &temp_bg); + + // Apply temporal filter. + av1_temporal_filter(cpi, &temp_bg, distance); + + // Free memory. + aom_free_frame_buffer(&temp_bg); + for (int i = 0; i < frames_to_stitch; ++i) { + aom_free(params[i]); + } + aom_free(params); + aom_free(x_max); + aom_free(x_min); + aom_free(y_max); + aom_free(y_min); + + return 0; +} diff --git a/third_party/aom/av1/encoder/bgsprite.h b/third_party/aom/av1/encoder/bgsprite.h new file mode 100644 index 000000000..711b00e40 --- /dev/null +++ b/third_party/aom/av1/encoder/bgsprite.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef AV1_ENCODER_BGSPRITE_H_ +#define AV1_ENCODER_BGSPRITE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "av1/encoder/encoder.h" + +// Creates alternate reference frame staring from source image + frames up to +// 'distance' past source frame. +// Returns 0 on success and 1 on failure. 
+int av1_background_sprite(AV1_COMP *cpi, int distance); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // AV1_ENCODER_BGSPRITE_H_ diff --git a/third_party/aom/av1/encoder/bitstream.c b/third_party/aom/av1/encoder/bitstream.c index f8378b14d..2e0abc186 100644 --- a/third_party/aom/av1/encoder/bitstream.c +++ b/third_party/aom/av1/encoder/bitstream.c @@ -26,7 +26,6 @@ #if CONFIG_CDEF #include "av1/common/cdef.h" -#include "av1/common/clpf.h" #endif // CONFIG_CDEF #include "av1/common/entropy.h" #include "av1/common/entropymode.h" @@ -61,21 +60,12 @@ #include "av1/encoder/pvq_encoder.h" #endif -static struct av1_token intra_mode_encodings[INTRA_MODES]; -static struct av1_token switchable_interp_encodings[SWITCHABLE_FILTERS]; -static struct av1_token partition_encodings[PARTITION_TYPES]; -#if CONFIG_EXT_INTER -static const struct av1_token - inter_compound_mode_encodings[INTER_COMPOUND_MODES] = { - { 2, 2 }, { 12, 4 }, { 52, 6 }, { 53, 6 }, - { 54, 6 }, { 55, 6 }, { 0, 1 }, { 7, 3 } - }; -#endif // CONFIG_EXT_INTER -#if CONFIG_PALETTE -static struct av1_token palette_size_encodings[PALETTE_SIZES]; -static struct av1_token palette_color_index_encodings[PALETTE_SIZES] - [PALETTE_COLORS]; -#endif // CONFIG_PALETTE +#define ENC_MISMATCH_DEBUG 0 + +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF +static struct av1_token + inter_singleref_comp_mode_encodings[INTER_SINGLEREF_COMP_MODES]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF #if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE static INLINE void write_uniform(aom_writer *w, int n, int v) { @@ -97,9 +87,6 @@ static struct av1_token ext_tx_intra_encodings[EXT_TX_SETS_INTRA][TX_TYPES]; #else static struct av1_token ext_tx_encodings[TX_TYPES]; #endif // CONFIG_EXT_TX -#if CONFIG_GLOBAL_MOTION -static struct av1_token global_motion_types_encodings[GLOBAL_TRANS_TYPES]; -#endif // CONFIG_GLOBAL_MOTION #if CONFIG_EXT_INTRA #if CONFIG_INTRA_INTERP static struct av1_token intra_filter_encodings[INTRA_FILTERS]; @@ -114,7 +101,9 @@ static struct av1_token compound_type_encodings[COMPOUND_TYPES]; #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION -static struct av1_token motion_mode_encodings[MOTION_MODES]; +#if CONFIG_NCOBMC_ADAPT_WEIGHT +static struct av1_token ncobmc_mode_encodings[MAX_NCOBMC_MODES]; +#endif #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_LOOP_RESTORATION static struct av1_token switchable_restore_encodings[RESTORE_SWITCHABLE_TYPES]; @@ -129,9 +118,9 @@ static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst, int *const tile_col_size_bytes); void av1_encode_token_init(void) { -#if CONFIG_EXT_TX || CONFIG_PALETTE +#if CONFIG_EXT_TX int s; -#endif // CONFIG_EXT_TX || CONFIG_PALETTE +#endif // CONFIG_EXT_TX #if CONFIG_EXT_TX for (s = 1; s < EXT_TX_SETS_INTER; ++s) { av1_tokens_from_tree(ext_tx_inter_encodings[s], av1_ext_tx_inter_tree[s]); @@ -142,17 +131,6 @@ void av1_encode_token_init(void) { #else av1_tokens_from_tree(ext_tx_encodings, av1_ext_tx_tree); #endif // CONFIG_EXT_TX - av1_tokens_from_tree(intra_mode_encodings, av1_intra_mode_tree); - av1_tokens_from_tree(switchable_interp_encodings, av1_switchable_interp_tree); - av1_tokens_from_tree(partition_encodings, av1_partition_tree); - -#if CONFIG_PALETTE - av1_tokens_from_tree(palette_size_encodings, av1_palette_size_tree); - for (s = 0; s < PALETTE_SIZES; ++s) { - av1_tokens_from_tree(palette_color_index_encodings[s], - av1_palette_color_index_tree[s]); - 
} -#endif // CONFIG_PALETTE #if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP av1_tokens_from_tree(intra_filter_encodings, av1_intra_filter_tree); @@ -161,17 +139,19 @@ void av1_encode_token_init(void) { #if CONFIG_INTERINTRA av1_tokens_from_tree(interintra_mode_encodings, av1_interintra_mode_tree); #endif // CONFIG_INTERINTRA +#if CONFIG_COMPOUND_SINGLEREF + av1_tokens_from_tree(inter_singleref_comp_mode_encodings, + av1_inter_singleref_comp_mode_tree); +#endif // CONFIG_COMPOUND_SINGLEREF #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE av1_tokens_from_tree(compound_type_encodings, av1_compound_type_tree); #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - av1_tokens_from_tree(motion_mode_encodings, av1_motion_mode_tree); +#if CONFIG_NCOBMC_ADAPT_WEIGHT + av1_tokens_from_tree(ncobmc_mode_encodings, av1_ncobmc_mode_tree); +#endif #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION -#if CONFIG_GLOBAL_MOTION - av1_tokens_from_tree(global_motion_types_encodings, - av1_global_motion_types_tree); -#endif // CONFIG_GLOBAL_MOTION #if CONFIG_LOOP_RESTORATION av1_tokens_from_tree(switchable_restore_encodings, av1_switchable_restore_tree); @@ -195,10 +175,6 @@ void av1_encode_token_init(void) { #else av1_indices_from_tree(av1_ext_tx_ind, av1_ext_tx_inv, av1_ext_tx_tree); #endif - av1_indices_from_tree(av1_intra_mode_ind, av1_intra_mode_inv, - av1_intra_mode_tree); - av1_indices_from_tree(av1_inter_mode_ind, av1_inter_mode_inv, - av1_inter_mode_tree); } static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx, @@ -214,65 +190,72 @@ static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx, (void)cm; } -#if CONFIG_EXT_INTER && CONFIG_INTERINTRA -static void write_interintra_mode(aom_writer *w, INTERINTRA_MODE mode, - const aom_prob *probs) { - av1_write_token(w, av1_interintra_mode_tree, probs, - &interintra_mode_encodings[mode]); -} -#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA - static void write_inter_mode(aom_writer *w, PREDICTION_MODE mode, FRAME_CONTEXT *ec_ctx, const int16_t mode_ctx) { const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK; - const aom_prob newmv_prob = ec_ctx->newmv_prob[newmv_ctx]; - aom_write(w, mode != NEWMV, newmv_prob); +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, mode != NEWMV, ec_ctx->newmv_cdf[newmv_ctx], 2); +#else + aom_write(w, mode != NEWMV, ec_ctx->newmv_prob[newmv_ctx]); +#endif if (mode != NEWMV) { - const int16_t zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK; - const aom_prob zeromv_prob = ec_ctx->zeromv_prob[zeromv_ctx]; - if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) { assert(mode == ZEROMV); return; } - aom_write(w, mode != ZEROMV, zeromv_prob); + const int16_t zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK; +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, mode != ZEROMV, ec_ctx->zeromv_cdf[zeromv_ctx], 2); +#else + aom_write(w, mode != ZEROMV, ec_ctx->zeromv_prob[zeromv_ctx]); +#endif if (mode != ZEROMV) { int16_t refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK; - aom_prob refmv_prob; if (mode_ctx & (1 << SKIP_NEARESTMV_OFFSET)) refmv_ctx = 6; if (mode_ctx & (1 << SKIP_NEARMV_OFFSET)) refmv_ctx = 7; if (mode_ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) refmv_ctx = 8; - - refmv_prob = ec_ctx->refmv_prob[refmv_ctx]; - aom_write(w, mode != NEARESTMV, refmv_prob); +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, mode != NEARESTMV, ec_ctx->refmv_cdf[refmv_ctx], 2); +#else + aom_write(w, mode != NEARESTMV, 
ec_ctx->refmv_prob[refmv_ctx]); +#endif } } } -static void write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi, +static void write_drl_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext, aom_writer *w) { uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); assert(mbmi->ref_mv_idx < 3); #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV || + mbmi->mode == SR_NEW_NEWMV) { +#else // !CONFIG_COMPOUND_SINGLEREF if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) { -#else +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER if (mbmi->mode == NEWMV) { -#endif +#endif // CONFIG_EXT_INTER int idx; for (idx = 0; idx < 2; ++idx) { if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - aom_prob drl_prob = cm->fc->drl_prob[drl_ctx]; - aom_write(w, mbmi->ref_mv_idx != idx, drl_prob); +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, mbmi->ref_mv_idx != idx, ec_ctx->drl_cdf[drl_ctx], + 2); +#else + aom_write(w, mbmi->ref_mv_idx != idx, ec_ctx->drl_prob[drl_ctx]); +#endif if (mbmi->ref_mv_idx == idx) return; } } @@ -286,9 +269,12 @@ static void write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi, if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - aom_prob drl_prob = cm->fc->drl_prob[drl_ctx]; - - aom_write(w, mbmi->ref_mv_idx != (idx - 1), drl_prob); +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, mbmi->ref_mv_idx != (idx - 1), + ec_ctx->drl_cdf[drl_ctx], 2); +#else + aom_write(w, mbmi->ref_mv_idx != (idx - 1), ec_ctx->drl_prob[drl_ctx]); +#endif if (mbmi->ref_mv_idx == (idx - 1)) return; } } @@ -297,16 +283,28 @@ static void write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi, } #if CONFIG_EXT_INTER -static void write_inter_compound_mode(AV1_COMMON *cm, aom_writer *w, - PREDICTION_MODE mode, +static void write_inter_compound_mode(AV1_COMMON *cm, MACROBLOCKD *xd, + aom_writer *w, PREDICTION_MODE mode, const int16_t mode_ctx) { - const aom_prob *const inter_compound_probs = - cm->fc->inter_compound_mode_probs[mode_ctx]; - assert(is_inter_compound_mode(mode)); - av1_write_token(w, av1_inter_compound_mode_tree, inter_compound_probs, - &inter_compound_mode_encodings[INTER_COMPOUND_OFFSET(mode)]); + (void)cm; + aom_write_symbol(w, INTER_COMPOUND_OFFSET(mode), + xd->tile_ctx->inter_compound_mode_cdf[mode_ctx], + INTER_COMPOUND_MODES); +} + +#if CONFIG_COMPOUND_SINGLEREF +static void write_inter_singleref_comp_mode(MACROBLOCKD *xd, aom_writer *w, + PREDICTION_MODE mode, + const int16_t mode_ctx) { + assert(is_inter_singleref_comp_mode(mode)); + aom_cdf_prob *const inter_singleref_comp_cdf = + xd->tile_ctx->inter_singleref_comp_mode_cdf[mode_ctx]; + + aom_write_symbol(w, INTER_SINGLEREF_COMP_OFFSET(mode), + inter_singleref_comp_cdf, INTER_SINGLEREF_COMP_MODES); } +#endif // CONFIG_COMPOUND_SINGLEREF #endif // CONFIG_EXT_INTER static void encode_unsigned_max(struct aom_write_bit_buffer *wb, int data, @@ -314,11 +312,10 @@ static void encode_unsigned_max(struct aom_write_bit_buffer *wb, int data, aom_wb_write_literal(wb, data, get_unsigned_bits(max)); } -#if !CONFIG_EC_ADAPT || \ - (CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION || CONFIG_EXT_INTER) +#if CONFIG_NCOBMC_ADAPT_WEIGHT static void prob_diff_update(const aom_tree_index *tree, aom_prob probs[/*n - 1*/], - const unsigned int counts[/*n - 1*/], int n, + const 
unsigned int counts[/* n */], int n, int probwt, aom_writer *w) { int i; unsigned int branch_ct[32][2]; @@ -332,31 +329,15 @@ static void prob_diff_update(const aom_tree_index *tree, } #endif -#if CONFIG_EXT_INTER || !CONFIG_EC_ADAPT -static int prob_diff_update_savings(const aom_tree_index *tree, - aom_prob probs[/*n - 1*/], - const unsigned int counts[/*n - 1*/], int n, - int probwt) { - int i; - unsigned int branch_ct[32][2]; - int savings = 0; - - // Assuming max number of probabilities <= 32 - assert(n <= 32); - av1_tree_probs_from_distribution(tree, branch_ct, counts); - for (i = 0; i < n - 1; ++i) { - savings += - av1_cond_prob_diff_update_savings(&probs[i], branch_ct[i], probwt); - } - return savings; -} -#endif // CONFIG_EXT_INTER || !CONFIG_EC_ADAPT - #if CONFIG_VAR_TX -static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd, +static void write_tx_size_vartx(const AV1_COMMON *cm, MACROBLOCKD *xd, const MB_MODE_INFO *mbmi, TX_SIZE tx_size, int depth, int blk_row, int blk_col, aom_writer *w) { +#if CONFIG_NEW_MULTISYMBOL + FRAME_CONTEXT *ec_ctx = xd->tile_ctx; + (void)cm; +#endif const int tx_row = blk_row >> 1; const int tx_col = blk_col >> 1; const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0); @@ -374,16 +355,31 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd, return; } +#if CONFIG_RECT_TX_EXT + if (tx_size == mbmi->inter_tx_size[tx_row][tx_col] || + mbmi->tx_size == quarter_txsize_lookup[mbmi->sb_type]) { +#else if (tx_size == mbmi->inter_tx_size[tx_row][tx_col]) { +#endif +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, 0, ec_ctx->txfm_partition_cdf[ctx], 2); +#else aom_write(w, 0, cm->fc->txfm_partition_prob[ctx]); +#endif + txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size); + // TODO(yuec): set correct txfm partition update for qttx } else { const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; const int bsl = tx_size_wide_unit[sub_txs]; int i; +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, 1, ec_ctx->txfm_partition_cdf[ctx], 2); +#else aom_write(w, 1, cm->fc->txfm_partition_prob[ctx]); +#endif if (tx_size == TX_8X8) { txfm_partition_update(xd->above_txfm_context + blk_col, @@ -401,6 +397,7 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd, } } +#if !CONFIG_NEW_MULTISYMBOL static void update_txfm_partition_probs(AV1_COMMON *cm, aom_writer *w, FRAME_COUNTS *counts, int probwt) { int k; @@ -408,18 +405,15 @@ static void update_txfm_partition_probs(AV1_COMMON *cm, aom_writer *w, av1_cond_prob_diff_update(w, &cm->fc->txfm_partition_prob[k], counts->txfm_partition[k], probwt); } +#endif // CONFIG_NEW_MULTISYMBOL #endif static void write_selected_tx_size(const AV1_COMMON *cm, const MACROBLOCKD *xd, aom_writer *w) { const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; -#if CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; (void)cm; -#else - FRAME_CONTEXT *ec_ctx = cm->fc; -#endif // For sub8x8 blocks the tx_size symbol does not need to be sent #if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_EXT_TX) && CONFIG_RECT_TX if (bsize > BLOCK_4X4) { @@ -439,22 +433,19 @@ static void write_selected_tx_size(const AV1_COMMON *cm, const MACROBLOCKD *xd, aom_write_symbol(w, depth, ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], tx_size_cat + 2); -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) if (is_quarter_tx_allowed(xd, mbmi, is_inter) && 
tx_size != coded_tx_size) aom_write(w, tx_size == quarter_txsize_lookup[bsize], cm->fc->quarter_tx_size_prob); -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif } } +#if !CONFIG_NEW_MULTISYMBOL static void update_inter_mode_probs(AV1_COMMON *cm, aom_writer *w, FRAME_COUNTS *counts) { int i; -#if CONFIG_TILE_GROUPS const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) av1_cond_prob_diff_update(w, &cm->fc->newmv_prob[i], counts->newmv_mode[i], probwt); @@ -468,31 +459,7 @@ static void update_inter_mode_probs(AV1_COMMON *cm, aom_writer *w, av1_cond_prob_diff_update(w, &cm->fc->drl_prob[i], counts->drl_mode[i], probwt); } - -#if CONFIG_EXT_INTER -static void update_inter_compound_mode_probs(AV1_COMMON *cm, int probwt, - aom_writer *w) { - const int savings_thresh = av1_cost_one(GROUP_DIFF_UPDATE_PROB) - - av1_cost_zero(GROUP_DIFF_UPDATE_PROB); - int i; - int savings = 0; - int do_update = 0; - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { - savings += prob_diff_update_savings( - av1_inter_compound_mode_tree, cm->fc->inter_compound_mode_probs[i], - cm->counts.inter_compound_mode[i], INTER_COMPOUND_MODES, probwt); - } - do_update = savings > savings_thresh; - aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB); - if (do_update) { - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { - prob_diff_update( - av1_inter_compound_mode_tree, cm->fc->inter_compound_mode_probs[i], - cm->counts.inter_compound_mode[i], INTER_COMPOUND_MODES, probwt, w); - } - } -} -#endif // CONFIG_EXT_INTER +#endif static int write_skip(const AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id, const MODE_INFO *mi, aom_writer *w) { @@ -500,35 +467,100 @@ static int write_skip(const AV1_COMMON *cm, const MACROBLOCKD *xd, return 1; } else { const int skip = mi->mbmi.skip; +#if CONFIG_NEW_MULTISYMBOL + FRAME_CONTEXT *ec_ctx = xd->tile_ctx; + const int ctx = av1_get_skip_context(xd); + aom_write_symbol(w, skip, ec_ctx->skip_cdfs[ctx], 2); +#else aom_write(w, skip, av1_get_skip_prob(cm, xd)); +#endif return skip; } } +static void write_is_inter(const AV1_COMMON *cm, const MACROBLOCKD *xd, + int segment_id, aom_writer *w, const int is_inter) { + if (!segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { +#if CONFIG_NEW_MULTISYMBOL + FRAME_CONTEXT *ec_ctx = xd->tile_ctx; + const int ctx = av1_get_intra_inter_context(xd); + aom_write_symbol(w, is_inter, ec_ctx->intra_inter_cdf[ctx], 2); +#else + aom_write(w, is_inter, av1_get_intra_inter_prob(cm, xd)); +#endif + } +} + #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION -static void write_motion_mode(const AV1_COMMON *cm, const MODE_INFO *mi, - aom_writer *w) { +static void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd, + const MODE_INFO *mi, aom_writer *w) { const MB_MODE_INFO *mbmi = &mi->mbmi; + +#if CONFIG_NCOBMC_ADAPT_WEIGHT + MOTION_MODE last_motion_mode_allowed = + motion_mode_allowed_wrapper(0, +#if CONFIG_GLOBAL_MOTION + 0, cm->global_motion, +#endif // CONFIG_GLOBAL_MOTION + mi); +#else MOTION_MODE last_motion_mode_allowed = motion_mode_allowed( -#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#if CONFIG_GLOBAL_MOTION 0, cm->global_motion, -#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif mi); - +#endif // CONFIG_NCOBMC_ADAPT_WEIGHT if (last_motion_mode_allowed == SIMPLE_TRANSLATION) return; #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION if (last_motion_mode_allowed == OBMC_CAUSAL) { +#if CONFIG_NEW_MULTISYMBOL 
+ aom_write_symbol(w, mbmi->motion_mode == OBMC_CAUSAL, + xd->tile_ctx->obmc_cdf[mbmi->sb_type], 2); +#else aom_write(w, mbmi->motion_mode == OBMC_CAUSAL, cm->fc->obmc_prob[mbmi->sb_type]); +#endif } else { #endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION - av1_write_token(w, av1_motion_mode_tree, - cm->fc->motion_mode_prob[mbmi->sb_type], - &motion_mode_encodings[mbmi->motion_mode]); + aom_write_symbol(w, mbmi->motion_mode, + xd->tile_ctx->motion_mode_cdf[mbmi->sb_type], + MOTION_MODES); #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION } #endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION } + +#if CONFIG_NCOBMC_ADAPT_WEIGHT +static void write_ncobmc_mode(MACROBLOCKD *xd, const MODE_INFO *mi, + aom_writer *w) { + const MB_MODE_INFO *mbmi = &mi->mbmi; + ADAPT_OVERLAP_BLOCK ao_block = adapt_overlap_block_lookup[mbmi->sb_type]; + if (mbmi->motion_mode != NCOBMC_ADAPT_WEIGHT) return; + +#ifndef TRAINING_WEIGHTS + aom_write_symbol(w, mbmi->ncobmc_mode[0], + xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES); + if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) { + aom_write_symbol(w, mbmi->ncobmc_mode[1], + xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES); + } +#else + int block; + for (block = 0; block < 4; ++block) + aom_write_symbol(w, mbmi->ncobmc_mode[0][block], + xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES); + if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) { + for (block = 0; block < 4; ++block) + aom_write_symbol(w, mbmi->ncobmc_mode[1][block], + xd->tile_ctx->ncobmc_mode_cdf[ao_block], + MAX_NCOBMC_MODES); + } +#endif +} +#endif #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_DELTA_Q @@ -538,13 +570,8 @@ static void write_delta_qindex(const AV1_COMMON *cm, const MACROBLOCKD *xd, int abs = sign ? -delta_qindex : delta_qindex; int rem_bits, thr; int smallval = abs < DELTA_Q_SMALL ? 1 : 0; -#if CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; (void)cm; -#else - FRAME_CONTEXT *ec_ctx = cm->fc; - (void)xd; -#endif aom_write_symbol(w, AOMMIN(abs, DELTA_Q_SMALL), ec_ctx->delta_q_cdf, DELTA_Q_PROBS + 1); @@ -560,25 +587,6 @@ static void write_delta_qindex(const AV1_COMMON *cm, const MACROBLOCKD *xd, } } -#if !CONFIG_EC_ADAPT -static void update_delta_q_probs(AV1_COMMON *cm, aom_writer *w, - FRAME_COUNTS *counts) { - int k; -#if CONFIG_TILE_GROUPS - const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif -#if CONFIG_EXT_DELTA_Q - if (!cm->delta_q_present_flag) return; -#endif // CONFIG_EXT_DELTA_Q - for (k = 0; k < DELTA_Q_PROBS; ++k) { - av1_cond_prob_diff_update(w, &cm->fc->delta_q_prob[k], counts->delta_q[k], - probwt); - } -} -#endif // CONFIG_EC_ADAPT - #if CONFIG_EXT_DELTA_Q static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd, int delta_lflevel, aom_writer *w) { @@ -586,13 +594,8 @@ static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd, int abs = sign ? -delta_lflevel : delta_lflevel; int rem_bits, thr; int smallval = abs < DELTA_LF_SMALL ? 
1 : 0; -#if CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; (void)cm; -#else - FRAME_CONTEXT *ec_ctx = cm->fc; - (void)xd; -#endif aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), ec_ctx->delta_lf_cdf, DELTA_LF_PROBS + 1); @@ -607,178 +610,32 @@ static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd, aom_write_bit(w, sign); } } - -#if !CONFIG_EC_ADAPT -static void update_delta_lf_probs(AV1_COMMON *cm, aom_writer *w, - FRAME_COUNTS *counts) { - int k; -#if CONFIG_TILE_GROUPS - const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif - if (!cm->delta_lf_present_flag) return; - for (k = 0; k < DELTA_LF_PROBS; ++k) { - av1_cond_prob_diff_update(w, &cm->fc->delta_lf_prob[k], counts->delta_lf[k], - probwt); - } -} -#endif // CONFIG_EC_ADAPT #endif // CONFIG_EXT_DELTA_Q #endif // CONFIG_DELTA_Q +#if !CONFIG_NEW_MULTISYMBOL static void update_skip_probs(AV1_COMMON *cm, aom_writer *w, FRAME_COUNTS *counts) { int k; -#if CONFIG_TILE_GROUPS const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif for (k = 0; k < SKIP_CONTEXTS; ++k) { av1_cond_prob_diff_update(w, &cm->fc->skip_probs[k], counts->skip[k], probwt); } } - -#if !CONFIG_EC_ADAPT -static void update_switchable_interp_probs(AV1_COMMON *cm, aom_writer *w, - FRAME_COUNTS *counts) { - int j; - for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) { -#if CONFIG_TILE_GROUPS - const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif - prob_diff_update( - av1_switchable_interp_tree, cm->fc->switchable_interp_prob[j], - counts->switchable_interp[j], SWITCHABLE_FILTERS, probwt, w); - } -} -#endif - -#if !CONFIG_EC_ADAPT -#if CONFIG_EXT_TX -static void update_ext_tx_probs(AV1_COMMON *cm, aom_writer *w) { - const int savings_thresh = av1_cost_one(GROUP_DIFF_UPDATE_PROB) - - av1_cost_zero(GROUP_DIFF_UPDATE_PROB); - int i, j; - int s; -#if CONFIG_TILE_GROUPS - const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif - for (s = 1; s < EXT_TX_SETS_INTER; ++s) { - int savings = 0; - int do_update = 0; - for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - if (!use_inter_ext_tx_for_txsize[s][i]) continue; - savings += prob_diff_update_savings( - av1_ext_tx_inter_tree[s], cm->fc->inter_ext_tx_prob[s][i], - cm->counts.inter_ext_tx[s][i], - num_ext_tx_set[ext_tx_set_type_inter[s]], probwt); - } - do_update = savings > savings_thresh; - aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB); - if (do_update) { - for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - if (!use_inter_ext_tx_for_txsize[s][i]) continue; - prob_diff_update(av1_ext_tx_inter_tree[s], - cm->fc->inter_ext_tx_prob[s][i], - cm->counts.inter_ext_tx[s][i], - num_ext_tx_set[ext_tx_set_type_inter[s]], probwt, w); - } - } - } - - for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { - int savings = 0; - int do_update = 0; - for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - if (!use_intra_ext_tx_for_txsize[s][i]) continue; - for (j = 0; j < INTRA_MODES; ++j) - savings += prob_diff_update_savings( - av1_ext_tx_intra_tree[s], cm->fc->intra_ext_tx_prob[s][i][j], - cm->counts.intra_ext_tx[s][i][j], - num_ext_tx_set[ext_tx_set_type_intra[s]], probwt); - } - do_update = savings > savings_thresh; - aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB); - if (do_update) { - for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - if (!use_intra_ext_tx_for_txsize[s][i]) continue; - for (j = 0; j < INTRA_MODES; ++j) - prob_diff_update(av1_ext_tx_intra_tree[s], - cm->fc->intra_ext_tx_prob[s][i][j], - cm->counts.intra_ext_tx[s][i][j], - num_ext_tx_set[ext_tx_set_type_intra[s]], probwt, w); - } - } - 
} -} - -#else -static void update_ext_tx_probs(AV1_COMMON *cm, aom_writer *w) { - const int savings_thresh = av1_cost_one(GROUP_DIFF_UPDATE_PROB) - - av1_cost_zero(GROUP_DIFF_UPDATE_PROB); - int i, j; - - int savings = 0; - int do_update = 0; -#if CONFIG_TILE_GROUPS - const int probwt = cm->num_tg; -#else - const int probwt = 1; #endif - for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - for (j = 0; j < TX_TYPES; ++j) - savings += prob_diff_update_savings( - av1_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j], - cm->counts.intra_ext_tx[i][j], TX_TYPES, probwt); - } - do_update = savings > savings_thresh; - aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB); - if (do_update) { - for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - for (j = 0; j < TX_TYPES; ++j) { - prob_diff_update(av1_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j], - cm->counts.intra_ext_tx[i][j], TX_TYPES, probwt, w); - } - } - } - savings = 0; - for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - savings += - prob_diff_update_savings(av1_ext_tx_tree, cm->fc->inter_ext_tx_prob[i], - cm->counts.inter_ext_tx[i], TX_TYPES, probwt); - } - do_update = savings > savings_thresh; - aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB); - if (do_update) { - for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - prob_diff_update(av1_ext_tx_tree, cm->fc->inter_ext_tx_prob[i], - cm->counts.inter_ext_tx[i], TX_TYPES, probwt, w); - } - } -} -#endif // CONFIG_EXT_TX -#endif // !CONFIG_EC_ADAPT #if CONFIG_PALETTE static void pack_palette_tokens(aom_writer *w, const TOKENEXTRA **tp, int n, int num) { - int i; const TOKENEXTRA *p = *tp; - - for (i = 0; i < num; ++i) { - av1_write_token( - w, av1_palette_color_index_tree[n - PALETTE_MIN_SIZE], p->context_tree, - &palette_color_index_encodings[n - PALETTE_MIN_SIZE][p->token]); + write_uniform(w, n, p->token); // The first color index. + ++p; + --num; + for (int i = 0; i < num; ++i) { + aom_write_symbol(w, p->token, p->palette_cdf, n); ++p; } - *tp = p; } #endif // CONFIG_PALETTE @@ -930,8 +787,16 @@ static void pack_pvq_tokens(aom_writer *w, MACROBLOCK *const x, int max_blocks_wide; int max_blocks_high; int step = (1 << tx_size); + +#if CONFIG_CHROMA_SUB8X8 + const BLOCK_SIZE plane_bsize = + AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); +#elif CONFIG_CB4X4 + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); +#else const BLOCK_SIZE plane_bsize = - get_plane_block_size(AOMMAX(bsize, BLOCK_8X8), pd); + get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd); +#endif adapt = x->daala_enc.state.adapt; @@ -1030,7 +895,8 @@ static void pack_txb_tokens(aom_writer *w, uint16_t eob = x->mbmi_ext->eobs[plane][block]; TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block], x->mbmi_ext->dc_sign_ctx[plane][block] }; - av1_write_coeffs_txb(cm, xd, w, block, plane, tcoeff, eob, &txb_ctx); + av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, block, plane, tx_size, + tcoeff, eob, &txb_ctx); #else pack_pvq_tokens(w, x, xd, plane, bsize, tx_size); #endif @@ -1103,15 +969,30 @@ static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp, token_stats->cost += tmp_token_stats.cost; #endif } else { +#if CONFIG_RECT_TX_EXT + int is_qttx = plane_tx_size == quarter_txsize_lookup[plane_bsize]; + const TX_SIZE sub_txs = is_qttx ? 
plane_tx_size : sub_tx_size_map[tx_size]; +#else const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; +#endif const int bsl = tx_size_wide_unit[sub_txs]; int i; assert(bsl > 0); for (i = 0; i < 4; ++i) { +#if CONFIG_RECT_TX_EXT + int is_wide_tx = tx_size_wide_unit[sub_txs] > tx_size_high_unit[sub_txs]; + const int offsetr = + is_qttx ? (is_wide_tx ? i * tx_size_high_unit[sub_txs] : 0) + : blk_row + (i >> 1) * bsl; + const int offsetc = + is_qttx ? (is_wide_tx ? 0 : i * tx_size_wide_unit[sub_txs]) + : blk_col + (i & 0x01) * bsl; +#else const int offsetr = blk_row + (i >> 1) * bsl; const int offsetc = blk_col + (i & 0x01) * bsl; +#endif const int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs]; if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; @@ -1136,6 +1017,14 @@ static void write_segment_id(aom_writer *w, const struct segmentation *seg, } } +#if CONFIG_NEW_MULTISYMBOL +#define WRITE_REF_BIT(bname, pname) \ + aom_write_symbol(w, bname, av1_get_pred_cdf_##pname(cm, xd), 2) +#else +#define WRITE_REF_BIT(bname, pname) \ + aom_write(w, bname, av1_get_pred_prob_##pname(cm, xd)) +#endif + // This function encodes the reference frame static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd, aom_writer *w) { @@ -1153,66 +1042,183 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd, // does the feature use compound prediction or not // (if not specified at the frame/segment level) if (cm->reference_mode == REFERENCE_MODE_SELECT) { -#if SUB8X8_COMP_REF - aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd)); -#else +#if !SUB8X8_COMP_REF if (mbmi->sb_type != BLOCK_4X4) - aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd)); +#endif +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, is_compound, av1_get_reference_mode_cdf(cm, xd), 2); +#else + aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd)); #endif } else { assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE)); } if (is_compound) { +#if CONFIG_EXT_COMP_REFS + const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi) + ? 
UNIDIR_COMP_REFERENCE + : BIDIR_COMP_REFERENCE; +#if USE_UNI_COMP_REFS +#if CONFIG_VAR_REFS + if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm)) + if (L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm) || BWD_AND_ALT(cm)) +#endif // CONFIG_VAR_REFS + aom_write(w, comp_ref_type, av1_get_comp_reference_type_prob(cm, xd)); +#if CONFIG_VAR_REFS + else + assert(comp_ref_type == BIDIR_COMP_REFERENCE); + else + assert(comp_ref_type == UNIDIR_COMP_REFERENCE); +#endif // CONFIG_VAR_REFS +#else // !USE_UNI_COMP_REFS + // NOTE: uni-directional comp refs disabled + assert(comp_ref_type == BIDIR_COMP_REFERENCE); +#endif // USE_UNI_COMP_REFS + + if (comp_ref_type == UNIDIR_COMP_REFERENCE) { + const int bit = mbmi->ref_frame[0] == BWDREF_FRAME; +#if CONFIG_VAR_REFS + if ((L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm)) && BWD_AND_ALT(cm)) +#endif // CONFIG_VAR_REFS + aom_write(w, bit, av1_get_pred_prob_uni_comp_ref_p(cm, xd)); + + if (!bit) { + assert(mbmi->ref_frame[0] == LAST_FRAME); +#if CONFIG_VAR_REFS + if (L_AND_L2(cm) && (L_AND_L3(cm) || L_AND_G(cm))) { +#endif // CONFIG_VAR_REFS + const int bit1 = mbmi->ref_frame[1] == LAST3_FRAME || + mbmi->ref_frame[1] == GOLDEN_FRAME; + aom_write(w, bit1, av1_get_pred_prob_uni_comp_ref_p1(cm, xd)); + + if (bit1) { +#if CONFIG_VAR_REFS + if (L_AND_L3(cm) && L_AND_G(cm)) { +#endif // CONFIG_VAR_REFS + const int bit2 = mbmi->ref_frame[1] == GOLDEN_FRAME; + aom_write(w, bit2, av1_get_pred_prob_uni_comp_ref_p2(cm, xd)); +#if CONFIG_VAR_REFS + } +#endif // CONFIG_VAR_REFS + } +#if CONFIG_VAR_REFS + } +#endif // CONFIG_VAR_REFS + } else { + assert(mbmi->ref_frame[1] == ALTREF_FRAME); + } + + return; + } + + assert(comp_ref_type == BIDIR_COMP_REFERENCE); +#endif // CONFIG_EXT_COMP_REFS + #if CONFIG_EXT_REFS const int bit = (mbmi->ref_frame[0] == GOLDEN_FRAME || mbmi->ref_frame[0] == LAST3_FRAME); - const int bit_bwd = mbmi->ref_frame[1] == ALTREF_FRAME; -#else // CONFIG_EXT_REFS - const int bit = mbmi->ref_frame[0] == GOLDEN_FRAME; -#endif // CONFIG_EXT_REFS - - aom_write(w, bit, av1_get_pred_prob_comp_ref_p(cm, xd)); +#if CONFIG_VAR_REFS + // Test need to explicitly code (L,L2) vs (L3,G) branch node in tree + if (L_OR_L2(cm) && L3_OR_G(cm)) +#endif // CONFIG_VAR_REFS + WRITE_REF_BIT(bit, comp_ref_p); -#if CONFIG_EXT_REFS if (!bit) { - const int bit1 = mbmi->ref_frame[0] == LAST_FRAME; - aom_write(w, bit1, av1_get_pred_prob_comp_ref_p1(cm, xd)); +#if CONFIG_VAR_REFS + // Test need to explicitly code (L) vs (L2) branch node in tree + if (L_AND_L2(cm)) { +#endif // CONFIG_VAR_REFS + const int bit1 = mbmi->ref_frame[0] == LAST_FRAME; + WRITE_REF_BIT(bit1, comp_ref_p1); +#if CONFIG_VAR_REFS + } +#endif // CONFIG_VAR_REFS } else { - const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME; - aom_write(w, bit2, av1_get_pred_prob_comp_ref_p2(cm, xd)); +#if CONFIG_VAR_REFS + // Test need to explicitly code (L3) vs (G) branch node in tree + if (L3_AND_G(cm)) { +#endif // CONFIG_VAR_REFS + const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME; + WRITE_REF_BIT(bit2, comp_ref_p2); +#if CONFIG_VAR_REFS + } +#endif // CONFIG_VAR_REFS } - aom_write(w, bit_bwd, av1_get_pred_prob_comp_bwdref_p(cm, xd)); + +#if CONFIG_VAR_REFS + // Test need to explicitly code (BWD) vs (ALT) branch node in tree + if (BWD_AND_ALT(cm)) { +#endif // CONFIG_VAR_REFS + const int bit_bwd = mbmi->ref_frame[1] == ALTREF_FRAME; + WRITE_REF_BIT(bit_bwd, comp_bwdref_p); +#if CONFIG_VAR_REFS + } +#endif // CONFIG_VAR_REFS + +#else // !CONFIG_EXT_REFS + const int bit = mbmi->ref_frame[0] == GOLDEN_FRAME; + WRITE_REF_BIT(bit, 
comp_ref_p); #endif // CONFIG_EXT_REFS } else { #if CONFIG_EXT_REFS const int bit0 = (mbmi->ref_frame[0] == ALTREF_FRAME || mbmi->ref_frame[0] == BWDREF_FRAME); - aom_write(w, bit0, av1_get_pred_prob_single_ref_p1(cm, xd)); +#if CONFIG_VAR_REFS + // Test need to explicitly code (L,L2,L3,G) vs (BWD,ALT) branch node in + // tree + if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm)) +#endif // CONFIG_VAR_REFS + WRITE_REF_BIT(bit0, single_ref_p1); if (bit0) { - const int bit1 = mbmi->ref_frame[0] == ALTREF_FRAME; - aom_write(w, bit1, av1_get_pred_prob_single_ref_p2(cm, xd)); +#if CONFIG_VAR_REFS + // Test need to explicitly code (BWD) vs (ALT) branch node in tree + if (BWD_AND_ALT(cm)) { +#endif // CONFIG_VAR_REFS + const int bit1 = mbmi->ref_frame[0] == ALTREF_FRAME; + WRITE_REF_BIT(bit1, single_ref_p2); +#if CONFIG_VAR_REFS + } +#endif // CONFIG_VAR_REFS } else { const int bit2 = (mbmi->ref_frame[0] == LAST3_FRAME || mbmi->ref_frame[0] == GOLDEN_FRAME); - aom_write(w, bit2, av1_get_pred_prob_single_ref_p3(cm, xd)); +#if CONFIG_VAR_REFS + // Test need to explicitly code (L,L2) vs (L3,G) branch node in tree + if (L_OR_L2(cm) && L3_OR_G(cm)) +#endif // CONFIG_VAR_REFS + WRITE_REF_BIT(bit2, single_ref_p3); if (!bit2) { - const int bit3 = mbmi->ref_frame[0] != LAST_FRAME; - aom_write(w, bit3, av1_get_pred_prob_single_ref_p4(cm, xd)); +#if CONFIG_VAR_REFS + // Test need to explicitly code (L) vs (L2) branch node in tree + if (L_AND_L2(cm)) { +#endif // CONFIG_VAR_REFS + const int bit3 = mbmi->ref_frame[0] != LAST_FRAME; + WRITE_REF_BIT(bit3, single_ref_p4); +#if CONFIG_VAR_REFS + } +#endif // CONFIG_VAR_REFS } else { - const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME; - aom_write(w, bit4, av1_get_pred_prob_single_ref_p5(cm, xd)); +#if CONFIG_VAR_REFS + // Test need to explicitly code (L3) vs (G) branch node in tree + if (L3_AND_G(cm)) { +#endif // CONFIG_VAR_REFS + const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME; + WRITE_REF_BIT(bit4, single_ref_p5); +#if CONFIG_VAR_REFS + } +#endif // CONFIG_VAR_REFS } } -#else // CONFIG_EXT_REFS +#else // !CONFIG_EXT_REFS const int bit0 = mbmi->ref_frame[0] != LAST_FRAME; - aom_write(w, bit0, av1_get_pred_prob_single_ref_p1(cm, xd)); + WRITE_REF_BIT(bit0, single_ref_p1); if (bit0) { const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME; - aom_write(w, bit1, av1_get_pred_prob_single_ref_p2(cm, xd)); + WRITE_REF_BIT(bit1, single_ref_p2); } #endif // CONFIG_EXT_REFS } @@ -1250,7 +1256,7 @@ static void write_filter_intra_mode_info(const AV1_COMMON *const cm, (void)mi_col; #endif // CONFIG_CB4X4 - if (mbmi->uv_mode == DC_PRED + if (mbmi->uv_mode == UV_DC_PRED #if CONFIG_PALETTE && mbmi->palette_mode_info.palette_size[1] == 0 #endif // CONFIG_PALETTE @@ -1277,7 +1283,7 @@ static void write_intra_angle_info(const MACROBLOCKD *xd, #endif // CONFIG_INTRA_INTERP (void)ec_ctx; - if (bsize < BLOCK_8X8) return; + if (!av1_use_angle_delta(bsize)) return; if (av1_is_directional_mode(mbmi->mode, bsize)) { write_uniform(w, 2 * MAX_ANGLE_DELTA + 1, @@ -1292,7 +1298,7 @@ static void write_intra_angle_info(const MACROBLOCKD *xd, #endif // CONFIG_INTRA_INTERP } - if (av1_is_directional_mode(mbmi->uv_mode, bsize)) { + if (av1_is_directional_mode(get_uv_mode(mbmi->uv_mode), bsize)) { write_uniform(w, 2 * MAX_ANGLE_DELTA + 1, MAX_ANGLE_DELTA + mbmi->angle_delta[1]); } @@ -1303,11 +1309,7 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd, aom_writer *w) { AV1_COMMON *const cm = &cpi->common; const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; -#if 
CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *ec_ctx = cm->fc; -#endif if (!av1_is_interp_needed(xd)) { #if CONFIG_DUAL_FILTER @@ -1485,19 +1487,21 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd, if (mbmi->mode == DC_PRED) { const int n = pmi->palette_size[0]; int palette_y_mode_ctx = 0; - if (above_mi) + if (above_mi) { palette_y_mode_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0); - if (left_mi) + } + if (left_mi) { palette_y_mode_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0); + } aom_write( w, n > 0, av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_y_mode_ctx]); if (n > 0) { - av1_write_token(w, av1_palette_size_tree, - av1_default_palette_y_size_prob[bsize - BLOCK_8X8], - &palette_size_encodings[n - PALETTE_MIN_SIZE]); + aom_write_symbol(w, n - PALETTE_MIN_SIZE, + xd->tile_ctx->palette_y_size_cdf[bsize - BLOCK_8X8], + PALETTE_SIZES); #if CONFIG_PALETTE_DELTA_ENCODING write_palette_colors_y(xd, pmi, cm->bit_depth, w); #else @@ -1506,18 +1510,17 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd, aom_write_literal(w, pmi->palette_colors[i], cm->bit_depth); } #endif // CONFIG_PALETTE_DELTA_ENCODING - write_uniform(w, n, pmi->palette_first_color_idx[0]); } } - if (mbmi->uv_mode == DC_PRED) { + if (mbmi->uv_mode == UV_DC_PRED) { const int n = pmi->palette_size[1]; const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0); aom_write(w, n > 0, av1_default_palette_uv_mode_prob[palette_uv_mode_ctx]); if (n > 0) { - av1_write_token(w, av1_palette_size_tree, - av1_default_palette_uv_size_prob[bsize - BLOCK_8X8], - &palette_size_encodings[n - PALETTE_MIN_SIZE]); + aom_write_symbol(w, n - PALETTE_MIN_SIZE, + xd->tile_ctx->palette_uv_size_cdf[bsize - BLOCK_8X8], + PALETTE_SIZES); #if CONFIG_PALETTE_DELTA_ENCODING write_palette_colors_uv(xd, pmi, cm->bit_depth, w); #else @@ -1532,7 +1535,6 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd, cm->bit_depth); } #endif // CONFIG_PALETTE_DELTA_ENCODING - write_uniform(w, n, pmi->palette_first_color_idx[1]); } } } @@ -1543,21 +1545,20 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, const int supertx_enabled, #endif #if CONFIG_TXK_SEL - int block, int plane, + int blk_row, int blk_col, int block, int plane, + TX_SIZE tx_size, #endif aom_writer *w) { MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_inter = is_inter_block(mbmi); +#if !CONFIG_TXK_SEL #if CONFIG_VAR_TX const TX_SIZE tx_size = is_inter ? 
mbmi->min_tx_size : mbmi->tx_size; #else const TX_SIZE tx_size = mbmi->tx_size; #endif // CONFIG_VAR_TX -#if CONFIG_EC_ADAPT +#endif // !CONFIG_TXK_SEL FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *ec_ctx = cm->fc; -#endif #if !CONFIG_TXK_SEL TX_TYPE tx_type = mbmi->tx_type; @@ -1565,7 +1566,8 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, // Only y plane's tx_type is transmitted if (plane > 0) return; PLANE_TYPE plane_type = get_plane_type(plane); - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); #endif if (!FIXED_TX_TYPE) { @@ -1583,21 +1585,20 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { const int eset = get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used); + // eset == 0 should correspond to a set with only DCT_DCT and there + // is no need to send the tx_type + assert(eset > 0); if (is_inter) { assert(ext_tx_used_inter[eset][tx_type]); - if (eset > 0) { - aom_write_symbol(w, av1_ext_tx_inter_ind[eset][tx_type], - ec_ctx->inter_ext_tx_cdf[eset][square_tx_size], - ext_tx_cnt_inter[eset]); - } + aom_write_symbol(w, av1_ext_tx_inter_ind[eset][tx_type], + ec_ctx->inter_ext_tx_cdf[eset][square_tx_size], + ext_tx_cnt_inter[eset]); } else if (ALLOW_INTRA_EXT_TX) { assert(ext_tx_used_intra[eset][tx_type]); - if (eset > 0) { - aom_write_symbol( - w, av1_ext_tx_intra_ind[eset][tx_type], - ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode], - ext_tx_cnt_intra[eset]); - } + aom_write_symbol( + w, av1_ext_tx_intra_ind[eset][tx_type], + ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode], + ext_tx_cnt_intra[eset]); } } #else @@ -1632,36 +1633,30 @@ static void write_intra_mode(FRAME_CONTEXT *frame_ctx, BLOCK_SIZE bsize, } static void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx, - PREDICTION_MODE uv_mode, PREDICTION_MODE y_mode, - aom_writer *w) { - aom_write_symbol(w, av1_intra_mode_ind[uv_mode], - frame_ctx->uv_mode_cdf[y_mode], INTRA_MODES); + UV_PREDICTION_MODE uv_mode, + PREDICTION_MODE y_mode, aom_writer *w) { + aom_write_symbol(w, av1_intra_mode_ind[get_uv_mode(uv_mode)], + frame_ctx->uv_mode_cdf[y_mode], UV_INTRA_MODES); } #if CONFIG_CFL -static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, int skip, int ind, +static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, int ind, const CFL_SIGN_TYPE signs[CFL_SIGNS], aom_writer *w) { - if (skip) { - assert(ind == 0); + // Check for uninitialized signs + if (cfl_alpha_codes[ind][CFL_PRED_U] == 0) assert(signs[CFL_PRED_U] == CFL_SIGN_POS); + if (cfl_alpha_codes[ind][CFL_PRED_V] == 0) assert(signs[CFL_PRED_V] == CFL_SIGN_POS); - } else { - // Check for uninitialized signs - if (cfl_alpha_codes[ind][CFL_PRED_U] == 0) - assert(signs[CFL_PRED_U] == CFL_SIGN_POS); - if (cfl_alpha_codes[ind][CFL_PRED_V] == 0) - assert(signs[CFL_PRED_V] == CFL_SIGN_POS); - - // Write a symbol representing a combination of alpha Cb and alpha Cr. - aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE); - - // Signs are only signaled for nonzero codes. - if (cfl_alpha_codes[ind][CFL_PRED_U] != 0) - aom_write_bit(w, signs[CFL_PRED_U]); - if (cfl_alpha_codes[ind][CFL_PRED_V] != 0) - aom_write_bit(w, signs[CFL_PRED_V]); - } + + // Write a symbol representing a combination of alpha Cb and alpha Cr. 
+ aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE); + + // Signs are only signaled for nonzero codes. + if (cfl_alpha_codes[ind][CFL_PRED_U] != 0) + aom_write_bit(w, signs[CFL_PRED_U]); + if (cfl_alpha_codes[ind][CFL_PRED_V] != 0) + aom_write_bit(w, signs[CFL_PRED_V]); } #endif @@ -1672,22 +1667,13 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #endif aom_writer *w) { AV1_COMMON *const cm = &cpi->common; -#if CONFIG_DELTA_Q || CONFIG_EC_ADAPT MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; -#else - const MACROBLOCK *x = &cpi->td.mb; - const MACROBLOCKD *xd = &x->e_mbd; -#endif -#if CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *ec_ctx = cm->fc; -#endif const MODE_INFO *mi = xd->mi[0]; const struct segmentation *const seg = &cm->seg; - struct segmentation_probs *const segp = &cm->fc->seg; + struct segmentation_probs *const segp = &ec_ctx->seg; const MB_MODE_INFO *const mbmi = &mi->mbmi; const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const PREDICTION_MODE mode = mbmi->mode; @@ -1708,8 +1694,13 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, if (seg->update_map) { if (seg->temporal_update) { const int pred_flag = mbmi->seg_id_predicted; +#if CONFIG_NEW_MULTISYMBOL + aom_cdf_prob *pred_cdf = av1_get_pred_cdf_seg_id(segp, xd); + aom_write_symbol(w, pred_flag, pred_cdf, 2); +#else aom_prob pred_prob = av1_get_pred_prob_seg_id(segp, xd); aom_write(w, pred_flag, pred_prob); +#endif if (!pred_flag) write_segment_id(w, seg, segp, segment_id); } else { write_segment_id(w, seg, segp, segment_id); @@ -1750,8 +1741,7 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #if CONFIG_SUPERTX if (!supertx_enabled) #endif // CONFIG_SUPERTX - if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) - aom_write(w, is_inter, av1_get_intra_inter_prob(cm, xd)); + write_is_inter(cm, xd, mbmi->segment_id, w, is_inter); if (cm->tx_mode == TX_MODE_SELECT && #if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX) @@ -1779,6 +1769,15 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, for (idx = 0; idx < width; idx += bw) write_tx_size_vartx(cm, xd, mbmi, max_tx_size, height != width, idy, idx, w); +#if CONFIG_RECT_TX_EXT + if (is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) && + quarter_txsize_lookup[bsize] != max_tx_size && + (mbmi->tx_size == quarter_txsize_lookup[bsize] || + mbmi->tx_size == max_tx_size)) { + aom_write(w, mbmi->tx_size != max_tx_size, + cm->fc->quarter_tx_size_prob); + } +#endif } else { set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, skip, xd); write_selected_tx_size(cm, xd, w); @@ -1813,9 +1812,8 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #endif // CONFIG_CB4X4 #if CONFIG_CFL - if (mbmi->uv_mode == DC_PRED) { - write_cfl_alphas(ec_ctx, mbmi->skip, mbmi->cfl_alpha_idx, - mbmi->cfl_alpha_signs, w); + if (mbmi->uv_mode == UV_DC_PRED) { + write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, w); } #endif @@ -1838,11 +1836,25 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, int16_t mode_ctx; write_ref_frames(cm, xd, w); +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { + // NOTE: Handle single ref comp mode + if (!is_compound) + aom_write(w, is_inter_singleref_comp_mode(mode), + av1_get_inter_mode_prob(cm, xd)); + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + #if CONFIG_EXT_INTER +#if 
CONFIG_COMPOUND_SINGLEREF + if (is_compound || is_inter_singleref_comp_mode(mode)) +#else // !CONFIG_COMPOUND_SINGLEREF if (is_compound) +#endif // CONFIG_COMPOUND_SINGLEREF mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; else #endif // CONFIG_EXT_INTER + mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame, bsize, -1); @@ -1851,18 +1863,25 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, if (bsize >= BLOCK_8X8 || unify_bsize) { #if CONFIG_EXT_INTER if (is_inter_compound_mode(mode)) - write_inter_compound_mode(cm, w, mode, mode_ctx); + write_inter_compound_mode(cm, xd, w, mode, mode_ctx); +#if CONFIG_COMPOUND_SINGLEREF + else if (is_inter_singleref_comp_mode(mode)) + write_inter_singleref_comp_mode(xd, w, mode, mode_ctx); +#endif // CONFIG_COMPOUND_SINGLEREF else if (is_inter_singleref_mode(mode)) #endif // CONFIG_EXT_INTER write_inter_mode(w, mode, ec_ctx, mode_ctx); #if CONFIG_EXT_INTER if (mode == NEWMV || mode == NEW_NEWMV || +#if CONFIG_COMPOUND_SINGLEREF + mbmi->mode == SR_NEW_NEWMV || +#endif // CONFIG_COMPOUND_SINGLEREF have_nearmv_in_inter_mode(mode)) -#else +#else // !CONFIG_EXT_INTER if (mode == NEARMV || mode == NEWMV) -#endif - write_drl_idx(cm, mbmi, mbmi_ext, w); +#endif // CONFIG_EXT_INTER + write_drl_idx(ec_ctx, mbmi, mbmi_ext, w); else assert(mbmi->ref_mv_idx == 0); } @@ -1873,6 +1892,10 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #endif // !CONFIG_DUAL_FILTER && !CONFIG_WARPED_MOTION if (bsize < BLOCK_8X8 && !unify_bsize) { +#if CONFIG_COMPOUND_SINGLEREF + /// NOTE: Single ref comp mode does not support sub8x8. + assert(is_compound || !is_inter_singleref_comp_mode(mbmi->mode)); +#endif // CONFIG_COMPOUND_SINGLEREF const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; int idx, idy; @@ -1887,7 +1910,7 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, mbmi->ref_frame, bsize, j); #if CONFIG_EXT_INTER if (is_inter_compound_mode(b_mode)) - write_inter_compound_mode(cm, w, b_mode, mode_ctx); + write_inter_compound_mode(cm, xd, w, b_mode, mode_ctx); else if (is_inter_singleref_mode(b_mode)) #endif // CONFIG_EXT_INTER write_inter_mode(w, b_mode, ec_ctx, mode_ctx); @@ -1969,6 +1992,22 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, av1_encode_mv(cpi, w, &mbmi->mv[0].as_mv, &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv, nmvc, allow_hp); +#if CONFIG_COMPOUND_SINGLEREF + } else if ( // mode == SR_NEAREST_NEWMV || + mode == SR_NEAR_NEWMV || mode == SR_ZERO_NEWMV || + mode == SR_NEW_NEWMV) { + int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); + int nmv_ctx = + av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], + mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx); + nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx]; + int_mv ref_mv = mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0]; + if (mode == SR_NEW_NEWMV) + av1_encode_mv(cpi, w, &mbmi->mv[0].as_mv, &ref_mv.as_mv, nmvc, + allow_hp); + av1_encode_mv(cpi, w, &mbmi->mv[1].as_mv, &ref_mv.as_mv, nmvc, + allow_hp); +#endif // CONFIG_COMPOUND_SINGLEREF #endif // CONFIG_EXT_INTER } } @@ -1981,13 +2020,23 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, cpi->common.allow_interintra_compound && is_interintra_allowed(mbmi)) { const int interintra = mbmi->ref_frame[1] == INTRA_FRAME; const int bsize_group = size_group_lookup[bsize]; +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, interintra, ec_ctx->interintra_cdf[bsize_group], 2); +#else aom_write(w, 
interintra, cm->fc->interintra_prob[bsize_group]); +#endif if (interintra) { - write_interintra_mode(w, mbmi->interintra_mode, - cm->fc->interintra_mode_prob[bsize_group]); + aom_write_symbol(w, mbmi->interintra_mode, + ec_ctx->interintra_mode_cdf[bsize_group], + INTERINTRA_MODES); if (is_interintra_wedge_used(bsize)) { +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, mbmi->use_wedge_interintra, + ec_ctx->wedge_interintra_cdf[bsize], 2); +#else aom_write(w, mbmi->use_wedge_interintra, cm->fc->wedge_interintra_prob[bsize]); +#endif if (mbmi->use_wedge_interintra) { aom_write_literal(w, mbmi->interintra_wedge_index, get_wedge_bits_lookup(bsize)); @@ -2005,21 +2054,28 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #if CONFIG_EXT_INTER if (mbmi->ref_frame[1] != INTRA_FRAME) #endif // CONFIG_EXT_INTER - write_motion_mode(cm, mi, w); + write_motion_mode(cm, xd, mi, w); +#if CONFIG_NCOBMC_ADAPT_WEIGHT + write_ncobmc_mode(xd, mi, w); +#endif #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_EXT_INTER - if (cpi->common.reference_mode != SINGLE_REFERENCE && - is_inter_compound_mode(mbmi->mode) + if ( +#if CONFIG_COMPOUND_SINGLEREF + is_inter_anyref_comp_mode(mbmi->mode) && +#else // !CONFIG_COMPOUND_SINGLEREF + cpi->common.reference_mode != SINGLE_REFERENCE && + is_inter_compound_mode(mbmi->mode) && +#endif // CONFIG_COMPOUND_SINGLEREF #if CONFIG_MOTION_VAR - && mbmi->motion_mode == SIMPLE_TRANSLATION + mbmi->motion_mode == SIMPLE_TRANSLATION && #endif // CONFIG_MOTION_VAR - && is_any_masked_compound_used(bsize)) { + is_any_masked_compound_used(bsize)) { #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE if (cm->allow_masked_compound) { - av1_write_token( - w, av1_compound_type_tree, cm->fc->compound_type_prob[bsize], - &compound_type_encodings[mbmi->interinter_compound_type]); + aom_write_symbol(w, mbmi->interinter_compound_type, + ec_ctx->compound_type_cdf[bsize], COMPOUND_TYPES); #if CONFIG_WEDGE if (mbmi->interinter_compound_type == COMPOUND_WEDGE) { aom_write_literal(w, mbmi->wedge_index, get_wedge_bits_lookup(bsize)); @@ -2061,8 +2117,9 @@ static void write_mb_modes_kf(AV1_COMMON *cm, #endif // CONFIG_INTRABC const int mi_row, const int mi_col, aom_writer *w) { + FRAME_CONTEXT *ec_ctx = xd->tile_ctx; const struct segmentation *const seg = &cm->seg; - struct segmentation_probs *const segp = &cm->fc->seg; + struct segmentation_probs *const segp = &ec_ctx->seg; const MODE_INFO *const mi = xd->mi[0]; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; @@ -2076,12 +2133,6 @@ static void write_mb_modes_kf(AV1_COMMON *cm, (void)mi_row; (void)mi_col; -#if CONFIG_EC_ADAPT - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *ec_ctx = cm->fc; -#endif - if (seg->update_map) write_segment_id(w, seg, segp, mbmi->segment_id); #if CONFIG_DELTA_Q @@ -2110,18 +2161,17 @@ static void write_mb_modes_kf(AV1_COMMON *cm, write_skip(cm, xd, mbmi->segment_id, mi, w); #endif - if (cm->tx_mode == TX_MODE_SELECT && + int enable_tx_size = cm->tx_mode == TX_MODE_SELECT && #if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX) #if CONFIG_RECT_TX - bsize > BLOCK_4X4 && + bsize > BLOCK_4X4 && #else - bsize >= BLOCK_8X8 && + bsize >= BLOCK_8X8 && #endif // CONFIG_RECT_TX #else - bsize >= BLOCK_8X8 && + bsize >= BLOCK_8X8 && #endif - !xd->lossless[mbmi->segment_id]) - write_selected_tx_size(cm, xd, w); + !xd->lossless[mbmi->segment_id]; #if CONFIG_INTRABC if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools) { @@ -2129,7 +2179,8 @@ static void 
write_mb_modes_kf(AV1_COMMON *cm, aom_write(w, use_intrabc, ec_ctx->intrabc_prob); if (use_intrabc) { assert(mbmi->mode == DC_PRED); - assert(mbmi->uv_mode == DC_PRED); + assert(mbmi->uv_mode == UV_DC_PRED); + if (enable_tx_size && !mbmi->skip) write_selected_tx_size(cm, xd, w); int_mv dv_ref = mbmi_ext->ref_mvs[INTRA_FRAME][0]; av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc); #if CONFIG_EXT_TX && !CONFIG_TXK_SEL @@ -2143,6 +2194,7 @@ static void write_mb_modes_kf(AV1_COMMON *cm, } } #endif // CONFIG_INTRABC + if (enable_tx_size) write_selected_tx_size(cm, xd, w); if (bsize >= BLOCK_8X8 || unify_bsize) { write_intra_mode_kf(cm, ec_ctx, mi, above_mi, left_mi, 0, mbmi->mode, w); @@ -2169,9 +2221,8 @@ static void write_mb_modes_kf(AV1_COMMON *cm, #endif // CONFIG_CB4X4 #if CONFIG_CFL - if (mbmi->uv_mode == DC_PRED) { - write_cfl_alphas(ec_ctx, mbmi->skip, mbmi->cfl_alpha_idx, - mbmi->cfl_alpha_signs, w); + if (mbmi->uv_mode == UV_DC_PRED) { + write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, w); } #endif @@ -2252,6 +2303,89 @@ static int rd_token_stats_mismatch(RD_STATS *rd_stats, TOKEN_STATS *token_stats, } #endif +#if ENC_MISMATCH_DEBUG +static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) { + AV1_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; + MODE_INFO *m; + xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + m = xd->mi[0]; + if (is_inter_block(&m->mbmi)) { +#define FRAME_TO_CHECK 1 + if (cm->current_video_frame == FRAME_TO_CHECK /* && cm->show_frame == 1*/) { + const MB_MODE_INFO *const mbmi = &m->mbmi; + const BLOCK_SIZE bsize = mbmi->sb_type; + + int_mv mv[2]; + int is_comp_ref = has_second_ref(&m->mbmi); + int ref; + + for (ref = 0; ref < 1 + is_comp_ref; ++ref) + mv[ref].as_mv = m->mbmi.mv[ref].as_mv; + + if (!is_comp_ref) { +#if CONFIG_COMPOUND_SINGLEREF + if (is_inter_singleref_comp_mode(m->mbmi.mode)) + mv[1].as_mv = m->mbmi.mv[1].as_mv; + else +#endif // CONFIG_COMPOUND_SINGLEREF + mv[1].as_int = 0; + } + int interp_ctx[2] = { -1 }; + int interp_filter[2] = { cm->interp_filter }; + if (cm->interp_filter == SWITCHABLE) { + int dir; + for (dir = 0; dir < 2; ++dir) { + if (has_subpel_mv_component(xd->mi[0], xd, dir) || + (mbmi->ref_frame[1] > INTRA_FRAME && + has_subpel_mv_component(xd->mi[0], xd, dir + 2))) { + interp_ctx[dir] = av1_get_pred_context_switchable_interp(xd, dir); + interp_filter[dir] = mbmi->interp_filter[dir]; + } else { + interp_filter[dir] = EIGHTTAP_REGULAR; + } + } + } + + MACROBLOCK *const x = &cpi->td.mb; + const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; + const int16_t mode_ctx = av1_mode_context_analyzer( + mbmi_ext->mode_context, mbmi->ref_frame, bsize, -1); + const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK; + int16_t zeromv_ctx = -1; + int16_t refmv_ctx = -1; + if (mbmi->mode != NEWMV) { + zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK; + if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) { + assert(mbmi->mode == ZEROMV); + } + if (mbmi->mode != ZEROMV) { + refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK; + if (mode_ctx & (1 << SKIP_NEARESTMV_OFFSET)) refmv_ctx = 6; + if (mode_ctx & (1 << SKIP_NEARMV_OFFSET)) refmv_ctx = 7; + if (mode_ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) refmv_ctx = 8; + } + } + + int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); + printf( + "=== ENCODER ===: " + "Frame=%d, (mi_row,mi_col)=(%d,%d), mode=%d, bsize=%d, " + "show_frame=%d, mv[0]=(%d,%d), mv[1]=(%d,%d), ref[0]=%d, " + "ref[1]=%d, 
motion_mode=%d, inter_mode_ctx=%d, mode_ctx=%d, " + "interp_ctx=(%d,%d), interp_filter=(%d,%d), newmv_ctx=%d, " + "zeromv_ctx=%d, refmv_ctx=%d\n", + cm->current_video_frame, mi_row, mi_col, mbmi->mode, bsize, + cm->show_frame, mv[0].as_mv.row, mv[0].as_mv.col, mv[1].as_mv.row, + mv[1].as_mv.col, mbmi->ref_frame[0], mbmi->ref_frame[1], + mbmi->motion_mode, mbmi_ext->mode_context[ref_frame_type], mode_ctx, + interp_ctx[0], interp_ctx[1], interp_filter[0], interp_filter[1], + newmv_ctx, zeromv_ctx, refmv_ctx); + } + } +} +#endif // ENC_MISMATCH_DEBUG + static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile, aom_writer *w, #if CONFIG_SUPERTX @@ -2265,7 +2399,8 @@ static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile, xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); m = xd->mi[0]; - assert(m->mbmi.sb_type <= cm->sb_size); + assert(m->mbmi.sb_type <= cm->sb_size || + (m->mbmi.sb_type >= BLOCK_4X16 && m->mbmi.sb_type <= BLOCK_32X8)); bh = mi_size_high[m->mbmi.sb_type]; bw = mi_size_wide[m->mbmi.sb_type]; @@ -2291,36 +2426,22 @@ static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile, xd->left_txfm_context = xd->left_txfm_context_buffer + ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2); #endif -#if CONFIG_DUAL_FILTER +#if CONFIG_DUAL_FILTER || CONFIG_WARPED_MOTION // has_subpel_mv_component needs the ref frame buffers set up to look // up if they are scaled. has_subpel_mv_component is in turn needed by // write_switchable_interp_filter, which is called by pack_inter_mode_mvs. set_ref_ptrs(cm, xd, m->mbmi.ref_frame[0], m->mbmi.ref_frame[1]); -#endif // CONFIG_DUAL_FILTER -#if 0 +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(&m->mbmi) && is_inter_singleref_comp_mode(m->mbmi.mode)) + xd->block_refs[1] = xd->block_refs[0]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF +#endif // CONFIG_DUAL_FILTER || CONFIG_WARPED_MOTION + +#if ENC_MISMATCH_DEBUG // NOTE(zoeliu): For debug - if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) { - const PREDICTION_MODE mode = m->mbmi.mode; - const int segment_id = m->mbmi.segment_id; - const BLOCK_SIZE bsize = m->mbmi.sb_type; - - // For sub8x8, simply dump out the first sub8x8 block info - const PREDICTION_MODE b_mode = - (bsize < BLOCK_8X8) ? m->bmi[0].as_mode : -1; - const int mv_x = (bsize < BLOCK_8X8) ? - m->bmi[0].as_mv[0].as_mv.row : m->mbmi.mv[0].as_mv.row; - const int mv_y = (bsize < BLOCK_8X8) ? 
- m->bmi[0].as_mv[0].as_mv.col : m->mbmi.mv[0].as_mv.col; - - printf("Before pack_inter_mode_mvs(): " - "Frame=%d, (mi_row,mi_col)=(%d,%d), " - "mode=%d, segment_id=%d, bsize=%d, b_mode=%d, " - "mv[0]=(%d, %d), ref[0]=%d, ref[1]=%d\n", - cm->current_video_frame, mi_row, mi_col, - mode, segment_id, bsize, b_mode, mv_x, mv_y, - m->mbmi.ref_frame[0], m->mbmi.ref_frame[1]); - } -#endif // 0 + enc_dump_logs(cpi, mi_row, mi_col); +#endif // ENC_MISMATCH_DEBUG + pack_inter_mode_mvs(cpi, mi_row, mi_col, #if CONFIG_SUPERTX supertx_enabled, @@ -2335,7 +2456,8 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, int mi_col) { AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - MODE_INFO *const m = xd->mi[0]; + const int mi_offset = mi_row * cm->mi_stride + mi_col; + MODE_INFO *const m = *(cm->mi_grid_visible + mi_offset); MB_MODE_INFO *const mbmi = &m->mbmi; int plane; int bh, bw; @@ -2344,9 +2466,10 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, (void)tok; (void)tok_end; #endif - xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + xd->mi = cm->mi_grid_visible + mi_offset; - assert(mbmi->sb_type <= cm->sb_size); + assert(mbmi->sb_type <= cm->sb_size || + (mbmi->sb_type >= BLOCK_4X16 && mbmi->sb_type <= BLOCK_32X8)); bh = mi_size_high[mbmi->sb_type]; bw = mi_size_wide[mbmi->sb_type]; @@ -2371,7 +2494,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, av1_get_block_dimensions(mbmi->sb_type, plane, xd, NULL, NULL, &rows, &cols); assert(*tok < tok_end); - pack_palette_tokens(w, tok, palette_size_plane, rows * cols - 1); + pack_palette_tokens(w, tok, palette_size_plane, rows * cols); assert(*tok < tok_end + mbmi->skip); } } @@ -2382,7 +2505,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, const struct macroblockd_plane *const pd_y = &xd->plane[0]; const struct macroblockd_plane *const pd_c = &xd->plane[1]; const TX_SIZE tx_log2_y = mbmi->tx_size; - const TX_SIZE tx_log2_c = get_uv_tx_size(mbmi, pd_c); + const TX_SIZE tx_log2_c = av1_get_uv_tx_size(mbmi, pd_c); const int tx_sz_y = (1 << tx_log2_y); const int tx_sz_c = (1 << tx_log2_c); @@ -2469,13 +2592,11 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, #if CONFIG_VAR_TX const struct macroblockd_plane *const pd = &xd->plane[plane]; BLOCK_SIZE bsize = mbmi->sb_type; -#if CONFIG_CB4X4 -#if CONFIG_CHROMA_2X2 - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); -#else +#if CONFIG_CHROMA_SUB8X8 const BLOCK_SIZE plane_bsize = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); -#endif +#elif CONFIG_CB4X4 + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); #else const BLOCK_SIZE plane_bsize = get_plane_block_size(AOMMAX(bsize, BLOCK_8X8), pd); @@ -2489,6 +2610,15 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, TOKEN_STATS token_stats; init_token_stats(&token_stats); + const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, pd); + int mu_blocks_wide = + block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; + int mu_blocks_high = + block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; + + mu_blocks_wide = AOMMIN(num_4x4_w, mu_blocks_wide); + mu_blocks_high = AOMMIN(num_4x4_h, mu_blocks_high); + if (is_inter_block(mbmi)) { const TX_SIZE max_tx_size = get_vartx_max_txsize(mbmi, plane_bsize); int block = 0; @@ -2496,19 +2626,27 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, tx_size_wide_unit[max_tx_size] * 
tx_size_high_unit[max_tx_size]; const int bkw = tx_size_wide_unit[max_tx_size]; const int bkh = tx_size_high_unit[max_tx_size]; - for (row = 0; row < num_4x4_h; row += bkh) { - for (col = 0; col < num_4x4_w; col += bkw) { - pack_txb_tokens(w, + for (row = 0; row < num_4x4_h; row += mu_blocks_high) { + const int unit_height = AOMMIN(mu_blocks_high + row, num_4x4_h); + for (col = 0; col < num_4x4_w; col += mu_blocks_wide) { + int blk_row, blk_col; + const int unit_width = AOMMIN(mu_blocks_wide + col, num_4x4_w); + for (blk_row = row; blk_row < unit_height; blk_row += bkh) { + for (blk_col = col; blk_col < unit_width; blk_col += bkw) { + pack_txb_tokens(w, #if CONFIG_LV_MAP - cm, + cm, #endif - tok, tok_end, + tok, tok_end, #if CONFIG_PVQ || CONFIG_LV_MAP - x, + x, #endif - xd, mbmi, plane, plane_bsize, cm->bit_depth, block, - row, col, max_tx_size, &token_stats); - block += step; + xd, mbmi, plane, plane_bsize, cm->bit_depth, + block, blk_row, blk_col, max_tx_size, + &token_stats); + block += step; + } + } } } #if CONFIG_RD_DEBUG @@ -2522,22 +2660,32 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, #if CONFIG_LV_MAP av1_write_coeffs_mb(cm, x, w, plane); #else - TX_SIZE tx = get_tx_size(plane, xd); + const TX_SIZE tx = av1_get_tx_size(plane, xd); const int bkw = tx_size_wide_unit[tx]; const int bkh = tx_size_high_unit[tx]; - for (row = 0; row < num_4x4_h; row += bkh) { - for (col = 0; col < num_4x4_w; col += bkw) { + int blk_row, blk_col; + + for (row = 0; row < num_4x4_h; row += mu_blocks_high) { + for (col = 0; col < num_4x4_w; col += mu_blocks_wide) { + const int unit_height = AOMMIN(mu_blocks_high + row, num_4x4_h); + const int unit_width = AOMMIN(mu_blocks_wide + col, num_4x4_w); + + for (blk_row = row; blk_row < unit_height; blk_row += bkh) { + for (blk_col = col; blk_col < unit_width; blk_col += bkw) { #if !CONFIG_PVQ - pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx, &token_stats); + pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx, + &token_stats); #else - pack_pvq_tokens(w, x, xd, plane, bsize, tx); + pack_pvq_tokens(w, x, xd, plane, bsize, tx); #endif + } + } } } #endif // CONFIG_LV_MAP } #else - TX_SIZE tx = get_tx_size(plane, xd); + const TX_SIZE tx = av1_get_tx_size(plane, xd); TOKEN_STATS token_stats; #if !CONFIG_PVQ init_token_stats(&token_stats); @@ -2570,7 +2718,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, #endif // CONFIG_COEF_INTERLEAVE } -#if CONFIG_MOTION_VAR && CONFIG_NCOBMC +#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT) static void write_tokens_sb(AV1_COMP *cpi, const TileInfo *const tile, aom_writer *w, const TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, int mi_row, @@ -2656,7 +2804,7 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile, supertx_enabled, #endif mi_row, mi_col); -#if CONFIG_MOTION_VAR && CONFIG_NCOBMC +#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT) (void)tok; (void)tok_end; #else @@ -2688,12 +2836,8 @@ static void write_partition(const AV1_COMMON *const cm, const aom_prob *const probs = cm->fc->partition_prob[ctx]; #endif -#if CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; (void)cm; -#else - FRAME_CONTEXT *ec_ctx = cm->fc; -#endif if (!is_partition_point) return; @@ -2738,6 +2882,10 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; const int hbs = mi_size_wide[bsize] / 2; +#if CONFIG_EXT_PARTITION_TYPES + 
const int quarter_step = mi_size_wide[bsize] / 4; + int i; +#endif const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize); const BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_CB4X4 @@ -2843,6 +2991,24 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row + hbs, mi_col + hbs); break; + case PARTITION_HORZ_4: + for (i = 0; i < 4; ++i) { + int this_mi_row = mi_row + i * quarter_step; + if (i > 0 && this_mi_row >= cm->mi_rows) break; + + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + this_mi_row, mi_col); + } + break; + case PARTITION_VERT_4: + for (i = 0; i < 4; ++i) { + int this_mi_col = mi_col + i * quarter_step; + if (i > 0 && this_mi_col >= cm->mi_cols) break; + + write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, + mi_row, this_mi_col); + } + break; #endif // CONFIG_EXT_PARTITION_TYPES default: assert(0); } @@ -2865,17 +3031,15 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, assert(mbmi->segment_id_supertx < MAX_SEGMENTS); skip = write_skip(cm, xd, mbmi->segment_id_supertx, xd->mi[0], w); + + FRAME_CONTEXT *ec_ctx = xd->tile_ctx; + #if CONFIG_EXT_TX if (get_ext_tx_types(supertx_size, bsize, 1, cm->reduced_tx_set_used) > 1 && !skip) { const int eset = get_ext_tx_set(supertx_size, bsize, 1, cm->reduced_tx_set_used); if (eset > 0) { -#if CONFIG_EC_ADAPT - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *ec_ctx = cm->fc; -#endif aom_write_symbol(w, av1_ext_tx_inter_ind[eset][mbmi->tx_type], ec_ctx->inter_ext_tx_cdf[eset][supertx_size], ext_tx_cnt_inter[eset]); @@ -2883,9 +3047,8 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, } #else if (supertx_size < TX_32X32 && !skip) { - av1_write_token(w, av1_ext_tx_tree, - cm->fc->inter_ext_tx_prob[supertx_size], - &ext_tx_encodings[mbmi->tx_type]); + aom_write_symbol(w, mbmi->tx_type, ec_ctx->inter_ext_tx_cdf[supertx_size], + TX_TYPES); } #endif // CONFIG_EXT_TX @@ -2900,7 +3063,7 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, const int max_blocks_high = max_block_high(xd, plane_bsize, plane); int row, col; - TX_SIZE tx = get_tx_size(plane, xd); + const TX_SIZE tx = av1_get_tx_size(plane, xd); BLOCK_SIZE txb_size = txsize_to_bsize[tx]; const int stepr = tx_size_high_unit[txb_size]; @@ -2934,11 +3097,24 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, #endif // CONFIG_EXT_PARTITION_TYPES #if CONFIG_CDEF - if (bsize == cm->sb_size && !sb_all_skip(cm, mi_row, mi_col) && - cm->cdef_bits != 0) { - aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col] - ->mbmi.cdef_strength, - cm->cdef_bits); + if (bsize == cm->sb_size && cm->cdef_bits != 0 && !cm->all_lossless) { + int width_step = mi_size_wide[BLOCK_64X64]; + int height_step = mi_size_high[BLOCK_64X64]; + int width, height; + for (height = 0; (height < mi_size_high[cm->sb_size]) && + (mi_row + height < cm->mi_rows); + height += height_step) { + for (width = 0; (width < mi_size_wide[cm->sb_size]) && + (mi_col + width < cm->mi_cols); + width += width_step) { + if (!sb_all_skip(cm, mi_row + height, mi_col + width)) + aom_write_literal( + w, cm->mi_grid_visible[(mi_row + height) * cm->mi_stride + + (mi_col + width)] + ->mbmi.cdef_strength, + cm->cdef_bits); + } + } } #endif } @@ -2955,12 +3131,8 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile, int mi_row, 
mi_col; #if CONFIG_DEPENDENT_HORZTILES -#if CONFIG_TILE_GROUPS if (!cm->dependent_horz_tiles || mi_row_start == 0 || tile->tg_horz_boundary) { -#else - if (!cm->dependent_horz_tiles || mi_row_start == 0) { -#endif av1_zero_above_context(cm, mi_col_start, mi_col_end); } #else @@ -2986,7 +3158,7 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile, for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += cm->mib_size) { write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, 0, mi_row, mi_col, cm->sb_size); -#if CONFIG_MOTION_VAR && CONFIG_NCOBMC +#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT) write_tokens_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, cm->sb_size); #endif } @@ -3000,214 +3172,6 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile, #endif } -#if !CONFIG_LV_MAP -#if !CONFIG_PVQ && !CONFIG_EC_ADAPT -static void build_tree_distribution(AV1_COMP *cpi, TX_SIZE tx_size, - av1_coeff_stats *coef_branch_ct, - av1_coeff_probs_model *coef_probs) { - av1_coeff_count *coef_counts = cpi->td.rd_counts.coef_counts[tx_size]; - unsigned int(*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = - cpi->common.counts.eob_branch[tx_size]; - int i, j, k, l, m; -#if CONFIG_RECT_TX - assert(!is_rect_tx(tx_size)); -#endif // CONFIG_RECT_TX - - for (i = 0; i < PLANE_TYPES; ++i) { - for (j = 0; j < REF_TYPES; ++j) { - for (k = 0; k < COEF_BANDS; ++k) { - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - av1_tree_probs_from_distribution(av1_coef_tree, - coef_branch_ct[i][j][k][l], - coef_counts[i][j][k][l]); - coef_branch_ct[i][j][k][l][0][1] = - eob_branch_ct[i][j][k][l] - coef_branch_ct[i][j][k][l][0][0]; - for (m = 0; m < UNCONSTRAINED_NODES; ++m) - coef_probs[i][j][k][l][m] = - get_binary_prob(coef_branch_ct[i][j][k][l][m][0], - coef_branch_ct[i][j][k][l][m][1]); - } - } - } - } -} - -#if !CONFIG_EC_ADAPT -static void update_coef_probs_common(aom_writer *const bc, AV1_COMP *cpi, - TX_SIZE tx_size, - av1_coeff_stats *frame_branch_ct, - av1_coeff_probs_model *new_coef_probs) { - av1_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size]; - const aom_prob upd = DIFF_UPDATE_PROB; -#if CONFIG_EC_ADAPT - const int entropy_nodes_update = UNCONSTRAINED_NODES - 1; -#else - const int entropy_nodes_update = UNCONSTRAINED_NODES; -#endif - int i, j, k, l, t; - int stepsize = cpi->sf.coeff_prob_appx_step; -#if CONFIG_TILE_GROUPS - const int probwt = cpi->common.num_tg; -#else - const int probwt = 1; -#endif -#if CONFIG_RECT_TX - assert(!is_rect_tx(tx_size)); -#endif // CONFIG_RECT_TX - - switch (cpi->sf.use_fast_coef_updates) { - case TWO_LOOP: { - /* dry run to see if there is any update at all needed */ - int savings = 0; - int update[2] = { 0, 0 }; - for (i = 0; i < PLANE_TYPES; ++i) { - for (j = 0; j < REF_TYPES; ++j) { - for (k = 0; k < COEF_BANDS; ++k) { - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - for (t = 0; t < entropy_nodes_update; ++t) { - aom_prob newp = new_coef_probs[i][j][k][l][t]; - const aom_prob oldp = old_coef_probs[i][j][k][l][t]; - int s; - int u = 0; - if (t == PIVOT_NODE) - s = av1_prob_diff_update_savings_search_model( - frame_branch_ct[i][j][k][l][0], oldp, &newp, upd, - stepsize, probwt); - else - s = av1_prob_diff_update_savings_search( - frame_branch_ct[i][j][k][l][t], oldp, &newp, upd, probwt); - - if (s > 0 && newp != oldp) u = 1; - if (u) - savings += s - (int)(av1_cost_zero(upd)); - else - savings -= (int)(av1_cost_zero(upd)); - update[u]++; - } - } - } - } - } - - /* Is coef updated at all */ 
- if (update[1] == 0 || savings < 0) { - aom_write_bit(bc, 0); - return; - } - aom_write_bit(bc, 1); - for (i = 0; i < PLANE_TYPES; ++i) { - for (j = 0; j < REF_TYPES; ++j) { - for (k = 0; k < COEF_BANDS; ++k) { - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - // calc probs and branch cts for this frame only - for (t = 0; t < entropy_nodes_update; ++t) { - aom_prob newp = new_coef_probs[i][j][k][l][t]; - aom_prob *oldp = old_coef_probs[i][j][k][l] + t; - int s; - int u = 0; - if (t == PIVOT_NODE) - s = av1_prob_diff_update_savings_search_model( - frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd, - stepsize, probwt); - else - s = av1_prob_diff_update_savings_search( - frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd, - probwt); - if (s > 0 && newp != *oldp) u = 1; - aom_write(bc, u, upd); - if (u) { - /* send/use new probability */ - av1_write_prob_diff_update(bc, newp, *oldp); - *oldp = newp; - } - } - } - } - } - } - return; - } - - case ONE_LOOP_REDUCED: { - int updates = 0; - int noupdates_before_first = 0; - for (i = 0; i < PLANE_TYPES; ++i) { - for (j = 0; j < REF_TYPES; ++j) { - for (k = 0; k < COEF_BANDS; ++k) { - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - // calc probs and branch cts for this frame only - for (t = 0; t < entropy_nodes_update; ++t) { - aom_prob newp = new_coef_probs[i][j][k][l][t]; - aom_prob *oldp = old_coef_probs[i][j][k][l] + t; - int s; - int u = 0; - if (t == PIVOT_NODE) { - s = av1_prob_diff_update_savings_search_model( - frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd, - stepsize, probwt); - } else { - s = av1_prob_diff_update_savings_search( - frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd, - probwt); - } - - if (s > 0 && newp != *oldp) u = 1; - updates += u; - if (u == 0 && updates == 0) { - noupdates_before_first++; - continue; - } - if (u == 1 && updates == 1) { - int v; - // first update - aom_write_bit(bc, 1); - for (v = 0; v < noupdates_before_first; ++v) - aom_write(bc, 0, upd); - } - aom_write(bc, u, upd); - if (u) { - /* send/use new probability */ - av1_write_prob_diff_update(bc, newp, *oldp); - *oldp = newp; - } - } - } - } - } - } - if (updates == 0) { - aom_write_bit(bc, 0); // no updates - } - return; - } - default: assert(0); - } -} -#endif - -#if !CONFIG_EC_ADAPT -static void update_coef_probs(AV1_COMP *cpi, aom_writer *w) { - const TX_MODE tx_mode = cpi->common.tx_mode; - const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; - TX_SIZE tx_size; - - for (tx_size = 0; tx_size <= max_tx_size; ++tx_size) { - av1_coeff_stats frame_branch_ct[PLANE_TYPES]; - av1_coeff_probs_model frame_coef_probs[PLANE_TYPES]; - if (cpi->td.counts->tx_size_totals[tx_size] <= 20 || CONFIG_RD_DEBUG || - (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) { - aom_write_bit(w, 0); - } else { - build_tree_distribution(cpi, tx_size, frame_branch_ct, frame_coef_probs); - update_coef_probs_common(w, cpi, tx_size, frame_branch_ct, - frame_coef_probs); - } - } -} -#endif // !CONFIG_EC_ADAPT -#endif // !CONFIG_EC_ADAPT -#endif // !CONFIG_LV_MAP - #if CONFIG_LOOP_RESTORATION static void encode_restoration_mode(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { @@ -3257,6 +3221,23 @@ static void encode_restoration_mode(AV1_COMMON *cm, wb, rsi->restoration_tilesize != (RESTORATION_TILESIZE_MAX >> 1)); } } + int s = AOMMIN(cm->subsampling_x, cm->subsampling_y); + if (s && (cm->rst_info[1].frame_restoration_type != RESTORE_NONE || + cm->rst_info[2].frame_restoration_type != RESTORE_NONE)) { + aom_wb_write_bit(wb, 
cm->rst_info[1].restoration_tilesize != + cm->rst_info[0].restoration_tilesize); + assert(cm->rst_info[1].restoration_tilesize == + cm->rst_info[0].restoration_tilesize || + cm->rst_info[1].restoration_tilesize == + (cm->rst_info[0].restoration_tilesize >> s)); + assert(cm->rst_info[2].restoration_tilesize == + cm->rst_info[1].restoration_tilesize); + } else if (!s) { + assert(cm->rst_info[1].restoration_tilesize == + cm->rst_info[0].restoration_tilesize); + assert(cm->rst_info[2].restoration_tilesize == + cm->rst_info[1].restoration_tilesize); + } } static void write_wiener_filter(WienerInfo *wiener_info, @@ -3311,16 +3292,23 @@ static void write_sgrproj_filter(SgrprojInfo *sgrproj_info, static void encode_restoration(AV1_COMMON *cm, aom_writer *wb) { int i, p; - const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, - cm->rst_info[0].restoration_tilesize, - NULL, NULL, NULL, NULL); +#if CONFIG_FRAME_SUPERRES + const int width = cm->superres_upscaled_width; + const int height = cm->superres_upscaled_height; +#else + const int width = cm->width; + const int height = cm->height; +#endif // CONFIG_FRAME_SUPERRES + const int ntiles = + av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize, + NULL, NULL, NULL, NULL); WienerInfo ref_wiener_info; SgrprojInfo ref_sgrproj_info; set_default_wiener(&ref_wiener_info); set_default_sgrproj(&ref_sgrproj_info); const int ntiles_uv = av1_get_rest_ntiles( - ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x), - ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y), + ROUND_POWER_OF_TWO(width, cm->subsampling_x), + ROUND_POWER_OF_TWO(height, cm->subsampling_y), cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, NULL); RestorationInfo *rsi = &cm->rst_info[0]; if (rsi->frame_restoration_type != RESTORE_NONE) { @@ -3389,6 +3377,12 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { // Encode the loop filter level and type aom_wb_write_literal(wb, lf->filter_level, 6); +#if CONFIG_UV_LVL + if (lf->filter_level > 0) { + aom_wb_write_literal(wb, lf->filter_level_u, 6); + aom_wb_write_literal(wb, lf->filter_level_v, 6); + } +#endif aom_wb_write_literal(wb, lf->sharpness_level, 3); // Write out loop filter deltas applied at the MB level based on mode or @@ -3509,51 +3503,17 @@ static void encode_segmentation(AV1_COMMON *cm, MACROBLOCKD *xd, } } -#if !CONFIG_EC_ADAPT -static void update_seg_probs(AV1_COMP *cpi, aom_writer *w) { - AV1_COMMON *cm = &cpi->common; -#if CONFIG_TILE_GROUPS - const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif - - if (!cm->seg.enabled || !cm->seg.update_map) return; - - if (cm->seg.temporal_update) { - int i; - - for (i = 0; i < PREDICTION_PROBS; i++) - av1_cond_prob_diff_update(w, &cm->fc->seg.pred_probs[i], - cm->counts.seg.pred[i], probwt); - - prob_diff_update(av1_segment_tree, cm->fc->seg.tree_probs, - cm->counts.seg.tree_mispred, MAX_SEGMENTS, probwt, w); - } else { - prob_diff_update(av1_segment_tree, cm->fc->seg.tree_probs, - cm->counts.seg.tree_total, MAX_SEGMENTS, probwt, w); - } -} -#endif - -static void write_tx_mode(AV1_COMMON *cm, MACROBLOCKD *xd, TX_MODE *mode, +static void write_tx_mode(AV1_COMMON *cm, TX_MODE *mode, struct aom_write_bit_buffer *wb) { - int i, all_lossless = 1; - - if (cm->seg.enabled) { - for (i = 0; i < MAX_SEGMENTS; ++i) { - if (!xd->lossless[i]) { - all_lossless = 0; - break; - } - } - } else { - all_lossless = xd->lossless[0]; - } - if (all_lossless) { + if (cm->all_lossless) { *mode = ONLY_4X4; return; } +#if 
CONFIG_VAR_TX_NO_TX_MODE + (void)wb; + *mode = TX_MODE_SELECT; + return; +#else #if CONFIG_TX64X64 aom_wb_write_bit(wb, *mode == TX_MODE_SELECT); if (*mode != TX_MODE_SELECT) { @@ -3564,26 +3524,9 @@ static void write_tx_mode(AV1_COMMON *cm, MACROBLOCKD *xd, TX_MODE *mode, aom_wb_write_bit(wb, *mode == TX_MODE_SELECT); if (*mode != TX_MODE_SELECT) aom_wb_write_literal(wb, *mode, 2); #endif // CONFIG_TX64X64 +#endif // CONFIG_VAR_TX_NO_TX_MODE } -#if !CONFIG_EC_ADAPT -static void update_txfm_probs(AV1_COMMON *cm, aom_writer *w, - FRAME_COUNTS *counts) { -#if CONFIG_TILE_GROUPS - const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif - if (cm->tx_mode == TX_MODE_SELECT) { - int i, j; - for (i = 0; i < MAX_TX_DEPTH; ++i) - for (j = 0; j < TX_SIZE_CONTEXTS; ++j) - prob_diff_update(av1_tx_size_tree[i], cm->fc->tx_size_probs[i][j], - counts->tx_size[i][j], i + 2, probwt, w); - } -} -#endif - static void write_frame_interp_filter(InterpFilter filter, struct aom_write_bit_buffer *wb) { aom_wb_write_bit(wb, filter == SWITCHABLE); @@ -3624,52 +3567,52 @@ static void fix_interp_filter(AV1_COMMON *cm, FRAME_COUNTS *counts) { static void write_tile_info(const AV1_COMMON *const cm, struct aom_write_bit_buffer *wb) { #if CONFIG_EXT_TILE - const int tile_width = - ALIGN_POWER_OF_TWO(cm->tile_width, cm->mib_size_log2) >> - cm->mib_size_log2; - const int tile_height = - ALIGN_POWER_OF_TWO(cm->tile_height, cm->mib_size_log2) >> - cm->mib_size_log2; - - assert(tile_width > 0); - assert(tile_height > 0); + if (cm->large_scale_tile) { + const int tile_width = + ALIGN_POWER_OF_TWO(cm->tile_width, cm->mib_size_log2) >> + cm->mib_size_log2; + const int tile_height = + ALIGN_POWER_OF_TWO(cm->tile_height, cm->mib_size_log2) >> + cm->mib_size_log2; - aom_wb_write_literal(wb, cm->tile_encoding_mode, 1); + assert(tile_width > 0); + assert(tile_height > 0); // Write the tile sizes #if CONFIG_EXT_PARTITION - if (cm->sb_size == BLOCK_128X128) { - assert(tile_width <= 32); - assert(tile_height <= 32); - aom_wb_write_literal(wb, tile_width - 1, 5); - aom_wb_write_literal(wb, tile_height - 1, 5); - } else + if (cm->sb_size == BLOCK_128X128) { + assert(tile_width <= 32); + assert(tile_height <= 32); + aom_wb_write_literal(wb, tile_width - 1, 5); + aom_wb_write_literal(wb, tile_height - 1, 5); + } else { #endif // CONFIG_EXT_PARTITION - { - assert(tile_width <= 64); - assert(tile_height <= 64); - aom_wb_write_literal(wb, tile_width - 1, 6); - aom_wb_write_literal(wb, tile_height - 1, 6); - } -#if CONFIG_DEPENDENT_HORZTILES - if (tile_height > 1) aom_wb_write_bit(wb, cm->dependent_horz_tiles); -#endif -#else - int min_log2_tile_cols, max_log2_tile_cols, ones; - av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); + assert(tile_width <= 64); + assert(tile_height <= 64); + aom_wb_write_literal(wb, tile_width - 1, 6); + aom_wb_write_literal(wb, tile_height - 1, 6); +#if CONFIG_EXT_PARTITION + } +#endif // CONFIG_EXT_PARTITION + } else { +#endif // CONFIG_EXT_TILE + int min_log2_tile_cols, max_log2_tile_cols, ones; + av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - // columns - ones = cm->log2_tile_cols - min_log2_tile_cols; - while (ones--) aom_wb_write_bit(wb, 1); + // columns + ones = cm->log2_tile_cols - min_log2_tile_cols; + while (ones--) aom_wb_write_bit(wb, 1); - if (cm->log2_tile_cols < max_log2_tile_cols) aom_wb_write_bit(wb, 0); + if (cm->log2_tile_cols < max_log2_tile_cols) aom_wb_write_bit(wb, 0); - // rows - aom_wb_write_bit(wb, cm->log2_tile_rows != 
0); - if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->log2_tile_rows != 1); + // rows + aom_wb_write_bit(wb, cm->log2_tile_rows != 0); + if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->log2_tile_rows != 1); #if CONFIG_DEPENDENT_HORZTILES - if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->dependent_horz_tiles); + if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->dependent_horz_tiles); #endif +#if CONFIG_EXT_TILE + } #endif // CONFIG_EXT_TILE #if CONFIG_LOOPFILTERING_ACROSS_TILES @@ -3782,16 +3725,9 @@ static INLINE int find_identical_tile( } #endif // CONFIG_EXT_TILE -#if CONFIG_TILE_GROUPS -static uint32_t write_tiles(AV1_COMP *const cpi, - struct aom_write_bit_buffer *wb, - unsigned int *max_tile_size, - unsigned int *max_tile_col_size) { -#else static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst, unsigned int *max_tile_size, unsigned int *max_tile_col_size) { -#endif const AV1_COMMON *const cm = &cpi->common; #if CONFIG_ANS struct BufAnsCoder *buf_ans = &cpi->buf_ans; @@ -3805,19 +3741,24 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst, const int tile_cols = cm->tile_cols; const int tile_rows = cm->tile_rows; unsigned int tile_size = 0; -#if CONFIG_TILE_GROUPS + const int have_tiles = tile_cols * tile_rows > 1; + struct aom_write_bit_buffer wb = { dst, 0 }; const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols; - const int have_tiles = n_log2_tiles > 0; uint32_t comp_hdr_size; // Fixed size tile groups for the moment const int num_tg_hdrs = cm->num_tg; - const int tg_size = (tile_rows * tile_cols + num_tg_hdrs - 1) / num_tg_hdrs; + const int tg_size = +#if CONFIG_EXT_TILE + (cm->large_scale_tile) + ? 1 + : +#endif // CONFIG_EXT_TILE + (tile_rows * tile_cols + num_tg_hdrs - 1) / num_tg_hdrs; int tile_count = 0; int tg_count = 1; int tile_size_bytes = 4; int tile_col_size_bytes; uint32_t uncompressed_hdr_size = 0; - uint8_t *dst = NULL; struct aom_write_bit_buffer comp_hdr_len_wb; struct aom_write_bit_buffer tg_params_wb; struct aom_write_bit_buffer tile_size_bytes_wb; @@ -3825,10 +3766,6 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst, int mtu_size = cpi->oxcf.mtu; int curr_tg_data_size = 0; int hdr_size; -#endif -#if CONFIG_EXT_TILE - const int have_tiles = tile_cols * tile_rows > 1; -#endif // CONFIG_EXT_TILE *max_tile_size = 0; *max_tile_col_size = 0; @@ -3837,282 +3774,274 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst, // later compact the data if smaller headers are adequate. #if CONFIG_EXT_TILE - for (tile_col = 0; tile_col < tile_cols; tile_col++) { - TileInfo tile_info; - const int is_last_col = (tile_col == tile_cols - 1); - const uint32_t col_offset = total_size; - - av1_tile_set_col(&tile_info, cm, tile_col); - - // The last column does not have a column header - if (!is_last_col) total_size += 4; - - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col]; - const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col]; - const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col]; - const int data_offset = have_tiles ? 
4 : 0; -#if CONFIG_EC_ADAPT - const int tile_idx = tile_row * tile_cols + tile_col; - TileDataEnc *this_tile = &cpi->tile_data[tile_idx]; -#endif - av1_tile_set_row(&tile_info, cm, tile_row); + if (cm->large_scale_tile) { + for (tile_col = 0; tile_col < tile_cols; tile_col++) { + TileInfo tile_info; + const int is_last_col = (tile_col == tile_cols - 1); + const uint32_t col_offset = total_size; - buf->data = dst + total_size; + av1_tile_set_col(&tile_info, cm, tile_col); - // Is CONFIG_EXT_TILE = 1, every tile in the row has a header, - // even for the last one, unless no tiling is used at all. - total_size += data_offset; -#if CONFIG_EC_ADAPT - // Initialise tile context from the frame context - this_tile->tctx = *cm->fc; - cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx; -#endif + // The last column does not have a column header + if (!is_last_col) total_size += 4; + + for (tile_row = 0; tile_row < tile_rows; tile_row++) { + TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col]; + const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col]; + const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col]; + const int data_offset = have_tiles ? 4 : 0; + const int tile_idx = tile_row * tile_cols + tile_col; + TileDataEnc *this_tile = &cpi->tile_data[tile_idx]; + av1_tile_set_row(&tile_info, cm, tile_row); + + buf->data = dst + total_size; + + // Is CONFIG_EXT_TILE = 1, every tile in the row has a header, + // even for the last one, unless no tiling is used at all. + total_size += data_offset; + // Initialise tile context from the frame context + this_tile->tctx = *cm->fc; + cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx; #if CONFIG_PVQ - cpi->td.mb.pvq_q = &this_tile->pvq_q; - cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context; + cpi->td.mb.pvq_q = &this_tile->pvq_q; + cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context; #endif // CONFIG_PVQ #if !CONFIG_ANS - aom_start_encode(&mode_bc, buf->data + data_offset); - write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end); - assert(tok == tok_end); - aom_stop_encode(&mode_bc); - tile_size = mode_bc.pos; + aom_start_encode(&mode_bc, buf->data + data_offset); + write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end); + assert(tok == tok_end); + aom_stop_encode(&mode_bc); + tile_size = mode_bc.pos; #else - buf_ans_write_init(buf_ans, buf->data + data_offset); - write_modes(cpi, &tile_info, buf_ans, &tok, tok_end); - assert(tok == tok_end); - aom_buf_ans_flush(buf_ans); - tile_size = buf_ans_write_end(buf_ans); + buf_ans_write_init(buf_ans, buf->data + data_offset); + write_modes(cpi, &tile_info, buf_ans, &tok, tok_end); + assert(tok == tok_end); + aom_buf_ans_flush(buf_ans); + tile_size = buf_ans_write_end(buf_ans); #endif // !CONFIG_ANS #if CONFIG_PVQ - cpi->td.mb.pvq_q = NULL; + cpi->td.mb.pvq_q = NULL; #endif - buf->size = tile_size; - - // Record the maximum tile size we see, so we can compact headers later. - *max_tile_size = AOMMAX(*max_tile_size, tile_size); - - if (have_tiles) { - // tile header: size of this tile, or copy offset - uint32_t tile_header = tile_size; + buf->size = tile_size; - // If the tile_encoding_mode is 1 (i.e. TILE_VR), check if this tile is - // a copy tile. - // Very low chances to have copy tiles on the key frames, so don't - // search on key frames to reduce unnecessary search. 
- if (cm->frame_type != KEY_FRAME && cm->tile_encoding_mode) { - const int idendical_tile_offset = - find_identical_tile(tile_row, tile_col, tile_buffers); + // Record the maximum tile size we see, so we can compact headers later. + *max_tile_size = AOMMAX(*max_tile_size, tile_size); - if (idendical_tile_offset > 0) { - tile_size = 0; - tile_header = idendical_tile_offset | 0x80; - tile_header <<= 24; + if (have_tiles) { + // tile header: size of this tile, or copy offset + uint32_t tile_header = tile_size; + const int tile_copy_mode = + ((AOMMAX(cm->tile_width, cm->tile_height) << MI_SIZE_LOG2) <= 256) + ? 1 + : 0; + + // If tile_copy_mode = 1, check if this tile is a copy tile. + // Very low chances to have copy tiles on the key frames, so don't + // search on key frames to reduce unnecessary search. + if (cm->frame_type != KEY_FRAME && tile_copy_mode) { + const int idendical_tile_offset = + find_identical_tile(tile_row, tile_col, tile_buffers); + + if (idendical_tile_offset > 0) { + tile_size = 0; + tile_header = idendical_tile_offset | 0x80; + tile_header <<= 24; + } } + + mem_put_le32(buf->data, tile_header); } - mem_put_le32(buf->data, tile_header); + total_size += tile_size; } - total_size += tile_size; - } - - if (!is_last_col) { - uint32_t col_size = total_size - col_offset - 4; - mem_put_le32(dst + col_offset, col_size); + if (!is_last_col) { + uint32_t col_size = total_size - col_offset - 4; + mem_put_le32(dst + col_offset, col_size); - // If it is not final packing, record the maximum tile column size we see, - // otherwise, check if the tile size is out of the range. - *max_tile_col_size = AOMMAX(*max_tile_col_size, col_size); + // If it is not final packing, record the maximum tile column size we + // see, otherwise, check if the tile size is out of the range. 
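/* Editor's note: an illustrative sketch, not part of the patch. It restates
 * how the 4-byte large-scale-tile header written above is laid out: either
 * the tile size, or -- for a copy tile -- the backwards offset to an
 * identical tile, ORed with 0x80 and shifted into the most significant byte.
 * The helper names are hypothetical; storage is assumed little-endian via
 * mem_put_le32()/mem_get_le32() as in the surrounding code. */
#include <stdint.h>

static uint32_t encode_ls_tile_header(uint32_t tile_size, int copy_offset) {
  if (copy_offset <= 0) return tile_size;  /* plain tile: header = size */
  /* Copy tile: offset (1..127) with the copy flag, in the top byte. */
  return ((uint32_t)copy_offset | 0x80u) << 24;
}

static int decode_ls_tile_header(uint32_t tile_header, uint32_t *tile_size,
                                 int *copy_offset) {
  if (tile_header >> 31) {                 /* copy flag set */
    *copy_offset = (int)((tile_header >> 24) & 0x7f);
    *tile_size = 0;                        /* no tile data follows */
    return 1;
  }
  *tile_size = tile_header;                /* ordinary tile size */
  *copy_offset = 0;
  return 0;
}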
+ *max_tile_col_size = AOMMAX(*max_tile_col_size, col_size); + } } - } -#else -#if CONFIG_TILE_GROUPS - write_uncompressed_header(cpi, wb); + } else { +#endif // CONFIG_EXT_TILE + write_uncompressed_header(cpi, &wb); #if CONFIG_EXT_REFS - if (cm->show_existing_frame) { - total_size = aom_wb_bytes_written(wb); - return (uint32_t)total_size; - } + if (cm->show_existing_frame) { + total_size = aom_wb_bytes_written(&wb); + return (uint32_t)total_size; + } #endif // CONFIG_EXT_REFS - // Write the tile length code - tile_size_bytes_wb = *wb; - aom_wb_write_literal(wb, 3, 2); + // Write the tile length code + tile_size_bytes_wb = wb; + aom_wb_write_literal(&wb, 3, 2); - /* Write a placeholder for the number of tiles in each tile group */ - tg_params_wb = *wb; - saved_offset = wb->bit_offset; - if (have_tiles) { - aom_wb_overwrite_literal(wb, 3, n_log2_tiles); - aom_wb_overwrite_literal(wb, (1 << n_log2_tiles) - 1, n_log2_tiles); - } - - /* Write a placeholder for the compressed header length */ - comp_hdr_len_wb = *wb; - aom_wb_write_literal(wb, 0, 16); - - uncompressed_hdr_size = aom_wb_bytes_written(wb); - dst = wb->bit_buffer; - comp_hdr_size = write_compressed_header(cpi, dst + uncompressed_hdr_size); - aom_wb_overwrite_literal(&comp_hdr_len_wb, (int)(comp_hdr_size), 16); - hdr_size = uncompressed_hdr_size + comp_hdr_size; - total_size += hdr_size; -#endif + /* Write a placeholder for the number of tiles in each tile group */ + tg_params_wb = wb; + saved_offset = wb.bit_offset; + if (have_tiles) { + aom_wb_overwrite_literal(&wb, 3, n_log2_tiles); + aom_wb_overwrite_literal(&wb, (1 << n_log2_tiles) - 1, n_log2_tiles); + } - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - TileInfo tile_info; - const int is_last_row = (tile_row == tile_rows - 1); - av1_tile_set_row(&tile_info, cm, tile_row); + /* Write a placeholder for the compressed header length */ + comp_hdr_len_wb = wb; + aom_wb_write_literal(&wb, 0, 16); - for (tile_col = 0; tile_col < tile_cols; tile_col++) { - const int tile_idx = tile_row * tile_cols + tile_col; - TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col]; -#if CONFIG_PVQ || CONFIG_EC_ADAPT - TileDataEnc *this_tile = &cpi->tile_data[tile_idx]; -#endif - const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col]; - const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col]; - const int is_last_col = (tile_col == tile_cols - 1); - const int is_last_tile = is_last_col && is_last_row; -#if !CONFIG_TILE_GROUPS - (void)tile_idx; -#else + uncompressed_hdr_size = aom_wb_bytes_written(&wb); + comp_hdr_size = write_compressed_header(cpi, dst + uncompressed_hdr_size); + aom_wb_overwrite_literal(&comp_hdr_len_wb, (int)(comp_hdr_size), 16); + hdr_size = uncompressed_hdr_size + comp_hdr_size; + total_size += hdr_size; - if ((!mtu_size && tile_count > tg_size) || - (mtu_size && tile_count && curr_tg_data_size >= mtu_size)) { - // New tile group - tg_count++; - // We've exceeded the packet size - if (tile_count > 1) { - /* The last tile exceeded the packet size. The tile group size - should therefore be tile_count-1. 
- Move the last tile and insert headers before it - */ - uint32_t old_total_size = total_size - tile_size - 4; - memmove(dst + old_total_size + hdr_size, dst + old_total_size, - (tile_size + 4) * sizeof(uint8_t)); - // Copy uncompressed header - memmove(dst + old_total_size, dst, - uncompressed_hdr_size * sizeof(uint8_t)); - // Write the number of tiles in the group into the last uncompressed - // header before the one we've just inserted - aom_wb_overwrite_literal(&tg_params_wb, tile_idx - tile_count, - n_log2_tiles); - aom_wb_overwrite_literal(&tg_params_wb, tile_count - 2, n_log2_tiles); - // Update the pointer to the last TG params - tg_params_wb.bit_offset = saved_offset + 8 * old_total_size; - // Copy compressed header - memmove(dst + old_total_size + uncompressed_hdr_size, - dst + uncompressed_hdr_size, comp_hdr_size * sizeof(uint8_t)); - total_size += hdr_size; - tile_count = 1; - curr_tg_data_size = hdr_size + tile_size + 4; + for (tile_row = 0; tile_row < tile_rows; tile_row++) { + TileInfo tile_info; + const int is_last_row = (tile_row == tile_rows - 1); + av1_tile_set_row(&tile_info, cm, tile_row); - } else { - // We exceeded the packet size in just one tile - // Copy uncompressed header - memmove(dst + total_size, dst, - uncompressed_hdr_size * sizeof(uint8_t)); - // Write the number of tiles in the group into the last uncompressed - // header - aom_wb_overwrite_literal(&tg_params_wb, tile_idx - tile_count, - n_log2_tiles); - aom_wb_overwrite_literal(&tg_params_wb, tile_count - 1, n_log2_tiles); - tg_params_wb.bit_offset = saved_offset + 8 * total_size; - // Copy compressed header - memmove(dst + total_size + uncompressed_hdr_size, - dst + uncompressed_hdr_size, comp_hdr_size * sizeof(uint8_t)); - total_size += hdr_size; - tile_count = 0; - curr_tg_data_size = hdr_size; + for (tile_col = 0; tile_col < tile_cols; tile_col++) { + const int tile_idx = tile_row * tile_cols + tile_col; + TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col]; + TileDataEnc *this_tile = &cpi->tile_data[tile_idx]; + const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col]; + const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col]; + const int is_last_col = (tile_col == tile_cols - 1); + const int is_last_tile = is_last_col && is_last_row; + + if ((!mtu_size && tile_count > tg_size) || + (mtu_size && tile_count && curr_tg_data_size >= mtu_size)) { + // New tile group + tg_count++; + // We've exceeded the packet size + if (tile_count > 1) { + /* The last tile exceeded the packet size. The tile group size + should therefore be tile_count-1. 
+ Move the last tile and insert headers before it + */ + uint32_t old_total_size = total_size - tile_size - 4; + memmove(dst + old_total_size + hdr_size, dst + old_total_size, + (tile_size + 4) * sizeof(uint8_t)); + // Copy uncompressed header + memmove(dst + old_total_size, dst, + uncompressed_hdr_size * sizeof(uint8_t)); + // Write the number of tiles in the group into the last uncompressed + // header before the one we've just inserted + aom_wb_overwrite_literal(&tg_params_wb, tile_idx - tile_count, + n_log2_tiles); + aom_wb_overwrite_literal(&tg_params_wb, tile_count - 2, + n_log2_tiles); + // Update the pointer to the last TG params + tg_params_wb.bit_offset = saved_offset + 8 * old_total_size; + // Copy compressed header + memmove(dst + old_total_size + uncompressed_hdr_size, + dst + uncompressed_hdr_size, + comp_hdr_size * sizeof(uint8_t)); + total_size += hdr_size; + tile_count = 1; + curr_tg_data_size = hdr_size + tile_size + 4; + } else { + // We exceeded the packet size in just one tile + // Copy uncompressed header + memmove(dst + total_size, dst, + uncompressed_hdr_size * sizeof(uint8_t)); + // Write the number of tiles in the group into the last uncompressed + // header + aom_wb_overwrite_literal(&tg_params_wb, tile_idx - tile_count, + n_log2_tiles); + aom_wb_overwrite_literal(&tg_params_wb, tile_count - 1, + n_log2_tiles); + tg_params_wb.bit_offset = saved_offset + 8 * total_size; + // Copy compressed header + memmove(dst + total_size + uncompressed_hdr_size, + dst + uncompressed_hdr_size, + comp_hdr_size * sizeof(uint8_t)); + total_size += hdr_size; + tile_count = 0; + curr_tg_data_size = hdr_size; + } } - } - tile_count++; -#endif - av1_tile_set_col(&tile_info, cm, tile_col); + tile_count++; + av1_tile_set_col(&tile_info, cm, tile_col); -#if CONFIG_DEPENDENT_HORZTILES && CONFIG_TILE_GROUPS - av1_tile_set_tg_boundary(&tile_info, cm, tile_row, tile_col); +#if CONFIG_DEPENDENT_HORZTILES + av1_tile_set_tg_boundary(&tile_info, cm, tile_row, tile_col); #endif - buf->data = dst + total_size; + buf->data = dst + total_size; - // The last tile does not have a header. - if (!is_last_tile) total_size += 4; + // The last tile does not have a header. 
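/* Editor's note: a minimal sketch of the MTU repacking step above, not part
 * of the patch. When the tile that was just written pushes the current tile
 * group past the packet size, the frame headers are duplicated in front of
 * that tile so it opens a new tile group. The real code copies the
 * uncompressed and compressed headers with two memmove() calls (patching the
 * previous group's tile count in between); since the two headers are
 * contiguous at the start of the buffer, the net buffer shuffle is the one
 * below. The function name is hypothetical. */
#include <stdint.h>
#include <string.h>

static void start_new_tile_group(uint8_t *buf, uint32_t total_size,
                                 uint32_t last_tile_size, uint32_t hdr_size) {
  /* Offset of the last tile's 4-byte size field within buf. */
  const uint32_t last_tile = total_size - last_tile_size - 4;
  /* Slide the last tile (size field + payload) forward to make room... */
  memmove(buf + last_tile + hdr_size, buf + last_tile, last_tile_size + 4);
  /* ...and copy the frame headers (stored at the start of buf) into the
   * gap, so the moved tile now begins a new tile group. */
  memmove(buf + last_tile, buf, hdr_size);
}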
+ if (!is_last_tile) total_size += 4; -#if CONFIG_EC_ADAPT - // Initialise tile context from the frame context - this_tile->tctx = *cm->fc; - cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx; -#endif + // Initialise tile context from the frame context + this_tile->tctx = *cm->fc; + cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx; #if CONFIG_PVQ - cpi->td.mb.pvq_q = &this_tile->pvq_q; - cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context; + cpi->td.mb.pvq_q = &this_tile->pvq_q; + cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context; #endif // CONFIG_PVQ #if CONFIG_ANS - buf_ans_write_init(buf_ans, dst + total_size); - write_modes(cpi, &tile_info, buf_ans, &tok, tok_end); - assert(tok == tok_end); - aom_buf_ans_flush(buf_ans); - tile_size = buf_ans_write_end(buf_ans); + buf_ans_write_init(buf_ans, dst + total_size); + write_modes(cpi, &tile_info, buf_ans, &tok, tok_end); + assert(tok == tok_end); + aom_buf_ans_flush(buf_ans); + tile_size = buf_ans_write_end(buf_ans); #else aom_start_encode(&mode_bc, dst + total_size); write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end); #if !CONFIG_LV_MAP +#if !CONFIG_PVQ assert(tok == tok_end); +#endif // !CONFIG_PVQ #endif // !CONFIG_LV_MAP aom_stop_encode(&mode_bc); tile_size = mode_bc.pos; #endif // CONFIG_ANS #if CONFIG_PVQ - cpi->td.mb.pvq_q = NULL; + cpi->td.mb.pvq_q = NULL; #endif - assert(tile_size > 0); + assert(tile_size > 0); -#if CONFIG_TILE_GROUPS - curr_tg_data_size += tile_size + 4; -#endif - buf->size = tile_size; + curr_tg_data_size += tile_size + 4; + buf->size = tile_size; - if (!is_last_tile) { - *max_tile_size = AOMMAX(*max_tile_size, tile_size); - // size of this tile - mem_put_le32(buf->data, tile_size); - } + if (!is_last_tile) { + *max_tile_size = AOMMAX(*max_tile_size, tile_size); + // size of this tile + mem_put_le32(buf->data, tile_size); + } - total_size += tile_size; + total_size += tile_size; + } + } + // Write the final tile group size + if (n_log2_tiles) { + aom_wb_overwrite_literal(&tg_params_wb, (1 << n_log2_tiles) - tile_count, + n_log2_tiles); + aom_wb_overwrite_literal(&tg_params_wb, tile_count - 1, n_log2_tiles); + } + // Remux if possible. TODO (Thomas Davies): do this for more than one tile + // group + if (have_tiles && tg_count == 1) { + int data_size = total_size - (uncompressed_hdr_size + comp_hdr_size); + data_size = remux_tiles(cm, dst + uncompressed_hdr_size + comp_hdr_size, + data_size, *max_tile_size, *max_tile_col_size, + &tile_size_bytes, &tile_col_size_bytes); + total_size = data_size + uncompressed_hdr_size + comp_hdr_size; + aom_wb_overwrite_literal(&tile_size_bytes_wb, tile_size_bytes - 1, 2); } - } -#if CONFIG_TILE_GROUPS - // Write the final tile group size - if (n_log2_tiles) { - aom_wb_overwrite_literal(&tg_params_wb, (1 << n_log2_tiles) - tile_count, - n_log2_tiles); - aom_wb_overwrite_literal(&tg_params_wb, tile_count - 1, n_log2_tiles); - } - // Remux if possible. 
TODO (Thomas Davies): do this for more than one tile - // group - if (have_tiles && tg_count == 1) { - int data_size = total_size - (uncompressed_hdr_size + comp_hdr_size); - data_size = remux_tiles(cm, dst + uncompressed_hdr_size + comp_hdr_size, - data_size, *max_tile_size, *max_tile_col_size, - &tile_size_bytes, &tile_col_size_bytes); - total_size = data_size + uncompressed_hdr_size + comp_hdr_size; - aom_wb_overwrite_literal(&tile_size_bytes_wb, tile_size_bytes - 1, 2); - } -#endif +#if CONFIG_EXT_TILE + } #endif // CONFIG_EXT_TILE return (uint32_t)total_size; } static void write_render_size(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { - const int scaling_active = - cm->width != cm->render_width || cm->height != cm->render_height; + const int scaling_active = !av1_resize_unscaled(cm); aom_wb_write_bit(wb, scaling_active); if (scaling_active) { aom_wb_write_literal(wb, cm->render_width - 1, 16); @@ -4124,11 +4053,10 @@ static void write_render_size(const AV1_COMMON *cm, static void write_superres_scale(const AV1_COMMON *const cm, struct aom_write_bit_buffer *wb) { // First bit is whether to to scale or not - if (cm->superres_scale_numerator == SUPERRES_SCALE_DENOMINATOR) { + if (cm->superres_scale_numerator == SCALE_DENOMINATOR) { aom_wb_write_bit(wb, 0); // no scaling } else { aom_wb_write_bit(wb, 1); // scaling, write scale factor - // TODO(afergs): write factor to the compressed header instead aom_wb_write_literal( wb, cm->superres_scale_numerator - SUPERRES_SCALE_NUMERATOR_MIN, SUPERRES_SCALE_BITS); @@ -4138,13 +4066,15 @@ static void write_superres_scale(const AV1_COMMON *const cm, static void write_frame_size(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { - aom_wb_write_literal(wb, cm->width - 1, 16); - aom_wb_write_literal(wb, cm->height - 1, 16); - - write_render_size(cm, wb); #if CONFIG_FRAME_SUPERRES + aom_wb_write_literal(wb, cm->superres_upscaled_width - 1, 16); + aom_wb_write_literal(wb, cm->superres_upscaled_height - 1, 16); write_superres_scale(cm, wb); +#else + aom_wb_write_literal(wb, cm->width - 1, 16); + aom_wb_write_literal(wb, cm->height - 1, 16); #endif // CONFIG_FRAME_SUPERRES + write_render_size(cm, wb); } static void write_frame_size_with_refs(AV1_COMP *cpi, @@ -4157,20 +4087,26 @@ static void write_frame_size_with_refs(AV1_COMP *cpi, YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame); if (cfg != NULL) { +#if CONFIG_FRAME_SUPERRES + found = cm->superres_upscaled_width == cfg->y_crop_width && + cm->superres_upscaled_height == cfg->y_crop_height; +#else found = cm->width == cfg->y_crop_width && cm->height == cfg->y_crop_height; +#endif found &= cm->render_width == cfg->render_width && cm->render_height == cfg->render_height; } aom_wb_write_bit(wb, found); if (found) { +#if CONFIG_FRAME_SUPERRES + write_superres_scale(cm, wb); +#endif // CONFIG_FRAME_SUPERRES break; } } - if (!found) { - write_frame_size(cm, wb); - } + if (!found) write_frame_size(cm, wb); } static void write_sync_code(struct aom_write_bit_buffer *wb) { @@ -4196,7 +4132,12 @@ static void write_bitdepth_colorspace_sampling( assert(cm->bit_depth > AOM_BITS_8); aom_wb_write_bit(wb, cm->bit_depth == AOM_BITS_10 ? 0 : 1); } +#if CONFIG_COLORSPACE_HEADERS + aom_wb_write_literal(wb, cm->color_space, 5); + aom_wb_write_literal(wb, cm->transfer_function, 5); +#else aom_wb_write_literal(wb, cm->color_space, 3); +#endif if (cm->color_space != AOM_CS_SRGB) { // 0: [16, 235] (i.e. 
xvYCC), 1: [0, 255] aom_wb_write_bit(wb, cm->color_range); @@ -4208,6 +4149,11 @@ static void write_bitdepth_colorspace_sampling( } else { assert(cm->subsampling_x == 1 && cm->subsampling_y == 1); } +#if CONFIG_COLORSPACE_HEADERS + if (cm->subsampling_x == 1 && cm->subsampling_y == 1) { + aom_wb_write_literal(wb, cm->chroma_sample_position, 2); + } +#endif } else { assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3); aom_wb_write_bit(wb, 0); // unused @@ -4215,9 +4161,17 @@ static void write_bitdepth_colorspace_sampling( } #if CONFIG_REFERENCE_BUFFER -void write_sequence_header(SequenceHeader *seq_params) { +void write_sequence_header( +#if CONFIG_EXT_TILE + AV1_COMMON *const cm, +#endif // CONFIG_EXT_TILE + SequenceHeader *seq_params) { /* Placeholder for actually writing to the bitstream */ - seq_params->frame_id_numbers_present_flag = FRAME_ID_NUMBERS_PRESENT_FLAG; + seq_params->frame_id_numbers_present_flag = +#if CONFIG_EXT_TILE + cm->large_scale_tile ? 0 : +#endif // CONFIG_EXT_TILE + FRAME_ID_NUMBERS_PRESENT_FLAG; seq_params->frame_id_length_minus7 = FRAME_ID_LENGTH_MINUS7; seq_params->delta_frame_id_length_minus2 = DELTA_FRAME_ID_LENGTH_MINUS2; } @@ -4236,7 +4190,11 @@ static void write_compound_tools(const AV1_COMMON *cm, } #endif // CONFIG_INTERINTRA #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT +#if CONFIG_COMPOUND_SINGLEREF + if (!frame_is_intra_only(cm)) { +#else // !CONFIG_COMPOUND_SINGLEREF if (!frame_is_intra_only(cm) && cm->reference_mode != SINGLE_REFERENCE) { +#endif // CONFIG_COMPOUND_SINGLEREF aom_wb_write_bit(wb, cm->allow_masked_compound); } else { assert(cm->allow_masked_compound == 0); @@ -4252,13 +4210,21 @@ static void write_uncompressed_header(AV1_COMP *cpi, #if CONFIG_REFERENCE_BUFFER /* TODO: Move outside frame loop or inside key-frame branch */ - write_sequence_header(&cpi->seq_params); + write_sequence_header( +#if CONFIG_EXT_TILE + cm, +#endif // CONFIG_EXT_TILE + &cpi->seq_params); #endif aom_wb_write_literal(wb, AOM_FRAME_MARKER, 2); write_profile(cm->profile, wb); +#if CONFIG_EXT_TILE + aom_wb_write_literal(wb, cm->large_scale_tile, 1); +#endif // CONFIG_EXT_TILE + #if CONFIG_EXT_REFS // NOTE: By default all coded frames to be used as a reference cm->is_reference_frame = 1; @@ -4309,11 +4275,6 @@ static void write_uncompressed_header(AV1_COMP *cpi, } #endif -#if CONFIG_FRAME_SUPERRES - // TODO(afergs): Remove - this is just to stop superres from breaking - cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR; -#endif // CONFIG_FRAME_SUPERRES - if (cm->frame_type == KEY_FRAME) { write_sync_code(wb); write_bitdepth_colorspace_sampling(cm, wb); @@ -4447,12 +4408,6 @@ static void write_uncompressed_header(AV1_COMP *cpi, #endif // CONFIG_EXT_PARTITION encode_loopfilter(cm, wb); -#if CONFIG_CDEF - encode_cdef(cm, wb); -#endif -#if CONFIG_LOOP_RESTORATION - encode_restoration_mode(cm, wb); -#endif // CONFIG_LOOP_RESTORATION encode_quantization(cm, wb); encode_segmentation(cm, xd, wb); #if CONFIG_DELTA_Q @@ -4485,8 +4440,15 @@ static void write_uncompressed_header(AV1_COMP *cpi, } } #endif - - write_tx_mode(cm, xd, &cm->tx_mode, wb); +#if CONFIG_CDEF + if (!cm->all_lossless) { + encode_cdef(cm, wb); + } +#endif +#if CONFIG_LOOP_RESTORATION + encode_restoration_mode(cm, wb); +#endif // CONFIG_LOOP_RESTORATION + write_tx_mode(cm, &cm->tx_mode, wb); if (cpi->allow_comp_inter_inter) { const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT; @@ -4513,13 +4475,13 @@ static void write_uncompressed_header(AV1_COMP *cpi, #if 
CONFIG_GLOBAL_MOTION static void write_global_motion_params(WarpedMotionParams *params, WarpedMotionParams *ref_params, - aom_prob *probs, aom_writer *w, - int allow_hp) { + aom_writer *w, int allow_hp) { TransformationType type = params->wmtype; int trans_bits; int trans_prec_diff; - av1_write_token(w, av1_global_motion_types_tree, probs, - &global_motion_types_encodings[type]); + aom_write_bit(w, type != IDENTITY); + if (type != IDENTITY) aom_write_literal(w, type - 1, GLOBAL_TYPE_BITS); + switch (type) { case HOMOGRAPHY: case HORTRAPEZOID: @@ -4584,10 +4546,18 @@ static void write_global_motion_params(WarpedMotionParams *params, static void write_global_motion(AV1_COMP *cpi, aom_writer *w) { AV1_COMMON *const cm = &cpi->common; int frame; + YV12_BUFFER_CONFIG *ref_buf; for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) { - write_global_motion_params( - &cm->global_motion[frame], &cm->prev_frame->global_motion[frame], - cm->fc->global_motion_types_prob, w, cm->allow_high_precision_mv); + ref_buf = get_ref_frame_buffer(cpi, frame); + if (cpi->source->y_crop_width == ref_buf->y_crop_width && + cpi->source->y_crop_height == ref_buf->y_crop_height) { + write_global_motion_params(&cm->global_motion[frame], + &cm->prev_frame->global_motion[frame], w, + cm->allow_high_precision_mv); + } else { + assert(cm->global_motion[frame].wmtype == IDENTITY && + "Invalid warp type for frames of different resolutions"); + } /* printf("Frame %d/%d: Enc Ref %d (used %d): %d %d %d %d\n", cm->current_video_frame, cm->show_frame, frame, @@ -4605,15 +4575,17 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; #endif // CONFIG_SUPERTX FRAME_CONTEXT *const fc = cm->fc; - FRAME_COUNTS *counts = cpi->td.counts; aom_writer *header_bc; - int i, j; + int i; +#if !CONFIG_NEW_MULTISYMBOL + FRAME_COUNTS *counts = cpi->td.counts; + int j; +#endif -#if CONFIG_TILE_GROUPS const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif + (void)probwt; + (void)i; + (void)fc; #if CONFIG_ANS int header_size; @@ -4628,96 +4600,26 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { #if CONFIG_LOOP_RESTORATION encode_restoration(cm, header_bc); #endif // CONFIG_LOOP_RESTORATION -#if !CONFIG_EC_ADAPT - update_txfm_probs(cm, header_bc, counts); -#endif -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) if (cm->tx_mode == TX_MODE_SELECT) av1_cond_prob_diff_update(header_bc, &cm->fc->quarter_tx_size_prob, cm->counts.quarter_tx_size, probwt); -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif #if CONFIG_LV_MAP av1_write_txb_probs(cpi, header_bc); -#else -#if !CONFIG_PVQ -#if !CONFIG_EC_ADAPT - update_coef_probs(cpi, header_bc); -#endif // !CONFIG_EC_ADAPT -#endif // CONFIG_PVQ #endif // CONFIG_LV_MAP -#if CONFIG_VAR_TX +#if CONFIG_VAR_TX && !CONFIG_NEW_MULTISYMBOL update_txfm_partition_probs(cm, header_bc, counts, probwt); #endif +#if !CONFIG_NEW_MULTISYMBOL update_skip_probs(cm, header_bc, counts); -#if !CONFIG_EC_ADAPT && CONFIG_DELTA_Q - update_delta_q_probs(cm, header_bc, counts); -#if CONFIG_EXT_DELTA_Q - update_delta_lf_probs(cm, header_bc, counts); -#endif -#endif -#if !CONFIG_EC_ADAPT - update_seg_probs(cpi, header_bc); - - for (i = 0; i < INTRA_MODES; ++i) { - prob_diff_update(av1_intra_mode_tree, fc->uv_mode_prob[i], - counts->uv_mode[i], INTRA_MODES, probwt, header_bc); - } - -#if CONFIG_EXT_PARTITION_TYPES - for (i = 0; i < 
PARTITION_PLOFFSET; ++i) - prob_diff_update(av1_partition_tree, fc->partition_prob[i], - counts->partition[i], PARTITION_TYPES, probwt, header_bc); - for (; i < PARTITION_CONTEXTS_PRIMARY; ++i) - prob_diff_update(av1_ext_partition_tree, fc->partition_prob[i], - counts->partition[i], EXT_PARTITION_TYPES, probwt, - header_bc); -#else - for (i = 0; i < PARTITION_CONTEXTS_PRIMARY; ++i) - prob_diff_update(av1_partition_tree, fc->partition_prob[i], - counts->partition[i], PARTITION_TYPES, probwt, header_bc); -#endif // CONFIG_EXT_PARTITION_TYPES -#if CONFIG_UNPOISON_PARTITION_CTX - for (; i < PARTITION_CONTEXTS_PRIMARY + PARTITION_BLOCK_SIZES; ++i) { - unsigned int ct[2] = { counts->partition[i][PARTITION_VERT], - counts->partition[i][PARTITION_SPLIT] }; - assert(counts->partition[i][PARTITION_NONE] == 0); - assert(counts->partition[i][PARTITION_HORZ] == 0); - assert(fc->partition_prob[i][PARTITION_NONE] == 0); - assert(fc->partition_prob[i][PARTITION_HORZ] == 0); - av1_cond_prob_diff_update(header_bc, &fc->partition_prob[i][PARTITION_VERT], - ct, probwt); - } - for (; i < PARTITION_CONTEXTS_PRIMARY + 2 * PARTITION_BLOCK_SIZES; ++i) { - unsigned int ct[2] = { counts->partition[i][PARTITION_HORZ], - counts->partition[i][PARTITION_SPLIT] }; - assert(counts->partition[i][PARTITION_NONE] == 0); - assert(counts->partition[i][PARTITION_VERT] == 0); - assert(fc->partition_prob[i][PARTITION_NONE] == 0); - assert(fc->partition_prob[i][PARTITION_VERT] == 0); - av1_cond_prob_diff_update(header_bc, &fc->partition_prob[i][PARTITION_HORZ], - ct, probwt); - } #endif -#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP - for (i = 0; i < INTRA_FILTERS + 1; ++i) - prob_diff_update(av1_intra_filter_tree, fc->intra_filter_probs[i], - counts->intra_filter[i], INTRA_FILTERS, probwt, header_bc); -#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP -#endif // !CONFIG_EC_ADAPT if (frame_is_intra_only(cm)) { - av1_copy(cm->kf_y_prob, av1_kf_y_mode_prob); av1_copy(cm->fc->kf_y_cdf, av1_kf_y_mode_cdf); -#if !CONFIG_EC_ADAPT - for (i = 0; i < INTRA_MODES; ++i) - for (j = 0; j < INTRA_MODES; ++j) - prob_diff_update(av1_intra_mode_tree, cm->kf_y_prob[i][j], - counts->kf_y_mode[i][j], INTRA_MODES, probwt, - header_bc); -#endif // CONFIG_EC_ADAPT #if CONFIG_INTRABC if (cm->allow_screen_content_tools) { av1_cond_prob_diff_update(header_bc, &fc->intrabc_prob, @@ -4725,56 +4627,54 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { } #endif } else { +#if !CONFIG_NEW_MULTISYMBOL update_inter_mode_probs(cm, header_bc, counts); +#endif #if CONFIG_EXT_INTER - update_inter_compound_mode_probs(cm, probwt, header_bc); #if CONFIG_INTERINTRA if (cm->reference_mode != COMPOUND_REFERENCE && cm->allow_interintra_compound) { +#if !CONFIG_NEW_MULTISYMBOL for (i = 0; i < BLOCK_SIZE_GROUPS; i++) { if (is_interintra_allowed_bsize_group(i)) { av1_cond_prob_diff_update(header_bc, &fc->interintra_prob[i], cm->counts.interintra[i], probwt); } } - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) { - prob_diff_update( - av1_interintra_mode_tree, cm->fc->interintra_mode_prob[i], - counts->interintra_mode[i], INTERINTRA_MODES, probwt, header_bc); - } -#if CONFIG_WEDGE - for (i = 0; i < BLOCK_SIZES; i++) { +#endif +#if CONFIG_WEDGE && !CONFIG_NEW_MULTISYMBOL +#if CONFIG_EXT_PARTITION_TYPES + int block_sizes_to_update = BLOCK_SIZES_ALL; +#else + int block_sizes_to_update = BLOCK_SIZES; +#endif + for (i = 0; i < block_sizes_to_update; i++) { if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i)) av1_cond_prob_diff_update(header_bc, 
&fc->wedge_interintra_prob[i], cm->counts.wedge_interintra[i], probwt); } -#endif // CONFIG_WEDGE +#endif // CONFIG_WEDGE && CONFIG_NEW_MULTISYMBOL } #endif // CONFIG_INTERINTRA -#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE - if (cm->reference_mode != SINGLE_REFERENCE && cm->allow_masked_compound) { - for (i = 0; i < BLOCK_SIZES; i++) - prob_diff_update(av1_compound_type_tree, fc->compound_type_prob[i], - cm->counts.compound_interinter[i], COMPOUND_TYPES, - probwt, header_bc); - } -#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i) - prob_diff_update(av1_motion_mode_tree, fc->motion_mode_prob[i], - counts->motion_mode[i], MOTION_MODES, probwt, header_bc); -#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION -#if !CONFIG_EC_ADAPT - if (cm->interp_filter == SWITCHABLE) - update_switchable_interp_probs(cm, header_bc, counts); +#if CONFIG_NCOBMC_ADAPT_WEIGHT + for (i = ADAPT_OVERLAP_BLOCK_8X8; i < ADAPT_OVERLAP_BLOCKS; ++i) { + prob_diff_update(av1_ncobmc_mode_tree, fc->ncobmc_mode_prob[i], + counts->ncobmc_mode[i], MAX_NCOBMC_MODES, probwt, + header_bc); + } #endif +#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION +#if !CONFIG_NEW_MULTISYMBOL for (i = 0; i < INTRA_INTER_CONTEXTS; i++) av1_cond_prob_diff_update(header_bc, &fc->intra_inter_prob[i], counts->intra_inter[i], probwt); +#endif +#if !CONFIG_NEW_MULTISYMBOL if (cpi->allow_comp_inter_inter) { const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT; if (use_hybrid_pred) @@ -4791,7 +4691,19 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { } } } + if (cm->reference_mode != SINGLE_REFERENCE) { +#if CONFIG_EXT_COMP_REFS + for (i = 0; i < COMP_REF_TYPE_CONTEXTS; i++) + av1_cond_prob_diff_update(header_bc, &fc->comp_ref_type_prob[i], + counts->comp_ref_type[i], probwt); + + for (i = 0; i < UNI_COMP_REF_CONTEXTS; i++) + for (j = 0; j < (UNIDIR_COMP_REFS - 1); j++) + av1_cond_prob_diff_update(header_bc, &fc->uni_comp_ref_prob[i][j], + counts->uni_comp_ref[i][j], probwt); +#endif // CONFIG_EXT_COMP_REFS + for (i = 0; i < REF_CONTEXTS; i++) { #if CONFIG_EXT_REFS for (j = 0; j < (FWD_REFS - 1); j++) { @@ -4810,17 +4722,16 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { #endif // CONFIG_EXT_REFS } } +#endif // CONFIG_NEW_MULTISYMBOL -#if !CONFIG_EC_ADAPT - for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) { - prob_diff_update(av1_intra_mode_tree, cm->fc->y_mode_prob[i], - counts->y_mode[i], INTRA_MODES, probwt, header_bc); - } -#endif +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + for (i = 0; i < COMP_INTER_MODE_CONTEXTS; i++) + av1_cond_prob_diff_update(header_bc, &fc->comp_inter_mode_prob[i], + counts->comp_inter_mode[i], probwt); +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF +#if !CONFIG_NEW_MULTISYMBOL av1_write_nmv_probs(cm, cm->allow_high_precision_mv, header_bc, counts->mv); -#if !CONFIG_EC_ADAPT - update_ext_tx_probs(cm, header_bc); #endif #if CONFIG_SUPERTX if (!xd->lossless[0]) update_supertx_probs(cm, probwt, header_bc); @@ -4829,12 +4740,6 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { write_global_motion(cpi, header_bc); #endif // CONFIG_GLOBAL_MOTION } -#if !CONFIG_EC_ADAPT - av1_coef_head_cdfs(fc); - av1_coef_pareto_cdfs(fc); - for (i = 0; i < NMV_CONTEXTS; ++i) av1_set_mv_cdfs(&fc->nmvc[i]); - av1_set_mode_cdfs(cm); -#endif // !CONFIG_EC_ADAPT #if CONFIG_ANS aom_buf_ans_flush(header_bc); header_size = 
buf_ans_write_end(header_bc); @@ -4881,16 +4786,23 @@ static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst, const uint32_t max_tile_col_size, int *const tile_size_bytes, int *const tile_col_size_bytes) { -// Choose the tile size bytes (tsb) and tile column size bytes (tcsb) + // Choose the tile size bytes (tsb) and tile column size bytes (tcsb) + int tsb; + int tcsb; + #if CONFIG_EXT_TILE - // The top bit in the tile size field indicates tile copy mode, so we - // have 1 less bit to code the tile size - const int tsb = choose_size_bytes(max_tile_size, 1); - const int tcsb = choose_size_bytes(max_tile_col_size, 0); -#else - const int tsb = choose_size_bytes(max_tile_size, 0); - const int tcsb = 4; // This is ignored - (void)max_tile_col_size; + if (cm->large_scale_tile) { + // The top bit in the tile size field indicates tile copy mode, so we + // have 1 less bit to code the tile size + tsb = choose_size_bytes(max_tile_size, 1); + tcsb = choose_size_bytes(max_tile_col_size, 0); + } else { +#endif // CONFIG_EXT_TILE + tsb = choose_size_bytes(max_tile_size, 0); + tcsb = 4; // This is ignored + (void)max_tile_col_size; +#if CONFIG_EXT_TILE + } #endif // CONFIG_EXT_TILE assert(tsb > 0); @@ -4906,64 +4818,68 @@ static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst, uint32_t rpos = 0; #if CONFIG_EXT_TILE - int tile_row; - int tile_col; - - for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) { - // All but the last column has a column header - if (tile_col < cm->tile_cols - 1) { - uint32_t tile_col_size = mem_get_le32(dst + rpos); - rpos += 4; + if (cm->large_scale_tile) { + int tile_row; + int tile_col; + + for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) { + // All but the last column has a column header + if (tile_col < cm->tile_cols - 1) { + uint32_t tile_col_size = mem_get_le32(dst + rpos); + rpos += 4; + + // Adjust the tile column size by the number of bytes removed + // from the tile size fields. + tile_col_size -= (4 - tsb) * cm->tile_rows; + + mem_put_varsize(dst + wpos, tcsb, tile_col_size); + wpos += tcsb; + } - // Adjust the tile column size by the number of bytes removed - // from the tile size fields. - tile_col_size -= (4 - tsb) * cm->tile_rows; + for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) { + // All, including the last row has a header + uint32_t tile_header = mem_get_le32(dst + rpos); + rpos += 4; + + // If this is a copy tile, we need to shift the MSB to the + // top bit of the new width, and there is no data to copy. + if (tile_header >> 31 != 0) { + if (tsb < 4) tile_header >>= 32 - 8 * tsb; + mem_put_varsize(dst + wpos, tsb, tile_header); + wpos += tsb; + } else { + mem_put_varsize(dst + wpos, tsb, tile_header); + wpos += tsb; - mem_put_varsize(dst + wpos, tcsb, tile_col_size); - wpos += tcsb; + memmove(dst + wpos, dst + rpos, tile_header); + rpos += tile_header; + wpos += tile_header; + } + } } + } else { +#endif // CONFIG_EXT_TILE + const int n_tiles = cm->tile_cols * cm->tile_rows; + int n; - for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) { - // All, including the last row has a header - uint32_t tile_header = mem_get_le32(dst + rpos); - rpos += 4; + for (n = 0; n < n_tiles; n++) { + int tile_size; - // If this is a copy tile, we need to shift the MSB to the - // top bit of the new width, and there is no data to copy. 
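/* Editor's note: illustrative sketch, not part of the patch. It mirrors what
 * choose_size_bytes() is used for in remux_tiles() above -- picking the
 * narrowest byte width that still holds the largest tile (or tile column)
 * size -- under the assumption that the spare-MSB flag reserves the top bit
 * of the field for the copy-tile flag. It also restates how a 4-byte
 * copy-tile header is shifted so that flag lands in the top bit of the
 * narrower field. Both helper names are hypothetical. */
#include <stdint.h>

static int min_size_bytes(uint32_t max_val, int spare_msb) {
  for (int n = 1; n < 4; ++n) {
    const int bits = 8 * n - (spare_msb ? 1 : 0);
    if (max_val < ((uint32_t)1 << bits)) return n;
  }
  return 4;  /* fall back to the full 4-byte field */
}

static uint32_t shrink_copy_tile_header(uint32_t tile_header, int tsb) {
  /* Matches the `if (tsb < 4) tile_header >>= 32 - 8 * tsb;` step above. */
  return (tsb < 4) ? (tile_header >> (32 - 8 * tsb)) : tile_header;
}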
- if (tile_header >> 31 != 0) { - if (tsb < 4) tile_header >>= 32 - 8 * tsb; - mem_put_varsize(dst + wpos, tsb, tile_header); - wpos += tsb; + if (n == n_tiles - 1) { + tile_size = data_size - rpos; } else { - mem_put_varsize(dst + wpos, tsb, tile_header); + tile_size = mem_get_le32(dst + rpos); + rpos += 4; + mem_put_varsize(dst + wpos, tsb, tile_size); wpos += tsb; - - memmove(dst + wpos, dst + rpos, tile_header); - rpos += tile_header; - wpos += tile_header; } - } - } -#else - const int n_tiles = cm->tile_cols * cm->tile_rows; - int n; - for (n = 0; n < n_tiles; n++) { - int tile_size; + memmove(dst + wpos, dst + rpos, tile_size); - if (n == n_tiles - 1) { - tile_size = data_size - rpos; - } else { - tile_size = mem_get_le32(dst + rpos); - rpos += 4; - mem_put_varsize(dst + wpos, tsb, tile_size); - wpos += tsb; + rpos += tile_size; + wpos += tile_size; } - - memmove(dst + wpos, dst + rpos, tile_size); - - rpos += tile_size; - wpos += tile_size; +#if CONFIG_EXT_TILE } #endif // CONFIG_EXT_TILE @@ -4976,14 +4892,17 @@ static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst, void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) { uint8_t *data = dst; -#if !CONFIG_TILE_GROUPS - uint32_t compressed_header_size; + uint32_t data_size; +#if CONFIG_EXT_TILE + AV1_COMMON *const cm = &cpi->common; + uint32_t compressed_header_size = 0; uint32_t uncompressed_header_size; struct aom_write_bit_buffer saved_wb; -#endif - uint32_t data_size; struct aom_write_bit_buffer wb = { data, 0 }; - + const int have_tiles = cm->tile_cols * cm->tile_rows > 1; + int tile_size_bytes; + int tile_col_size_bytes; +#endif // CONFIG_EXT_TILE unsigned int max_tile_size; unsigned int max_tile_col_size; @@ -4991,76 +4910,77 @@ void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) { bitstream_queue_reset_write(); #endif -#if !CONFIG_TILE_GROUPS - int tile_size_bytes; - int tile_col_size_bytes; - AV1_COMMON *const cm = &cpi->common; - const int have_tiles = cm->tile_cols * cm->tile_rows > 1; - - // Write the uncompressed header - write_uncompressed_header(cpi, &wb); +#if CONFIG_EXT_TILE + if (cm->large_scale_tile) { + // Write the uncompressed header + write_uncompressed_header(cpi, &wb); #if CONFIG_EXT_REFS - if (cm->show_existing_frame) { - *size = aom_wb_bytes_written(&wb); - return; - } + if (cm->show_existing_frame) { + *size = aom_wb_bytes_written(&wb); + return; + } #endif // CONFIG_EXT_REFS - // We do not know these in advance. Output placeholder bit. - saved_wb = wb; - // Write tile size magnitudes - if (have_tiles) { -// Note that the last item in the uncompressed header is the data -// describing tile configuration. -#if CONFIG_EXT_TILE - // Number of bytes in tile column size - 1 - aom_wb_write_literal(&wb, 0, 2); -#endif // CONFIG_EXT_TILE - // Number of bytes in tile size - 1 - aom_wb_write_literal(&wb, 0, 2); - } - // Size of compressed header - aom_wb_write_literal(&wb, 0, 16); + // We do not know these in advance. Output placeholder bit. + saved_wb = wb; + // Write tile size magnitudes + if (have_tiles) { + // Note that the last item in the uncompressed header is the data + // describing tile configuration. 
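/* Editor's note: a minimal sketch (not part of the patch) of the
 * reserve-then-patch pattern used by av1_pack_bitstream() above: a copy of
 * the bit buffer (saved_wb) is taken before placeholder fields are written,
 * and once the real values are known they are written over the placeholders
 * with aom_wb_overwrite_literal(). The include path and the helper name are
 * assumptions of this sketch. */
#include <stdint.h>
#include "aom_dsp/bitwriter_buffer.h"

static uint32_t write_header_with_16bit_size(uint8_t *dst,
                                             uint32_t payload_size) {
  struct aom_write_bit_buffer wb = { dst, 0 };
  struct aom_write_bit_buffer patch_wb;

  patch_wb = wb;                     /* remember where the size field lives */
  aom_wb_write_literal(&wb, 0, 16);  /* 16-bit placeholder */

  /* ... payload_size bytes of payload would be produced here ... */

  /* Patch the placeholder now that the payload size is known. */
  aom_wb_overwrite_literal(&patch_wb, (int)payload_size, 16);
  return (uint32_t)aom_wb_bytes_written(&wb);
}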
+ // Number of bytes in tile column size - 1 + aom_wb_write_literal(&wb, 0, 2); + + // Number of bytes in tile size - 1 + aom_wb_write_literal(&wb, 0, 2); + } + // Size of compressed header + aom_wb_write_literal(&wb, 0, 16); - uncompressed_header_size = (uint32_t)aom_wb_bytes_written(&wb); - data += uncompressed_header_size; + uncompressed_header_size = (uint32_t)aom_wb_bytes_written(&wb); + data += uncompressed_header_size; - aom_clear_system_state(); + aom_clear_system_state(); - // Write the compressed header - compressed_header_size = write_compressed_header(cpi, data); - data += compressed_header_size; + // Write the compressed header + compressed_header_size = write_compressed_header(cpi, data); + data += compressed_header_size; - // Write the encoded tile data - data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size); -#else - data_size = write_tiles(cpi, &wb, &max_tile_size, &max_tile_col_size); -#endif -#if !CONFIG_TILE_GROUPS - if (have_tiles) { - data_size = - remux_tiles(cm, data, data_size, max_tile_size, max_tile_col_size, - &tile_size_bytes, &tile_col_size_bytes); + // Write the encoded tile data + data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size); + } else { +#endif // CONFIG_EXT_TILE + data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size); +#if CONFIG_EXT_TILE } +#endif // CONFIG_EXT_TILE +#if CONFIG_EXT_TILE + if (cm->large_scale_tile) { + if (have_tiles) { + data_size = + remux_tiles(cm, data, data_size, max_tile_size, max_tile_col_size, + &tile_size_bytes, &tile_col_size_bytes); + } - data += data_size; + data += data_size; - // Now fill in the gaps in the uncompressed header. - if (have_tiles) { -#if CONFIG_EXT_TILE - assert(tile_col_size_bytes >= 1 && tile_col_size_bytes <= 4); - aom_wb_write_literal(&saved_wb, tile_col_size_bytes - 1, 2); + // Now fill in the gaps in the uncompressed header. + if (have_tiles) { + assert(tile_col_size_bytes >= 1 && tile_col_size_bytes <= 4); + aom_wb_write_literal(&saved_wb, tile_col_size_bytes - 1, 2); + + assert(tile_size_bytes >= 1 && tile_size_bytes <= 4); + aom_wb_write_literal(&saved_wb, tile_size_bytes - 1, 2); + } + // TODO(jbb): Figure out what to do if compressed_header_size > 16 bits. + assert(compressed_header_size <= 0xffff); + aom_wb_write_literal(&saved_wb, compressed_header_size, 16); + } else { #endif // CONFIG_EXT_TILE - assert(tile_size_bytes >= 1 && tile_size_bytes <= 4); - aom_wb_write_literal(&saved_wb, tile_size_bytes - 1, 2); + data += data_size; +#if CONFIG_EXT_TILE } - // TODO(jbb): Figure out what to do if compressed_header_size > 16 bits. 
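/* Editor's note: layout summary only, not part of the patch. On the
 * large-scale-tile path the packed frame ends up as
 *   [uncompressed header][compressed header][tile column/size fields + tiles]
 * and the compressed-header length is patched into a 16-bit field, hence the
 * 0xffff limit the TODO above refers to. The helper below just restates that
 * arithmetic; its name is hypothetical. */
#include <assert.h>
#include <stdint.h>

static uint32_t packed_frame_size(uint32_t uncompressed_hdr_size,
                                  uint32_t compressed_hdr_size,
                                  uint32_t tile_data_size) {
  assert(compressed_hdr_size <= 0xffff);  /* must fit the 16-bit size field */
  return uncompressed_hdr_size + compressed_hdr_size + tile_data_size;
}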
- assert(compressed_header_size <= 0xffff); - aom_wb_write_literal(&saved_wb, compressed_header_size, 16); -#else - data += data_size; -#endif +#endif // CONFIG_EXT_TILE #if CONFIG_ANS && ANS_REVERSE // Avoid aliasing the superframe index *data++ = 0; diff --git a/third_party/aom/av1/encoder/bitstream.h b/third_party/aom/av1/encoder/bitstream.h index c75d80891..29c930356 100644 --- a/third_party/aom/av1/encoder/bitstream.h +++ b/third_party/aom/av1/encoder/bitstream.h @@ -19,7 +19,11 @@ extern "C" { #include "av1/encoder/encoder.h" #if CONFIG_REFERENCE_BUFFER -void write_sequence_header(SequenceHeader *seq_params); +void write_sequence_header( +#if CONFIG_EXT_TILE + AV1_COMMON *const cm, +#endif // CONFIG_EXT_TILE + SequenceHeader *seq_params); #endif void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dest, size_t *size); @@ -42,7 +46,8 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, const int supertx_enabled, #endif #if CONFIG_TXK_SEL - int block, int plane, + int blk_row, int blk_col, int block, int plane, + TX_SIZE tx_size, #endif aom_writer *w); diff --git a/third_party/aom/av1/encoder/block.h b/third_party/aom/av1/encoder/block.h index e16479e64..7b6eb0b0e 100644 --- a/third_party/aom/av1/encoder/block.h +++ b/third_party/aom/av1/encoder/block.h @@ -116,7 +116,6 @@ struct macroblock { // The equivalend SAD error of one (whole) bit at the current quantizer // for sub-8x8 blocks. int sadperbit4; - int rddiv; int rdmult; int mb_energy; int *m_search_count_ptr; @@ -206,16 +205,15 @@ struct macroblock { int pvq_speed; int pvq_coded; // Indicates whether pvq_info needs be stored to tokenize #endif -#if CONFIG_DAALA_DIST - // Keep rate of each 4x4 block in the current macroblock during RDO - // This is needed when using the 8x8 Daala distortion metric during RDO, - // because it evaluates distortion in a different order than the underlying - // 4x4 blocks are coded. - int rate_4x4[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; +#if CONFIG_DIST_8X8 #if CONFIG_CB4X4 +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, decoded_8x8[8 * 8]); +#else DECLARE_ALIGNED(16, uint8_t, decoded_8x8[8 * 8]); +#endif #endif // CONFIG_CB4X4 -#endif // CONFIG_DAALA_DIST +#endif // CONFIG_DIST_8X8 #if CONFIG_CFL // Whether luma needs to be stored during RDO. 
int cfl_store_y; diff --git a/third_party/aom/av1/encoder/context_tree.c b/third_party/aom/av1/encoder/context_tree.c index 4c7d6ff00..b1c01b28e 100644 --- a/third_party/aom/av1/encoder/context_tree.c +++ b/third_party/aom/av1/encoder/context_tree.c @@ -65,12 +65,10 @@ static void alloc_mode_context(AV1_COMMON *cm, int num_4x4_blk, } #if CONFIG_PALETTE - if (cm->allow_screen_content_tools) { - for (i = 0; i < 2; ++i) { - CHECK_MEM_ERROR( - cm, ctx->color_index_map[i], - aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i]))); - } + for (i = 0; i < 2; ++i) { + CHECK_MEM_ERROR( + cm, ctx->color_index_map[i], + aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i]))); } #endif // CONFIG_PALETTE } @@ -141,7 +139,13 @@ static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree, &tree->verticalb[1]); alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_B, &tree->verticalb[2]); -#ifdef CONFIG_SUPERTX + for (int i = 0; i < 4; ++i) { + alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_4, + &tree->horizontal4[i]); + alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_4, + &tree->vertical4[i]); + } +#if CONFIG_SUPERTX alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ, &tree->horizontal_supertx); alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT, &tree->vertical_supertx); @@ -159,7 +163,7 @@ static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree, alloc_mode_context(cm, num_4x4_blk, &tree->none); alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[0]); alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[0]); -#ifdef CONFIG_SUPERTX +#if CONFIG_SUPERTX alloc_mode_context(cm, num_4x4_blk, &tree->horizontal_supertx); alloc_mode_context(cm, num_4x4_blk, &tree->vertical_supertx); alloc_mode_context(cm, num_4x4_blk, &tree->split_supertx); @@ -184,13 +188,17 @@ static void free_tree_contexts(PC_TREE *tree) { free_mode_context(&tree->verticala[i]); free_mode_context(&tree->verticalb[i]); } + for (i = 0; i < 4; ++i) { + free_mode_context(&tree->horizontal4[i]); + free_mode_context(&tree->vertical4[i]); + } #endif // CONFIG_EXT_PARTITION_TYPES free_mode_context(&tree->none); free_mode_context(&tree->horizontal[0]); free_mode_context(&tree->horizontal[1]); free_mode_context(&tree->vertical[0]); free_mode_context(&tree->vertical[1]); -#ifdef CONFIG_SUPERTX +#if CONFIG_SUPERTX free_mode_context(&tree->horizontal_supertx); free_mode_context(&tree->vertical_supertx); free_mode_context(&tree->split_supertx); diff --git a/third_party/aom/av1/encoder/context_tree.h b/third_party/aom/av1/encoder/context_tree.h index 4f9d5e374..bcfcc274a 100644 --- a/third_party/aom/av1/encoder/context_tree.h +++ b/third_party/aom/av1/encoder/context_tree.h @@ -81,12 +81,14 @@ typedef struct PC_TREE { PICK_MODE_CONTEXT horizontalb[3]; PICK_MODE_CONTEXT verticala[3]; PICK_MODE_CONTEXT verticalb[3]; + PICK_MODE_CONTEXT horizontal4[4]; + PICK_MODE_CONTEXT vertical4[4]; #endif union { struct PC_TREE *split[4]; PICK_MODE_CONTEXT *leaf_split[4]; }; -#ifdef CONFIG_SUPERTX +#if CONFIG_SUPERTX PICK_MODE_CONTEXT horizontal_supertx; PICK_MODE_CONTEXT vertical_supertx; PICK_MODE_CONTEXT split_supertx; diff --git a/third_party/aom/av1/encoder/cost.c b/third_party/aom/av1/encoder/cost.c index e3151a597..e33df53e4 100644 --- a/third_party/aom/av1/encoder/cost.c +++ b/third_party/aom/av1/encoder/cost.c @@ -65,3 +65,21 @@ void av1_cost_tokens_skip(int *costs, const aom_prob *probs, aom_tree tree) { costs[-tree[0]] = av1_cost_bit(probs[0], 0); cost(costs, tree, probs, 2, 0); } + +void av1_cost_tokens_from_cdf(int 
*costs, const aom_cdf_prob *cdf, + const int *inv_map) { + int i; + aom_cdf_prob prev_cdf = 0; + for (i = 0;; ++i) { + const aom_cdf_prob p15 = AOM_ICDF(cdf[i]) - prev_cdf; + prev_cdf = AOM_ICDF(cdf[i]); + + if (inv_map) + costs[inv_map[i]] = av1_cost_symbol(p15); + else + costs[i] = av1_cost_symbol(p15); + + // Stop once we reach the end of the CDF + if (cdf[i] == AOM_ICDF(CDF_PROB_TOP)) break; + } +} diff --git a/third_party/aom/av1/encoder/cost.h b/third_party/aom/av1/encoder/cost.h index d8fb357e6..e60632005 100644 --- a/third_party/aom/av1/encoder/cost.h +++ b/third_party/aom/av1/encoder/cost.h @@ -34,6 +34,14 @@ extern const uint16_t av1_prob_cost[256]; // for each bit. #define av1_cost_literal(n) ((n) * (1 << AV1_PROB_COST_SHIFT)) +// Calculate the cost of a symbol with probability p15 / 2^15 +static INLINE int av1_cost_symbol(aom_cdf_prob p15) { + assert(0 < p15 && p15 < CDF_PROB_TOP); + const int shift = CDF_PROB_BITS - 1 - get_msb(p15); + return av1_cost_zero(get_prob(p15 << shift, CDF_PROB_TOP)) + + av1_cost_literal(shift); +} + static INLINE unsigned int cost_branch256(const unsigned int ct[2], aom_prob p) { return ct[0] * av1_cost_zero(p) + ct[1] * av1_cost_one(p); @@ -55,6 +63,8 @@ static INLINE int treed_cost(aom_tree tree, const aom_prob *probs, int bits, void av1_cost_tokens(int *costs, const aom_prob *probs, aom_tree tree); void av1_cost_tokens_skip(int *costs, const aom_prob *probs, aom_tree tree); +void av1_cost_tokens_from_cdf(int *costs, const aom_cdf_prob *cdf, + const int *inv_map); #ifdef __cplusplus } // extern "C" diff --git a/third_party/aom/av1/encoder/dct.c b/third_party/aom/av1/encoder/dct.c index f6b64f0f7..850b84ca9 100644 --- a/third_party/aom/av1/encoder/dct.c +++ b/third_party/aom/av1/encoder/dct.c @@ -21,6 +21,9 @@ #include "av1/common/av1_fwd_txfm1d.h" #include "av1/common/av1_fwd_txfm1d_cfg.h" #include "av1/common/idct.h" +#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 +#include "av1/common/daala_tx.h" +#endif static INLINE void range_check(const tran_low_t *input, const int size, const int bit) { @@ -39,6 +42,18 @@ static INLINE void range_check(const tran_low_t *input, const int size, #endif } +#if CONFIG_DAALA_DCT4 +static void fdct4(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[4]; + od_coeff y[4]; + for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i]; + od_bin_fdct4(y, x, 1); + for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i]; +} + +#else + static void fdct4(const tran_low_t *input, tran_low_t *output) { tran_high_t temp; tran_low_t step[4]; @@ -74,6 +89,19 @@ static void fdct4(const tran_low_t *input, tran_low_t *output) { range_check(output, 4, 16); } +#endif + +#if CONFIG_DAALA_DCT8 +static void fdct8(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[8]; + od_coeff y[8]; + for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i]; + od_bin_fdct8(y, x, 1); + for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i]; +} + +#else static void fdct8(const tran_low_t *input, tran_low_t *output) { tran_high_t temp; @@ -152,6 +180,7 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) { range_check(output, 8, 16); } +#endif static void fdct16(const tran_low_t *input, tran_low_t *output) { tran_high_t temp; @@ -767,6 +796,18 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) { output[3] = (tran_low_t)fdct_round_shift(s3); } +#if CONFIG_DAALA_DCT8 +static void fadst8(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[8]; + od_coeff y[8]; + for (i = 0; i < 8; i++) x[i] = 
(od_coeff)input[i]; + od_bin_fdst8(y, x, 1); + for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i]; +} + +#else + static void fadst8(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; @@ -837,6 +878,7 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) { output[6] = (tran_low_t)x5; output[7] = (tran_low_t)-x1; } +#endif static void fadst16(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; @@ -1021,6 +1063,83 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) { // Note overall scaling factor is 4 times orthogonal } +#if CONFIG_MRC_TX +static void get_masked_residual32(const int16_t **input, int *input_stride, + const uint8_t *pred, int pred_stride, + int16_t *masked_input) { + int mrc_mask[32 * 32]; + get_mrc_mask(pred, pred_stride, mrc_mask, 32, 32, 32); + int32_t sum = 0; + int16_t avg; + // Get the masked average of the prediction + for (int i = 0; i < 32; ++i) { + for (int j = 0; j < 32; ++j) { + sum += mrc_mask[i * 32 + j] * (*input)[i * (*input_stride) + j]; + } + } + avg = ROUND_POWER_OF_TWO_SIGNED(sum, 10); + // Replace all of the unmasked pixels in the prediction with the average + // of the masked pixels + for (int i = 0; i < 32; ++i) { + for (int j = 0; j < 32; ++j) + masked_input[i * 32 + j] = + (mrc_mask[i * 32 + j]) ? (*input)[i * (*input_stride) + j] : avg; + } + *input = masked_input; + *input_stride = 32; +} +#endif // CONFIG_MRC_TX + +#if CONFIG_LGT +static void flgt4(const tran_low_t *input, tran_low_t *output, + const tran_high_t *lgtmtx) { + if (!(input[0] | input[1] | input[2] | input[3])) { + output[0] = output[1] = output[2] = output[3] = 0; + return; + } + + // evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i=1,...,4 + tran_high_t s[4] = { 0 }; + for (int i = 0; i < 4; ++i) + for (int j = 0; j < 4; ++j) s[j] += lgtmtx[j * 4 + i] * input[i]; + + for (int i = 0; i < 4; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]); +} + +static void flgt8(const tran_low_t *input, tran_low_t *output, + const tran_high_t *lgtmtx) { + // evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i=1,...,8 + tran_high_t s[8] = { 0 }; + for (int i = 0; i < 8; ++i) + for (int j = 0; j < 8; ++j) s[j] += lgtmtx[j * 8 + i] * input[i]; + + for (int i = 0; i < 8; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]); +} + +// The get_fwd_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise +int get_fwd_lgt4(transform_1d tx_orig, TxfmParam *txfm_param, + const tran_high_t *lgtmtx[], int ntx) { + // inter/intra split + if (tx_orig == &fadst4) { + for (int i = 0; i < ntx; ++i) + lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0]; + return 1; + } + return 0; +} + +int get_fwd_lgt8(transform_1d tx_orig, TxfmParam *txfm_param, + const tran_high_t *lgtmtx[], int ntx) { + // inter/intra split + if (tx_orig == &fadst8) { + for (int i = 0; i < ntx; ++i) + lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0]; + return 1; + } + return 0; +} +#endif // CONFIG_LGT + #if CONFIG_EXT_TX // TODO(sarahparker) these functions will be removed once the highbitdepth // codepath works properly for rectangular transforms. They have almost @@ -1028,13 +1147,24 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) { // being used for square transforms. 
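/* Editor's note: a standalone sketch (not part of the patch) of the 1-D LGT
 * applied by flgt4()/flgt8() above: each output coefficient is the dot
 * product of one row of the ntx x ntx basis matrix with the input, followed
 * by the same rounding shift the DCT path applies via fdct_round_shift().
 * The function name and the value of ROUND_SHIFT_BITS are assumptions of
 * this sketch. */
#include <stdint.h>

#define ROUND_SHIFT_BITS 14  /* stands in for the codec's DCT_CONST_BITS */

static void lgt_1d(const int32_t *input, int32_t *output,
                   const int64_t *basis /* ntx x ntx, row-major */, int ntx) {
  for (int j = 0; j < ntx; ++j) {
    int64_t s = 0;
    for (int i = 0; i < ntx; ++i) s += basis[j * ntx + i] * input[i];
    /* Round to nearest, then shift down to the coefficient range. */
    output[j] = (int32_t)((s + ((int64_t)1 << (ROUND_SHIFT_BITS - 1))) >>
                          ROUND_SHIFT_BITS);
  }
}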
static void fidtx4(const tran_low_t *input, tran_low_t *output) { int i; - for (i = 0; i < 4; ++i) + for (i = 0; i < 4; ++i) { +#if CONFIG_DAALA_DCT4 + output[i] = input[i]; +#else output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2); +#endif + } } static void fidtx8(const tran_low_t *input, tran_low_t *output) { int i; - for (i = 0; i < 8; ++i) output[i] = input[i] * 2; + for (i = 0; i < 8; ++i) { +#if CONFIG_DAALA_DCT8 + output[i] = input[i]; +#else + output[i] = input[i] * 2; +#endif + } } static void fidtx16(const tran_low_t *input, tran_low_t *output) { @@ -1110,6 +1240,9 @@ static void copy_fliplrud(const int16_t *src, int src_stride, int l, int w, static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w, int16_t *buff, int tx_type) { switch (tx_type) { +#if CONFIG_MRC_TX + case MRC_DCT: +#endif // CONFIG_MRC_TX case DCT_DCT: case ADST_DCT: case DCT_ADST: @@ -1144,10 +1277,21 @@ static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w, #endif // CONFIG_EXT_TX void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif +#if !CONFIG_DAALA_DCT4 if (tx_type == DCT_DCT) { aom_fdct4x4_c(input, output, stride); - } else { + return; + } +#endif + { static const transform_2d FHT[] = { { fdct4, fdct4 }, // DCT_DCT { fadst4, fdct4 }, // ADST_DCT @@ -1166,7 +1310,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, { fidtx4, fadst4 }, // H_ADST { fadst4, fidtx4 }, // V_FLIPADST { fidtx4, fadst4 }, // H_FLIPADST -#endif // CONFIG_EXT_TX +#endif }; const transform_2d ht = FHT[tx_type]; tran_low_t out[4 * 4]; @@ -1178,25 +1322,60 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, 4, 4, flipped_input, tx_type); #endif +#if CONFIG_LGT + // Choose LGT adaptive to the prediction. We may apply different LGTs for + // different rows/columns, indicated by the pointers to 2D arrays + const tran_high_t *lgtmtx_col[4]; + const tran_high_t *lgtmtx_row[4]; + int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 4); + int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 4); +#endif + // Columns for (i = 0; i < 4; ++i) { + /* A C99-safe upshift by 4 for both Daala and VPx TX. */ for (j = 0; j < 4; ++j) temp_in[j] = input[j * stride + i] * 16; +#if !CONFIG_DAALA_DCT4 if (i == 0 && temp_in[0]) temp_in[0] += 1; - ht.cols(temp_in, temp_out); +#endif +#if CONFIG_LGT + if (use_lgt_col) + flgt4(temp_in, temp_out, lgtmtx_col[i]); + else +#endif + ht.cols(temp_in, temp_out); for (j = 0; j < 4; ++j) out[j * 4 + i] = temp_out[j]; } // Rows for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j + i * 4]; - ht.rows(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_row) + flgt4(temp_in, temp_out, lgtmtx_row[i]); + else +#endif + ht.rows(temp_in, temp_out); +#if CONFIG_DAALA_DCT4 + /* Daala TX has orthonormal scaling; shift down by only 1 to achieve + the usual VPx coefficient left-shift of 3. 
*/ + for (j = 0; j < 4; ++j) output[j + i * 4] = temp_out[j] >> 1; +#else for (j = 0; j < 4; ++j) output[j + i * 4] = (temp_out[j] + 1) >> 2; +#endif } } } void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct8, fdct4 }, // DCT_DCT { fadst8, fdct4 }, // ADST_DCT @@ -1228,19 +1407,36 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type); #endif +#if CONFIG_LGT + const tran_high_t *lgtmtx_col[4]; + const tran_high_t *lgtmtx_row[8]; + int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 4); + int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 8); +#endif + // Rows for (i = 0; i < n2; ++i) { for (j = 0; j < n; ++j) temp_in[j] = (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2); - ht.rows(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_row) + flgt4(temp_in, temp_out, lgtmtx_row[i]); + else +#endif + ht.rows(temp_in, temp_out); for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j]; } // Columns for (i = 0; i < n; ++i) { for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2]; - ht.cols(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_col) + flgt8(temp_in, temp_out, lgtmtx_col[i]); + else +#endif + ht.cols(temp_in, temp_out); for (j = 0; j < n2; ++j) output[i + j * n] = (temp_out[j] + (temp_out[j] < 0)) >> 1; } @@ -1248,7 +1444,14 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, } void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct4, fdct8 }, // DCT_DCT { fadst4, fdct8 }, // ADST_DCT @@ -1280,19 +1483,36 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type); #endif +#if CONFIG_LGT + const tran_high_t *lgtmtx_col[8]; + const tran_high_t *lgtmtx_row[4]; + int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 8); + int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 4); +#endif + // Columns for (i = 0; i < n2; ++i) { for (j = 0; j < n; ++j) temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2); - ht.cols(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_col) + flgt4(temp_in, temp_out, lgtmtx_col[i]); + else +#endif + ht.cols(temp_in, temp_out); for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j]; } // Rows for (i = 0; i < n; ++i) { for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2]; - ht.rows(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_row) + flgt8(temp_in, temp_out, lgtmtx_row[i]); + else +#endif + ht.rows(temp_in, temp_out); for (j = 0; j < n2; ++j) output[j + i * n2] = (temp_out[j] + (temp_out[j] < 0)) >> 1; } @@ -1300,7 +1520,14 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, } void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT 
&& "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct16, fdct4 }, // DCT_DCT { fadst16, fdct4 }, // ADST_DCT @@ -1332,10 +1559,20 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type); #endif +#if CONFIG_LGT + const tran_high_t *lgtmtx_row[16]; + int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 16); +#endif + // Rows for (i = 0; i < n4; ++i) { for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4; - ht.rows(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_row) + flgt4(temp_in, temp_out, lgtmtx_row[i]); + else +#endif + ht.rows(temp_in, temp_out); for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j]; } @@ -1350,7 +1587,14 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, } void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct4, fdct16 }, // DCT_DCT { fadst4, fdct16 }, // ADST_DCT @@ -1382,10 +1626,20 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type); #endif +#if CONFIG_LGT + const tran_high_t *lgtmtx_col[16]; + int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 16); +#endif + // Columns for (i = 0; i < n4; ++i) { for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4; - ht.cols(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_col) + flgt4(temp_in, temp_out, lgtmtx_col[i]); + else +#endif + ht.cols(temp_in, temp_out); for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j]; } @@ -1400,7 +1654,14 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, } void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct16, fdct8 }, // DCT_DCT { fadst16, fdct8 }, // ADST_DCT @@ -1432,12 +1693,22 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type); #endif +#if CONFIG_LGT + const tran_high_t *lgtmtx_row[16]; + int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 16); +#endif + // Rows for (i = 0; i < n2; ++i) { for (j = 0; j < n; ++j) temp_in[j] = (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2); - ht.rows(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_row) + flgt8(temp_in, temp_out, lgtmtx_row[i]); + else +#endif + ht.rows(temp_in, temp_out); for (j = 0; j < n; ++j) out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2); } @@ -1452,7 +1723,14 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, } void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + 
assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct8, fdct16 }, // DCT_DCT { fadst8, fdct16 }, // ADST_DCT @@ -1484,12 +1762,22 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type); #endif +#if CONFIG_LGT + const tran_high_t *lgtmtx_col[16]; + int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 16); +#endif + // Columns for (i = 0; i < n2; ++i) { for (j = 0; j < n; ++j) temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2); - ht.cols(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_col) + flgt8(temp_in, temp_out, lgtmtx_col[i]); + else +#endif + ht.cols(temp_in, temp_out); for (j = 0; j < n; ++j) out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2); } @@ -1504,7 +1792,14 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, } void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct32, fdct8 }, // DCT_DCT { fhalfright32, fdct8 }, // ADST_DCT @@ -1536,10 +1831,20 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type); #endif +#if CONFIG_LGT + const tran_high_t *lgtmtx_row[32]; + int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 32); +#endif + // Rows for (i = 0; i < n4; ++i) { for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4; - ht.rows(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_row) + flgt8(temp_in, temp_out, lgtmtx_row[i]); + else +#endif + ht.rows(temp_in, temp_out); for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j]; } @@ -1554,7 +1859,14 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, } void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct8, fdct32 }, // DCT_DCT { fadst8, fdct32 }, // ADST_DCT @@ -1586,10 +1898,20 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type); #endif +#if CONFIG_LGT + const tran_high_t *lgtmtx_col[32]; + int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 32); +#endif + // Columns for (i = 0; i < n4; ++i) { for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4; - ht.cols(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_col) + flgt8(temp_in, temp_out, lgtmtx_col[i]); + else +#endif + ht.cols(temp_in, temp_out); for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j]; } @@ -1604,7 +1926,14 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, } void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct32, 
fdct16 }, // DCT_DCT { fhalfright32, fdct16 }, // ADST_DCT @@ -1656,7 +1985,14 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, } void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct16, fdct32 }, // DCT_DCT { fadst16, fdct32 }, // ADST_DCT @@ -1833,10 +2169,21 @@ void av1_fdct8x8_quant_c(const int16_t *input, int stride, } void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif +#if !CONFIG_DAALA_DCT8 if (tx_type == DCT_DCT) { aom_fdct8x8_c(input, output, stride); - } else { + return; + } +#endif + { static const transform_2d FHT[] = { { fdct8, fdct8 }, // DCT_DCT { fadst8, fdct8 }, // ADST_DCT @@ -1855,7 +2202,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, { fidtx8, fadst8 }, // H_ADST { fadst8, fidtx8 }, // V_FLIPADST { fidtx8, fadst8 }, // H_FLIPADST -#endif // CONFIG_EXT_TX +#endif }; const transform_2d ht = FHT[tx_type]; tran_low_t out[64]; @@ -1867,19 +2214,45 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, 8, 8, flipped_input, tx_type); #endif +#if CONFIG_LGT + const tran_high_t *lgtmtx_col[8]; + const tran_high_t *lgtmtx_row[8]; + int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 8); + int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 8); +#endif + // Columns for (i = 0; i < 8; ++i) { +#if CONFIG_DAALA_DCT8 + for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 16; +#else for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 4; - ht.cols(temp_in, temp_out); +#endif +#if CONFIG_LGT + if (use_lgt_col) + flgt8(temp_in, temp_out, lgtmtx_col[i]); + else +#endif + ht.cols(temp_in, temp_out); for (j = 0; j < 8; ++j) out[j * 8 + i] = temp_out[j]; } // Rows for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8]; - ht.rows(temp_in, temp_out); +#if CONFIG_LGT + if (use_lgt_row) + flgt8(temp_in, temp_out, lgtmtx_row[i]); + else +#endif + ht.rows(temp_in, temp_out); +#if CONFIG_DAALA_DCT8 + for (j = 0; j < 8; ++j) + output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; +#else for (j = 0; j < 8; ++j) output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; +#endif } } } @@ -1941,7 +2314,14 @@ void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { } void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct16, fdct16 }, // DCT_DCT { fadst16, fdct16 }, // ADST_DCT @@ -1960,9 +2340,8 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, { fidtx16, fadst16 }, // H_ADST { fadst16, fidtx16 }, // V_FLIPADST { fidtx16, fadst16 }, // H_FLIPADST -#endif // CONFIG_EXT_TX +#endif }; - const transform_2d ht = 
FHT[tx_type]; tran_low_t out[256]; int i, j; @@ -1989,80 +2368,17 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, } } -#if CONFIG_HIGHBITDEPTH -void av1_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht4x4_c(input, output, stride, tx_type); -} - -void av1_highbd_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht4x8_c(input, output, stride, tx_type); -} - -void av1_highbd_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht8x4_c(input, output, stride, tx_type); -} - -void av1_highbd_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht8x16_c(input, output, stride, tx_type); -} - -void av1_highbd_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht16x8_c(input, output, stride, tx_type); -} - -void av1_highbd_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht16x32_c(input, output, stride, tx_type); -} - -void av1_highbd_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht32x16_c(input, output, stride, tx_type); -} - -void av1_highbd_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht4x16_c(input, output, stride, tx_type); -} - -void av1_highbd_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht16x4_c(input, output, stride, tx_type); -} - -void av1_highbd_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht8x32_c(input, output, stride, tx_type); -} - -void av1_highbd_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht32x8_c(input, output, stride, tx_type); -} - -void av1_highbd_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht8x8_c(input, output, stride, tx_type); -} - void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { av1_fwht4x4_c(input, output, stride); } -void av1_highbd_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht16x16_c(input, output, stride, tx_type); -} -#endif // CONFIG_HIGHBITDEPTH - void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct32, fdct32 }, // DCT_DCT #if CONFIG_EXT_TX @@ -2082,6 +2398,9 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, { fhalfright32, fidtx32 }, // V_FLIPADST { fidtx32, fhalfright32 }, // H_FLIPADST #endif +#if CONFIG_MRC_TX + { fdct32, fdct32 }, // MRC_TX +#endif // CONFIG_MRC_TX }; const transform_2d ht = FHT[tx_type]; tran_low_t out[1024]; @@ -2093,6 +2412,14 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, maybe_flip_input(&input, &stride, 32, 32, flipped_input, tx_type); #endif +#if CONFIG_MRC_TX + if (tx_type == MRC_DCT) { + int16_t masked_input[32 * 32]; + get_masked_residual32(&input, &stride, txfm_param->dst, txfm_param->stride, + masked_input); + } +#endif // CONFIG_MRC_TX + // Columns for (i = 0; i < 32; ++i) { for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; @@ -2150,7 +2477,14 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) { } void av1_fht64x64_c(const int16_t *input, tran_low_t 
*output, int stride, - int tx_type) { + TxfmParam *txfm_param) { + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif static const transform_2d FHT[] = { { fdct64_col, fdct64_row }, // DCT_DCT #if CONFIG_EXT_TX @@ -2179,6 +2513,7 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, int16_t flipped_input[64 * 64]; maybe_flip_input(&input, &stride, 64, 64, flipped_input, tx_type); #endif + // Columns for (i = 0; i < 64; ++i) { for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i]; @@ -2214,20 +2549,6 @@ void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, } #endif // CONFIG_EXT_TX -#if CONFIG_HIGHBITDEPTH -void av1_highbd_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht32x32_c(input, output, stride, tx_type); -} - -#if CONFIG_TX64X64 -void av1_highbd_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { - av1_fht64x64_c(input, output, stride, tx_type); -} -#endif // CONFIG_TX64X64 -#endif // CONFIG_HIGHBITDEPTH - #if CONFIG_DPCM_INTRA void av1_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, tran_low_t *output) { @@ -2271,5 +2592,54 @@ void av1_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride]; ft(temp_in, output); } + +#if CONFIG_HIGHBITDEPTH +void av1_hbd_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, + tran_low_t *output, int dir) { + (void)dir; + assert(tx_type < TX_TYPES_1D); + static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 }; + const transform_1d ft = FHT[tx_type]; + tran_low_t temp_in[4]; + for (int i = 0; i < 4; ++i) + temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2); + ft(temp_in, output); +} + +void av1_hbd_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, + tran_low_t *output, int dir) { + (void)dir; + assert(tx_type < TX_TYPES_1D); + static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 }; + const transform_1d ft = FHT[tx_type]; + tran_low_t temp_in[8]; + for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4; + ft(temp_in, output); +} + +void av1_hbd_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, + tran_low_t *output, int dir) { + (void)dir; + assert(tx_type < TX_TYPES_1D); + static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 }; + const transform_1d ft = FHT[tx_type]; + tran_low_t temp_in[16]; + for (int i = 0; i < 16; ++i) + temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2); + ft(temp_in, output); +} + +void av1_hbd_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, + tran_low_t *output, int dir) { + (void)dir; + assert(tx_type < TX_TYPES_1D); + static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32, + fidtx32 }; + const transform_1d ft = FHT[tx_type]; + tran_low_t temp_in[32]; + for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride]; + ft(temp_in, output); +} +#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_DPCM_INTRA #endif // !AV1_DCT_GTEST diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c index 36d09c02a..d13eb42fb 100644 --- a/third_party/aom/av1/encoder/encodeframe.c +++ b/third_party/aom/av1/encoder/encodeframe.c @@ -84,7 +84,7 @@ static void predict_superblock(const AV1_COMP 
*const cpi, ThreadData *td, #if CONFIG_EXT_INTER int mi_row_ori, int mi_col_ori, #endif // CONFIG_EXT_INTER - int mi_row_pred, int mi_col_pred, + int mi_row_pred, int mi_col_pred, int plane, BLOCK_SIZE bsize_pred, int b_sub8x8, int block); static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, PC_TREE *pc_tree); @@ -308,7 +308,6 @@ static void set_offsets_without_segment_id(const AV1_COMP *const cpi, av1_setup_src_planes(x, cpi->source, mi_row, mi_col); // R/D setup. - x->rddiv = cpi->rd.RDDIV; x->rdmult = cpi->rd.RDMULT; // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs() @@ -326,6 +325,10 @@ static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile, set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize); mbmi = &xd->mi[0]->mbmi; +#if CONFIG_CFL + xd->cfl->mi_row = mi_row; + xd->cfl->mi_col = mi_col; +#endif // Setup segment ID. if (seg->enabled) { @@ -413,7 +416,6 @@ static void set_offsets_extend(const AV1_COMP *const cpi, ThreadData *td, xd->left_available = (mi_col_ori > tile->mi_col_start); // R/D setup. - x->rddiv = cpi->rd.RDDIV; x->rdmult = cpi->rd.RDMULT; } @@ -539,6 +541,21 @@ static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv, mbmi->pred_mv[1] = this_mv; mi_pred_mv[1] = this_mv; } +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mbmi->mode)) { + // Special case: SR_NEAR_NEWMV uses 1 + mbmi->ref_mv_idx + // (like NEARMV) instead + if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx += 1; + + if (compound_ref0_mode(mbmi->mode) == NEWMV || + compound_ref1_mode(mbmi->mode) == NEWMV) { + int_mv this_mv = curr_ref_mv_stack[ref_mv_idx].this_mv; + clamp_mv_ref(&this_mv.as_mv, bw, bh, xd); + mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv; + mbmi->pred_mv[0] = this_mv; + mi_pred_mv[0] = this_mv; + } +#endif // CONFIG_COMPOUND_SINGLEREF } else { #endif // CONFIG_EXT_INTER if (mbmi->mode == NEWMV) { @@ -635,7 +652,6 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_PALETTE for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i]; #endif // CONFIG_PALETTE - // Restore the coding context of the MB to that that was in place // when the mode was picked for it for (y = 0; y < mi_height; y++) @@ -814,7 +830,6 @@ static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td, } mi_addr->mbmi.segment_id_supertx = MAX_SEGMENTS; } - // Restore the coding context of the MB to that that was in place // when the mode was picked for it for (y = 0; y < mi_height; y++) @@ -1147,7 +1162,7 @@ static void update_supertx_param_sb(const AV1_COMP *const cpi, ThreadData *td, } #endif // CONFIG_SUPERTX -#if CONFIG_MOTION_VAR && CONFIG_NCOBMC +#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT) static void set_mode_info_b(const AV1_COMP *const cpi, const TileInfo *const tile, ThreadData *td, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -1167,6 +1182,7 @@ static void set_mode_info_sb(const AV1_COMP *const cpi, ThreadData *td, BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_EXT_PARTITION_TYPES const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); + const int quarter_step = mi_size_wide[bsize] / 4; #endif #if CONFIG_CB4X4 const int unify_bsize = 1; @@ -1245,6 +1261,24 @@ static void set_mode_info_sb(const AV1_COMP *const cpi, ThreadData *td, set_mode_info_b(cpi, tile, td, mi_row + hbs, mi_col + hbs, bsize2, &pc_tree->verticalb[2]); break; + case PARTITION_HORZ_4: + for (int i = 0; i < 4; ++i) { + int 
this_mi_row = mi_row + i * quarter_step; + if (i > 0 && this_mi_row >= cm->mi_rows) break; + + set_mode_info_b(cpi, tile, td, this_mi_row, mi_col, subsize, + &pc_tree->horizontal4[i]); + } + break; + case PARTITION_VERT_4: + for (int i = 0; i < 4; ++i) { + int this_mi_col = mi_col + i * quarter_step; + if (i > 0 && this_mi_col >= cm->mi_cols) break; + + set_mode_info_b(cpi, tile, td, mi_row, this_mi_col, subsize, + &pc_tree->vertical4[i]); + } + break; #endif // CONFIG_EXT_PARTITION_TYPES default: assert(0 && "Invalid partition type."); break; } @@ -1281,10 +1315,10 @@ static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q); } -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 -static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8, - BLOCK_SIZE bsize, int bw, int bh, - int mi_row, int mi_col) { +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 +static void dist_8x8_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8, + BLOCK_SIZE bsize, int bw, int bh, + int mi_row, int mi_col) { MACROBLOCKD *const xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; const int dst_stride = pd->dst.stride; @@ -1294,12 +1328,24 @@ static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8, if (bsize < BLOCK_8X8) { int i, j; - uint8_t *dst_sub8x8 = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2]; +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint16_t *dst8x8_16 = (uint16_t *)dst8x8; + uint16_t *dst_sub8x8 = &dst8x8_16[((mi_row & 1) * 8 + (mi_col & 1)) << 2]; - for (j = 0; j < bh; ++j) - for (i = 0; i < bw; ++i) { - dst_sub8x8[j * 8 + i] = dst[j * dst_stride + i]; - } + for (j = 0; j < bh; ++j) + for (i = 0; i < bw; ++i) + dst_sub8x8[j * 8 + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; + } else { +#endif + uint8_t *dst_sub8x8 = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2]; + + for (j = 0; j < bh; ++j) + for (i = 0; i < bw; ++i) + dst_sub8x8[j * 8 + i] = dst[j * dst_stride + i]; +#if CONFIG_HIGHBITDEPTH + } +#endif } } #endif @@ -1330,10 +1376,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, x->pvq_speed = 1; x->pvq_coded = 0; #endif -#if CONFIG_CFL - // Don't store luma during RDO (we will store the best mode later). - x->cfl_store_y = 0; -#endif set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); mbmi = &xd->mi[0]->mbmi; @@ -1342,6 +1384,10 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, mbmi->mi_row = mi_row; mbmi->mi_col = mi_col; #endif +#if CONFIG_CFL + // Don't store luma during RDO. Only store luma when best luma is known + x->cfl_store_y = 0; +#endif #if CONFIG_SUPERTX // We set tx_size here as skip blocks would otherwise not set it. 
// tx_size needs to be set at this point as supertx_enable in @@ -1542,6 +1588,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, #endif if (!frame_is_intra_only(cm)) { FRAME_COUNTS *const counts = td->counts; + RD_COUNTS *rdc = &td->rd_counts; const int inter_block = is_inter_block(mbmi); const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); @@ -1560,6 +1607,12 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, #endif // CONFIG_EXT_REFS if (cm->reference_mode == REFERENCE_MODE_SELECT) { + if (has_second_ref(mbmi)) + // This flag is also updated for 4x4 blocks + rdc->compound_ref_used_flag = 1; + else + // This flag is also updated for 4x4 blocks + rdc->single_ref_used_flag = 1; #if !SUB8X8_COMP_REF if (mbmi->sb_type != BLOCK_4X4) counts->comp_inter[av1_get_reference_mode_context(cm, xd)] @@ -1571,24 +1624,53 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, } if (has_second_ref(mbmi)) { +#if CONFIG_EXT_COMP_REFS + const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi) + ? UNIDIR_COMP_REFERENCE + : BIDIR_COMP_REFERENCE; +#if !USE_UNI_COMP_REFS + // TODO(zoeliu): Temporarily turn off uni-directional comp refs + assert(comp_ref_type == BIDIR_COMP_REFERENCE); +#endif // !USE_UNI_COMP_REFS + counts->comp_ref_type[av1_get_comp_reference_type_context(xd)] + [comp_ref_type]++; + + if (comp_ref_type == UNIDIR_COMP_REFERENCE) { + const int bit = (ref0 == BWDREF_FRAME); + counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0] + [bit]++; + if (!bit) { + const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME); + counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1] + [bit1]++; + if (bit1) { + counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)] + [2][ref1 == GOLDEN_FRAME]++; + } + } + } else { +#endif // CONFIG_EXT_COMP_REFS #if CONFIG_EXT_REFS - const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME); + const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME); - counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0][bit]++; - if (!bit) { - counts->comp_ref[av1_get_pred_context_comp_ref_p1(cm, xd)][1] - [ref0 == LAST_FRAME]++; - } else { - counts->comp_ref[av1_get_pred_context_comp_ref_p2(cm, xd)][2] - [ref0 == GOLDEN_FRAME]++; - } + counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0][bit]++; + if (!bit) { + counts->comp_ref[av1_get_pred_context_comp_ref_p1(cm, xd)][1] + [ref0 == LAST_FRAME]++; + } else { + counts->comp_ref[av1_get_pred_context_comp_ref_p2(cm, xd)][2] + [ref0 == GOLDEN_FRAME]++; + } - counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(cm, xd)][0] - [ref1 == ALTREF_FRAME]++; -#else + counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(cm, xd)][0] + [ref1 == ALTREF_FRAME]++; +#else // !CONFIG_EXT_REFS counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0] [ref0 == GOLDEN_FRAME]++; #endif // CONFIG_EXT_REFS +#if CONFIG_EXT_COMP_REFS + } +#endif // CONFIG_EXT_COMP_REFS } else { #if CONFIG_EXT_REFS const int bit = (ref0 == ALTREF_FRAME || ref0 == BWDREF_FRAME); @@ -1609,7 +1691,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, [ref0 != LAST3_FRAME]++; } } -#else +#else // !CONFIG_EXT_REFS counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0] [ref0 != LAST_FRAME]++; if (ref0 != LAST_FRAME) { @@ -1619,7 +1701,14 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, #endif // CONFIG_EXT_REFS } 
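As a reading aid for the EXT_COMP_REFS bookkeeping above: for a unidirectional compound pair, update_stats() walks a short binary tree and bumps one uni_comp_ref counter per decision: bit flags a BWDREF-anchored pair; otherwise bit1 flags ref1 being LAST3 or GOLDEN, and a final check separates GOLDEN from LAST3. The minimal sketch below reproduces only those bit computations; the enum values are illustrative stand-ins rather than the real AV1 reference-frame numbering, and the example pairs are just the ones the surrounding code appears to distinguish.

/* Sketch: the decision bits recorded for a unidirectional compound pair.
 * Enum values are illustrative only, not the real AV1 numbering. */
#include <stdio.h>

enum {
  LAST_FRAME = 1, LAST2_FRAME, LAST3_FRAME, GOLDEN_FRAME,
  BWDREF_FRAME, ALTREF_FRAME
};

static void print_uni_comp_bits(int ref0, int ref1) {
  const int bit = (ref0 == BWDREF_FRAME);  /* BWDREF-anchored pair? */
  printf("bit=%d", bit);
  if (!bit) {
    const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
    printf(" bit1=%d", bit1);
    if (bit1) printf(" bit2=%d", ref1 == GOLDEN_FRAME);
  }
  printf("\n");
}

int main(void) {
  print_uni_comp_bits(LAST_FRAME, LAST2_FRAME);    /* bit=0 bit1=0 */
  print_uni_comp_bits(LAST_FRAME, LAST3_FRAME);    /* bit=0 bit1=1 bit2=0 */
  print_uni_comp_bits(LAST_FRAME, GOLDEN_FRAME);   /* bit=0 bit1=1 bit2=1 */
  print_uni_comp_bits(BWDREF_FRAME, ALTREF_FRAME); /* bit=1 */
  return 0;
}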
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA +#if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + counts->comp_inter_mode[av1_get_inter_mode_context(xd)] + [is_inter_singleref_comp_mode(mbmi->mode)]++; +#endif // CONFIG_COMPOUND_SINGLEREF + +#if CONFIG_INTERINTRA if (cm->reference_mode != COMPOUND_REFERENCE && #if CONFIG_SUPERTX !supertx_enabled && @@ -1635,14 +1724,33 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, counts->interintra[bsize_group][0]++; } } -#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA +#endif // CONFIG_INTERINTRA +#endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION +#if CONFIG_WARPED_MOTION + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); +#endif +#if CONFIG_NCOBMC_ADAPT_WEIGHT + const MOTION_MODE motion_allowed = + motion_mode_allowed_wrapper(0, +#if CONFIG_GLOBAL_MOTION + 0, xd->global_motion, +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif + mi); +#else const MOTION_MODE motion_allowed = motion_mode_allowed( -#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#if CONFIG_GLOBAL_MOTION 0, xd->global_motion, -#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif mi); +#endif // CONFIG_NCOBMC_ADAPT_WEIGHT #if CONFIG_SUPERTX if (!supertx_enabled) #endif // CONFIG_SUPERTX @@ -1660,11 +1768,28 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, if (motion_allowed > SIMPLE_TRANSLATION) counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++; #endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION + +#if CONFIG_NCOBMC_ADAPT_WEIGHT + if (mbmi->motion_mode == NCOBMC_ADAPT_WEIGHT) { + ADAPT_OVERLAP_BLOCK ao_block = + adapt_overlap_block_lookup[mbmi->sb_type]; + ++counts->ncobmc_mode[ao_block][mbmi->ncobmc_mode[0]]; + if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) { + ++counts->ncobmc_mode[ao_block][mbmi->ncobmc_mode[1]]; + } + } +#endif + #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_EXT_INTER - if (cm->reference_mode != SINGLE_REFERENCE && + if ( +#if CONFIG_COMPOUND_SINGLEREF + is_inter_anyref_comp_mode(mbmi->mode) +#else // !CONFIG_COMPOUND_SINGLEREF + cm->reference_mode != SINGLE_REFERENCE && is_inter_compound_mode(mbmi->mode) +#endif // CONFIG_COMPOUND_SINGLEREF #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION && mbmi->motion_mode == SIMPLE_TRANSLATION #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION @@ -1683,6 +1808,12 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, if (has_second_ref(mbmi)) { mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)]; +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mode)) { + mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; + ++counts->inter_singleref_comp_mode[mode_ctx] + [INTER_SINGLEREF_COMP_OFFSET(mode)]; +#endif // CONFIG_COMPOUND_SINGLEREF } else { #endif // CONFIG_EXT_INTER mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, @@ -1693,10 +1824,15 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, #endif // CONFIG_EXT_INTER #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV || + mbmi->mode == SR_NEW_NEWMV) { +#else // !CONFIG_COMPOUND_SINGLEREF if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) { -#else +#endif // 
CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER if (mbmi->mode == NEWMV) { -#endif +#endif // CONFIG_EXT_INTER uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); int idx; @@ -1871,10 +2007,16 @@ static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile, update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run); #if CONFIG_MOTION_VAR && CONFIG_NCOBMC mbmi = &xd->mi[0]->mbmi; +#if CONFIG_WARPED_MOTION + set_ref_ptrs(&cpi->common, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); +#endif const MOTION_MODE motion_allowed = motion_mode_allowed( -#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#if CONFIG_GLOBAL_MOTION 0, xd->global_motion, -#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif xd->mi[0]); check_ncobmc = is_inter_block(mbmi) && motion_allowed >= OBMC_CAUSAL; if (!dry_run && check_ncobmc) { @@ -1922,6 +2064,8 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, const BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_EXT_PARTITION_TYPES const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); + int quarter_step = mi_size_wide[bsize] / 4; + int i; #endif #if CONFIG_CB4X4 @@ -1933,6 +2077,11 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; +#if CONFIG_SPEED_REFS + // First scanning pass of an SB is dry run only. + if (cpi->sb_scanning_pass_idx == 0) assert(dry_run == DRY_RUN_NORMAL); +#endif // CONFIG_SPEED_REFS + if (!dry_run && ctx >= 0) td->counts->partition[ctx][partition]++; #if CONFIG_SUPERTX @@ -2115,6 +2264,24 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col + hbs, dry_run, bsize2, partition, &pc_tree->verticalb[2], rate); break; + case PARTITION_HORZ_4: + for (i = 0; i < 4; ++i) { + int this_mi_row = mi_row + i * quarter_step; + if (i > 0 && this_mi_row >= cm->mi_rows) break; + + encode_b(cpi, tile, td, tp, this_mi_row, mi_col, dry_run, subsize, + partition, &pc_tree->horizontal4[i], rate); + } + break; + case PARTITION_VERT_4: + for (i = 0; i < 4; ++i) { + int this_mi_col = mi_col + i * quarter_step; + if (i > 0 && this_mi_col >= cm->mi_cols) break; + + encode_b(cpi, tile, td, tp, mi_row, this_mi_col, dry_run, subsize, + partition, &pc_tree->vertical4[i], rate); + } + break; #endif // CONFIG_EXT_PARTITION_TYPES default: assert(0 && "Invalid partition type."); break; } @@ -2302,8 +2469,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, if (none_rdc.rate < INT_MAX) { none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; - none_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); + none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist); #if CONFIG_SUPERTX none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; #endif @@ -2473,7 +2639,9 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, case PARTITION_VERT_A: case PARTITION_VERT_B: case PARTITION_HORZ_A: - case PARTITION_HORZ_B: assert(0 && "Cannot handle extended partiton types"); + case PARTITION_HORZ_B: + case PARTITION_HORZ_4: + case PARTITION_VERT_4: assert(0 && "Cannot handle extended partiton types"); #endif // CONFIG_EXT_PARTITION_TYPES default: assert(0); break; } @@ -2481,7 +2649,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, if (last_part_rdc.rate < INT_MAX) { last_part_rdc.rate += cpi->partition_cost[pl][partition]; last_part_rdc.rdcost = - 
RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist); + RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist); #if CONFIG_SUPERTX last_part_rate_nocoef += cpi->partition_cost[pl][partition]; #endif @@ -2565,8 +2733,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, } if (chosen_rdc.rate < INT_MAX) { chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; - chosen_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist); + chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist); #if CONFIG_SUPERTX chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; #endif @@ -2624,8 +2791,8 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, } /* clang-format off */ -static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 BLOCK_2X2, BLOCK_2X2, BLOCK_2X2, // 2x2, 2x4, 4x2 #endif BLOCK_4X4, // 4x4 @@ -2634,12 +2801,14 @@ static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32 BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64 #if CONFIG_EXT_PARTITION - BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 // 64x128, 128x64, 128x128 + BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 64x128, 128x64, 128x128 #endif // CONFIG_EXT_PARTITION + BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32 + BLOCK_8X8 // 32x8 }; -static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 2x2, 2x4, 4x2 #endif BLOCK_8X8, // 4x4 @@ -2648,13 +2817,15 @@ static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32 BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 32x64, 64x32, 64x64 #if CONFIG_EXT_PARTITION - BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST // 64x128, 128x64, 128x128 + BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 64x128, 128x64, 128x128 #endif // CONFIG_EXT_PARTITION + BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 4x16, 16x4, 8x32 + BLOCK_32X32 // 32x8 }; // Next square block size less or equal than current block size. 
-static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const BLOCK_SIZE next_square_size[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 BLOCK_2X2, BLOCK_2X2, BLOCK_2X2, // 2x2, 2x4, 4x2 #endif BLOCK_4X4, // 4x4 @@ -2663,8 +2834,10 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32 BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64 #if CONFIG_EXT_PARTITION - BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128 + BLOCK_64X64, BLOCK_64X64, BLOCK_128X128, // 64x128, 128x64, 128x128 #endif // CONFIG_EXT_PARTITION + BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32 + BLOCK_8X8 // 32x8 }; /* clang-format on */ @@ -3055,8 +3228,7 @@ static void rd_test_partition3( cm->fc->supertx_prob[partition_supertx_context_lookup[partition]] [supertx_size], 0); - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { TX_TYPE best_tx = DCT_DCT; @@ -3071,8 +3243,7 @@ static void rd_test_partition3( cm->fc->supertx_prob[partition_supertx_context_lookup[partition]] [supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3091,8 +3262,7 @@ static void rd_test_partition3( #endif bsize); sum_rdc.rate += cpi->partition_cost[pl][partition]; - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); #if CONFIG_SUPERTX sum_rate_nocoef += cpi->partition_cost[pl][partition]; #endif @@ -3161,7 +3331,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX; int abort_flag; - const int supertx_allowed = !frame_is_intra_only(cm) && + const int supertx_allowed = !frame_is_intra_only(cm) && bsize >= BLOCK_8X8 && bsize <= MAX_SUPERTX_BLOCK_SIZE && !xd->lossless[0]; #endif // CONFIG_SUPERTX @@ -3341,6 +3511,17 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, } #endif +#if CONFIG_SPEED_REFS + if (cpi->sb_scanning_pass_idx == 0) { + // NOTE: For the 1st pass of scanning, check all the subblocks of equal size + // only. 
+ partition_none_allowed = (bsize == MIN_SPEED_REFS_BLKSIZE); + partition_horz_allowed = 0; + partition_vert_allowed = 0; + do_square_split = (bsize > MIN_SPEED_REFS_BLKSIZE); + } +#endif // CONFIG_SPEED_REFS + // PARTITION_NONE if (partition_none_allowed) { rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, @@ -3354,8 +3535,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, if (this_rdc.rate != INT_MAX) { if (bsize_at_least_8x8) { this_rdc.rate += partition_cost[PARTITION_NONE]; - this_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); + this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist); #if CONFIG_SUPERTX this_rate_nocoef += partition_cost[PARTITION_NONE]; #endif @@ -3494,8 +3674,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup [PARTITION_SPLIT]][supertx_size], 0); - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (is_inter_mode(pc_tree->leaf_split[0]->mic.mbmi.mode)) { TX_TYPE best_tx = DCT_DCT; @@ -3512,8 +3691,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup [PARTITION_SPLIT]][supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3551,6 +3729,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, &this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]); #endif // CONFIG_SUPERTX +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize == BLOCK_8X8 && this_rdc.rate != INT_MAX) { + assert(this_rdc.dist_y < INT64_MAX); + } +#endif if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; #if CONFIG_SUPERTX @@ -3564,28 +3747,40 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX sum_rate_nocoef += this_rate_nocoef; #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - sum_rdc.dist_y += this_rdc.dist_y; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize == BLOCK_8X8) { + assert(this_rdc.dist_y < INT64_MAX); + sum_rdc.dist_y += this_rdc.dist_y; + } #endif } } reached_last_index = (idx == 4); -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (reached_last_index && sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) { - int use_activity_masking = 0; - int64_t daala_dist; + int64_t dist_8x8; const int src_stride = x->plane[0].src.stride; - daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride - 4, - src_stride, x->decoded_8x8, 8, 8, 8, 1, - use_activity_masking, x->qindex) - << 4; - sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist; - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + uint8_t *decoded_8x8; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8); + else +#endif + decoded_8x8 = (uint8_t *)x->decoded_8x8; + + dist_8x8 = + av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride - 4, + src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, + x->qindex) + << 4; + assert(sum_rdc.dist_y < INT64_MAX); + sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8; + sum_rdc.rdcost = RDCOST(x->rdmult, 
sum_rdc.rate, sum_rdc.dist); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 #if CONFIG_SUPERTX if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) { @@ -3598,8 +3793,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup [PARTITION_SPLIT]][supertx_size], 0); - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { TX_TYPE best_tx = DCT_DCT; @@ -3616,8 +3810,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup [PARTITION_SPLIT]][supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3632,7 +3825,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) { sum_rdc.rate += partition_cost[PARTITION_SPLIT]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); #if CONFIG_SUPERTX sum_rate_nocoef += partition_cost[PARTITION_SPLIT]; #endif // CONFIG_SUPERTX @@ -3725,14 +3918,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, best_rdc.rdcost - sum_rdc.rdcost); #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) { update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col, subsize, DRY_RUN_NORMAL); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col, subsize, NULL); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; @@ -3746,24 +3939,31 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX sum_rate_nocoef += this_rate_nocoef; #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 sum_rdc.dist_y += this_rdc.dist_y; #endif } -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) { - int use_activity_masking = 0; - int64_t daala_dist; + int64_t dist_8x8; const int src_stride = x->plane[0].src.stride; - daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride, - src_stride, x->decoded_8x8, 8, 8, 8, 1, - use_activity_masking, x->qindex) - << 4; - sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist; - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + uint8_t *decoded_8x8; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8); + else +#endif + decoded_8x8 = (uint8_t *)x->decoded_8x8; + + dist_8x8 = av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride, + src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, + 8, x->qindex) + << 4; + sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // 
CONFIG_DIST_8X8 && CONFIG_CB4X4 } #if CONFIG_SUPERTX @@ -3777,7 +3977,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]] [supertx_size], 0); - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { TX_TYPE best_tx = DCT_DCT; @@ -3795,8 +3995,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, ->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]] [supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3810,7 +4009,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, if (sum_rdc.rdcost < best_rdc.rdcost) { sum_rdc.rate += partition_cost[PARTITION_HORZ]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); #if CONFIG_SUPERTX sum_rate_nocoef += partition_cost[PARTITION_HORZ]; #endif // CONFIG_SUPERTX @@ -3899,14 +4098,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, best_rdc.rdcost - sum_rdc.rdcost); #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) { update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step, subsize, DRY_RUN_NORMAL); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step, subsize, NULL); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; @@ -3920,24 +4119,31 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX sum_rate_nocoef += this_rate_nocoef; #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 sum_rdc.dist_y += this_rdc.dist_y; #endif } -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) { - int use_activity_masking = 0; - int64_t daala_dist; + int64_t dist_8x8; const int src_stride = x->plane[0].src.stride; - daala_dist = - av1_daala_dist(x->plane[0].src.buf - 4, src_stride, x->decoded_8x8, - 8, 8, 8, 1, use_activity_masking, x->qindex) + uint8_t *decoded_8x8; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8); + else +#endif + decoded_8x8 = (uint8_t *)x->decoded_8x8; + + dist_8x8 = + av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4, src_stride, + decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, x->qindex) << 4; - sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist; - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 } #if CONFIG_SUPERTX if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) { @@ -3950,7 +4156,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, 
cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]] [supertx_size], 0); - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { TX_TYPE best_tx = DCT_DCT; @@ -3968,8 +4174,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, ->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]] [supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3983,7 +4188,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, if (sum_rdc.rdcost < best_rdc.rdcost) { sum_rdc.rate += partition_cost[PARTITION_VERT]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); #if CONFIG_SUPERTX sum_rate_nocoef += partition_cost[PARTITION_VERT]; #endif // CONFIG_SUPERTX @@ -4060,14 +4265,139 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, bsize2, mi_row + mi_step, mi_col + mi_step, bsize2); restore_context(x, &x_ctx, mi_row, mi_col, bsize); } + + // PARTITION_HORZ_4 + // TODO(david.barker): For this and PARTITION_VERT_4, + // * Add support for BLOCK_16X16 once we support 2x8 and 8x2 blocks for the + // chroma plane + // * Add support for supertx + if (bsize == BLOCK_32X32 && partition_horz_allowed && !force_horz_split && + (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) { + int i; + const int quarter_step = mi_size_high[bsize] / 4; + PICK_MODE_CONTEXT *ctx_prev = ctx_none; + + subsize = get_subsize(bsize, PARTITION_HORZ_4); + av1_zero(sum_rdc); + + for (i = 0; i < 4; ++i) { + int this_mi_row = mi_row + i * quarter_step; + + if (i > 0 && this_mi_row >= cm->mi_rows) break; + + if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev); + + ctx_prev = &pc_tree->horizontal4[i]; + + rd_pick_sb_modes(cpi, tile_data, x, this_mi_row, mi_col, &this_rdc, + PARTITION_HORZ_4, subsize, ctx_prev, + best_rdc.rdcost - sum_rdc.rdcost); + + if (this_rdc.rate == INT_MAX) { + sum_rdc.rdcost = INT64_MAX; + break; + } else { + sum_rdc.rate += this_rdc.rate; + sum_rdc.dist += this_rdc.dist; + sum_rdc.rdcost += this_rdc.rdcost; + } + + if (sum_rdc.rdcost >= best_rdc.rdcost) break; + + if (i < 3) { + update_state(cpi, td, ctx_prev, this_mi_row, mi_col, subsize, 1); + encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, this_mi_row, mi_col, + subsize, NULL); + } + } + + if (sum_rdc.rdcost < best_rdc.rdcost) { + sum_rdc.rate += partition_cost[PARTITION_HORZ_4]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); + if (sum_rdc.rdcost < best_rdc.rdcost) { + best_rdc = sum_rdc; + pc_tree->partitioning = PARTITION_HORZ_4; + } + } +#if !CONFIG_PVQ + restore_context(x, &x_ctx, mi_row, mi_col, bsize); +#else + restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize); +#endif + } + // PARTITION_VERT_4 + if (bsize == BLOCK_32X32 && partition_vert_allowed && !force_vert_split && + (do_rectangular_split || av1_active_v_edge(cpi, mi_row, mi_step))) { + int i; + const int quarter_step = mi_size_wide[bsize] / 4; + PICK_MODE_CONTEXT *ctx_prev = ctx_none; + + subsize = get_subsize(bsize, PARTITION_VERT_4); + av1_zero(sum_rdc); + + for (i = 0; i < 4; ++i) { + 
int this_mi_col = mi_col + i * quarter_step; + + if (i > 0 && this_mi_col >= cm->mi_cols) break; + + if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev); + + ctx_prev = &pc_tree->vertical4[i]; + + rd_pick_sb_modes(cpi, tile_data, x, mi_row, this_mi_col, &this_rdc, + PARTITION_VERT_4, subsize, ctx_prev, + best_rdc.rdcost - sum_rdc.rdcost); + + if (this_rdc.rate == INT_MAX) { + sum_rdc.rdcost = INT64_MAX; + } else { + sum_rdc.rate += this_rdc.rate; + sum_rdc.dist += this_rdc.dist; + sum_rdc.rdcost += this_rdc.rdcost; + } + + if (sum_rdc.rdcost >= best_rdc.rdcost) break; + + if (i < 3) { + update_state(cpi, td, ctx_prev, mi_row, this_mi_col, subsize, 1); + encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, this_mi_col, + subsize, NULL); + } + } + + if (sum_rdc.rdcost < best_rdc.rdcost) { + sum_rdc.rate += partition_cost[PARTITION_VERT_4]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); + if (sum_rdc.rdcost < best_rdc.rdcost) { + best_rdc = sum_rdc; + pc_tree->partitioning = PARTITION_VERT_4; + } + } +#if !CONFIG_PVQ + restore_context(x, &x_ctx, mi_row, mi_col, bsize); +#else + restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize); +#endif + } #endif // CONFIG_EXT_PARTITION_TYPES +#if CONFIG_SPEED_REFS + // First scanning is done. + if (cpi->sb_scanning_pass_idx == 0 && bsize == cm->sb_size) return; +#endif // CONFIG_SPEED_REFS + // TODO(jbb): This code added so that we avoid static analysis // warning related to the fact that best_rd isn't used after this // point. This code should be refactored so that the duplicate // checks occur in some sub function and thus are used... (void)best_rd; *rd_cost = best_rdc; + +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) { + assert(rd_cost->dist_y < INT64_MAX); + } +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 #if CONFIG_SUPERTX *rate_nocoef = best_rate_nocoef; #endif // CONFIG_SUPERTX @@ -4093,13 +4423,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, x->cfl_store_y = 0; #endif -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && bsize == BLOCK_4X4 && pc_tree->index == 3) { encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, pc_tree, NULL); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize == cm->sb_size) { #if !CONFIG_PVQ && !CONFIG_LV_MAP @@ -4112,6 +4442,22 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, } } +#if CONFIG_SPEED_REFS +static void restore_mi(const AV1_COMP *const cpi, MACROBLOCK *const x, + int mi_row, int mi_col) { + const AV1_COMMON *cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); + int x_idx, y; + for (y = 0; y < mi_size_high[cm->sb_size]; y++) + for (x_idx = 0; x_idx < mi_size_wide[cm->sb_size]; x_idx++) + if (mi_col + x_idx < cm->mi_cols && mi_row + y < cm->mi_rows) { + memset(xd->mi + y * cm->mi_stride + x_idx, 0, sizeof(*xd->mi)); + memset(x->mbmi_ext + y * cm->mi_cols + x_idx, 0, sizeof(*x->mbmi_ext)); + } +} +#endif // CONFIG_SPEED_REFS + static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row, TOKENEXTRA **tp) { @@ -4157,8 +4503,6 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, MODE_INFO **mi = cm->mi_grid_visible + idx_str; PC_TREE *const pc_root = td->pc_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2]; - 
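Editor's note: the two hunks above add the 1:4 splits (PARTITION_HORZ_4 / PARTITION_VERT_4, currently limited to BLOCK_32X32 per the TODO). Below is a minimal, self-contained sketch of the accumulate-and-prune pattern those loops use; the rd_stats type and the pick_sub_block() helper are hypothetical stand-ins for rd_pick_sb_modes(), and only the control flow (pass the remaining budget down, stop as soon as the partial sum exceeds the best known cost) mirrors the real search.

#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
  int rate;       /* bits */
  int64_t dist;   /* SSE-style distortion */
  int64_t rdcost; /* lambda * rate + dist */
} rd_stats;

/* Hypothetical per-strip mode search; returns rate == INT_MAX when nothing
 * within the given budget was found, like rd_pick_sb_modes() does. */
static rd_stats pick_sub_block(int idx, int64_t budget) {
  (void)budget;
  rd_stats r = { 100 + 10 * idx, 4000 - 500 * idx, 0 };
  r.rdcost = 120 * (int64_t)r.rate + r.dist; /* toy lambda = 120 */
  return r;
}

int main(void) {
  const int64_t best_rdcost = 1000000; /* best partition found so far */
  rd_stats sum = { 0, 0, 0 };
  for (int i = 0; i < 4; ++i) {
    /* Hand the sub-search the remaining budget so it can give up early. */
    rd_stats this_rd = pick_sub_block(i, best_rdcost - sum.rdcost);
    if (this_rd.rate == INT_MAX) { sum.rdcost = INT64_MAX; break; }
    sum.rate += this_rd.rate;
    sum.dist += this_rd.dist;
    sum.rdcost += this_rd.rdcost;
    /* Prune: once the partial sum is already worse, stop coding strips. */
    if (sum.rdcost >= best_rdcost) break;
  }
  printf("4-way split: rate=%d dist=%" PRId64 " rd=%" PRId64 "\n", sum.rate,
         sum.dist, sum.rdcost);
  return 0;
}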
av1_update_boundary_info(cm, tile_info, mi_row, mi_col); - if (sf->adaptive_pred_interp_filter) { for (i = 0; i < leaf_nodes; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE; @@ -4258,12 +4602,35 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, &x->min_partition_size, &x->max_partition_size); } +#if CONFIG_SPEED_REFS + // NOTE: Two scanning passes for the current superblock - the first pass + // is only targeted to collect stats. + int m_search_count_backup = *(x->m_search_count_ptr); + for (int sb_pass_idx = 0; sb_pass_idx < 2; ++sb_pass_idx) { + cpi->sb_scanning_pass_idx = sb_pass_idx; + if (frame_is_intra_only(cm) && sb_pass_idx == 0) continue; + + rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size, + &dummy_rdc, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + INT64_MAX, pc_root); + if (sb_pass_idx == 0) { + av1_zero(x->pred_mv); + pc_root->index = 0; + restore_mi(cpi, x, mi_row, mi_col); + *(x->m_search_count_ptr) = m_search_count_backup; + } + } +#else // !CONFIG_SPEED_REFS rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size, &dummy_rdc, #if CONFIG_SUPERTX &dummy_rate_nocoef, #endif // CONFIG_SUPERTX INT64_MAX, pc_root); +#endif // CONFIG_SPEED_REFS } } } @@ -4329,20 +4696,11 @@ static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) { return LAST_FRAME; } -static TX_MODE select_tx_mode(const AV1_COMP *cpi, MACROBLOCKD *const xd) { - int i, all_lossless = 1; - - if (cpi->common.seg.enabled) { - for (i = 0; i < MAX_SEGMENTS; ++i) { - if (!xd->lossless[i]) { - all_lossless = 0; - break; - } - } - } else { - all_lossless = xd->lossless[0]; - } - if (all_lossless) return ONLY_4X4; +static TX_MODE select_tx_mode(const AV1_COMP *cpi) { + if (cpi->common.all_lossless) return ONLY_4X4; +#if CONFIG_VAR_TX_NO_TX_MODE + return TX_MODE_SELECT; +#else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) return ALLOW_32X32 + CONFIG_TX64X64; else if (cpi->sf.tx_size_search_method == USE_FULL_RD || @@ -4350,6 +4708,7 @@ static TX_MODE select_tx_mode(const AV1_COMP *cpi, MACROBLOCKD *const xd) { return TX_MODE_SELECT; else return cpi->common.tx_mode; +#endif // CONFIG_VAR_TX_NO_TX_MODE } void av1_init_tile_data(AV1_COMP *cpi) { @@ -4372,7 +4731,7 @@ void av1_init_tile_data(AV1_COMP *cpi) { TileDataEnc *const tile_data = &cpi->tile_data[tile_row * tile_cols + tile_col]; int i, j; - for (i = 0; i < BLOCK_SIZES; ++i) { + for (i = 0; i < BLOCK_SIZES_ALL; ++i) { for (j = 0; j < MAX_MODES; ++j) { tile_data->thresh_freq_fact[i][j] = 32; tile_data->mode_map[i][j] = j; @@ -4415,12 +4774,8 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, int mi_row; #if CONFIG_DEPENDENT_HORZTILES -#if CONFIG_TILE_GROUPS if ((!cm->dependent_horz_tiles) || (tile_row == 0) || tile_info->tg_horz_boundary) { -#else - if ((!cm->dependent_horz_tiles) || (tile_row == 0)) { -#endif av1_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end); } #else @@ -4504,22 +4859,21 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, #endif #endif // #if CONFIG_PVQ -#if CONFIG_EC_ADAPT this_tile->tctx = *cm->fc; td->mb.e_mbd.tile_ctx = &this_tile->tctx; -#endif // #if CONFIG_EC_ADAPT #if CONFIG_CFL MACROBLOCKD *const xd = &td->mb.e_mbd; xd->cfl = &this_tile->cfl; - cfl_init(xd->cfl, cm, xd->plane[AOM_PLANE_U].subsampling_x, - xd->plane[AOM_PLANE_U].subsampling_y); + cfl_init(xd->cfl, cm); #endif #if CONFIG_PVQ td->mb.daala_enc.state.adapt = 
&this_tile->tctx.pvq_context; #endif // CONFIG_PVQ + av1_setup_across_tile_boundary_info(cm, tile_info); + for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; mi_row += cm->mib_size) { encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); @@ -4656,6 +5010,36 @@ static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm, } #endif // CONFIG_GLOBAL_MOTION +#if CONFIG_PALETTE +// Estimate if the source frame is screen content, based on the portion of +// blocks that have no more than 4 (experimentally selected) luma colors. +static int is_screen_content(const uint8_t *src, +#if CONFIG_HIGHBITDEPTH + int use_hbd, int bd, +#endif // CONFIG_HIGHBITDEPTH + int stride, int width, int height) { + assert(src != NULL); + int counts = 0; + const int blk_w = 16; + const int blk_h = 16; + const int limit = 4; + for (int r = 0; r + blk_h <= height; r += blk_h) { + for (int c = 0; c + blk_w <= width; c += blk_w) { + const int n_colors = +#if CONFIG_HIGHBITDEPTH + use_hbd ? av1_count_colors_highbd(src + r * stride + c, stride, blk_w, + blk_h, bd) + : +#endif // CONFIG_HIGHBITDEPTH + av1_count_colors(src + r * stride + c, stride, blk_w, blk_h); + if (n_colors > 1 && n_colors <= limit) counts++; + } + } + // The threshold is 10%. + return counts * blk_h * blk_w * 10 > width * height; +} +#endif // CONFIG_PALETTE + static void encode_frame_internal(AV1_COMP *cpi) { ThreadData *const td = &cpi->td; MACROBLOCK *const x = &td->mb; @@ -4682,6 +5066,23 @@ static void encode_frame_internal(AV1_COMP *cpi) { av1_zero(rdc->coef_counts); av1_zero(rdc->comp_pred_diff); +#if CONFIG_PALETTE || CONFIG_INTRABC + if (frame_is_intra_only(cm)) { +#if CONFIG_PALETTE + cm->allow_screen_content_tools = + cpi->oxcf.content == AOM_CONTENT_SCREEN || + is_screen_content(cpi->source->y_buffer, +#if CONFIG_HIGHBITDEPTH + cpi->source->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, +#endif // CONFIG_HIGHBITDEPTH + cpi->source->y_stride, cpi->source->y_width, + cpi->source->y_height); +#else + cm->allow_screen_content_tools = cpi->oxcf.content == AOM_CONTENT_SCREEN; +#endif // CONFIG_PALETTE + } +#endif // CONFIG_PALETTE || CONFIG_INTRABC + #if CONFIG_GLOBAL_MOTION av1_zero(rdc->global_motion_used); av1_zero(cpi->gmparams_cost); @@ -4709,6 +5110,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { memcpy(&cm->global_motion[frame], &cm->global_motion[pframe], sizeof(WarpedMotionParams)); } else if (ref_buf[frame] && + ref_buf[frame]->y_crop_width == cpi->source->y_crop_width && + ref_buf[frame]->y_crop_height == cpi->source->y_crop_height && do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame)) { TransformationType model; const int64_t ref_frame_error = av1_frame_error( @@ -4716,8 +5119,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, #endif // CONFIG_HIGHBITDEPTH ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride, - cpi->source->y_buffer, 0, 0, cpi->source->y_width, - cpi->source->y_height, cpi->source->y_stride); + cpi->source->y_buffer, cpi->source->y_width, cpi->source->y_height, + cpi->source->y_stride); if (ref_frame_error == 0) continue; @@ -4752,7 +5155,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { ref_buf[frame]->y_buffer, ref_buf[frame]->y_width, ref_buf[frame]->y_height, ref_buf[frame]->y_stride, cpi->source->y_buffer, cpi->source->y_width, - cpi->source->y_height, cpi->source->y_stride, 3); + cpi->source->y_height, cpi->source->y_stride, 5, + best_warp_error); if (warp_error < best_warp_error) { best_warp_error = warp_error; 
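Editor's note: the is_screen_content() heuristic added above counts 16x16 luma blocks whose number of distinct colors lies in [2, 4] and flags the frame as screen content when counts * 16 * 16 * 10 > width * height, i.e. when such blocks cover more than 10% of the frame area. The standalone example below just works that arithmetic through for an illustrative 1280x720 frame (3600 blocks, so 361 qualifying blocks are enough to trip the test).

#include <stdio.h>

int main(void) {
  const int width = 1280, height = 720, blk = 16;
  const int total_blocks = (width / blk) * (height / blk); /* 3600 */
  /* Smallest counts satisfying counts * blk * blk * 10 > width * height. */
  int counts = 0;
  while (counts * blk * blk * 10 <= width * height) ++counts; /* -> 361 */
  printf("%d of %d blocks (%.1f%%) needed to flag screen content\n", counts,
         total_blocks, 100.0 * counts / total_blocks);
  return 0;
}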
// Save the wm_params modified by refine_integerized_param() @@ -4812,10 +5216,10 @@ static void encode_frame_internal(AV1_COMP *cpi) { cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; xd->qindex[i] = qindex; } - + cm->all_lossless = all_lossless(cm, xd); if (!cm->seg.enabled && xd->lossless[0]) x->optimize = 0; - cm->tx_mode = select_tx_mode(cpi, xd); + cm->tx_mode = select_tx_mode(cpi); #if CONFIG_DELTA_Q // Fix delta q resolution for the moment @@ -4859,18 +5263,32 @@ static void encode_frame_internal(AV1_COMP *cpi) { #if CONFIG_TEMPMV_SIGNALING if (cm->prev_frame) { - cm->use_prev_frame_mvs &= !cm->error_resilient_mode && - cm->width == cm->prev_frame->buf.y_width && - cm->height == cm->prev_frame->buf.y_height && - !cm->intra_only && !cm->prev_frame->intra_only; + cm->use_prev_frame_mvs &= + !cm->error_resilient_mode && +#if CONFIG_FRAME_SUPERRES + cm->width == cm->last_width && cm->height == cm->last_height && +#else + cm->width == cm->prev_frame->buf.y_crop_width && + cm->height == cm->prev_frame->buf.y_crop_height && +#endif // CONFIG_FRAME_SUPERRES + !cm->intra_only && !cm->prev_frame->intra_only && cm->last_show_frame; } else { cm->use_prev_frame_mvs = 0; } #else - cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->prev_frame && - cm->width == cm->prev_frame->buf.y_crop_width && - cm->height == cm->prev_frame->buf.y_crop_height && - !cm->intra_only && cm->last_show_frame; + if (cm->prev_frame) { + cm->use_prev_frame_mvs = !cm->error_resilient_mode && +#if CONFIG_FRAME_SUPERRES + cm->width == cm->last_width && + cm->height == cm->last_height && +#else + cm->width == cm->prev_frame->buf.y_crop_width && + cm->height == cm->prev_frame->buf.y_crop_height && +#endif // CONFIG_FRAME_SUPERRES + !cm->intra_only && cm->last_show_frame; + } else { + cm->use_prev_frame_mvs = 0; + } #endif // CONFIG_TEMPMV_SIGNALING // Special case: set prev_mi to NULL when the previous mode info @@ -4894,6 +5312,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { } #endif + av1_setup_frame_boundary_info(cm); + // If allowed, encoding tiles in parallel with one thread handling one tile. // TODO(geza.lore): The multi-threaded encoder is not safe with more than // 1 tile rows, as it uses the single above_context et al arrays from @@ -4921,7 +5341,11 @@ static void make_consistent_compound_tools(AV1_COMMON *cm) { cm->allow_interintra_compound = 0; #endif // CONFIG_INTERINTRA #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE +#if CONFIG_COMPOUND_SINGLEREF + if (frame_is_intra_only(cm)) +#else // !CONFIG_COMPOUND_SINGLEREF if (frame_is_intra_only(cm) || cm->reference_mode == SINGLE_REFERENCE) +#endif // CONFIG_COMPOUND_SINGLEREF cm->allow_masked_compound = 0; #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE } @@ -4942,14 +5366,14 @@ void av1_encode_frame(AV1_COMP *cpi) { // side behavior is where the ALT ref buffer has opposite sign bias to // the other two. 
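Editor's note: the per-segment lossless scan that the old select_tx_mode() performed (removed in an earlier hunk) is now computed once in encode_frame_internal() and cached in cm->all_lossless before select_tx_mode(cpi) is called. The sketch below simply reproduces that removed logic as a standalone predicate; the real helper is all_lossless() in av1/common and may differ in detail. MAX_SEGMENTS is 8 in this codebase.

#include <stdio.h>

#define MAX_SEGMENTS 8

static int frame_is_all_lossless(int seg_enabled,
                                 const int lossless[MAX_SEGMENTS]) {
  if (!seg_enabled) return lossless[0];
  for (int i = 0; i < MAX_SEGMENTS; ++i)
    if (!lossless[i]) return 0; /* any lossy segment rules out ONLY_4X4 */
  return 1;
}

int main(void) {
  const int lossless[MAX_SEGMENTS] = { 1, 1, 0, 1, 1, 1, 1, 1 };
  printf("all_lossless = %d\n", frame_is_all_lossless(1, lossless));
  return 0;
}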
if (!frame_is_intra_only(cm)) { -#if !CONFIG_ONE_SIDED_COMPOUND +#if !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS) if ((cm->ref_frame_sign_bias[ALTREF_FRAME] == cm->ref_frame_sign_bias[GOLDEN_FRAME]) || (cm->ref_frame_sign_bias[ALTREF_FRAME] == cm->ref_frame_sign_bias[LAST_FRAME])) { cpi->allow_comp_inter_inter = 0; } else { -#endif +#endif // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS) cpi->allow_comp_inter_inter = 1; #if CONFIG_EXT_REFS cm->comp_fwd_ref[0] = LAST_FRAME; @@ -4962,10 +5386,11 @@ void av1_encode_frame(AV1_COMP *cpi) { cm->comp_fixed_ref = ALTREF_FRAME; cm->comp_var_ref[0] = LAST_FRAME; cm->comp_var_ref[1] = GOLDEN_FRAME; -#endif // CONFIG_EXT_REFS -#if !CONFIG_ONE_SIDED_COMPOUND // Normative in encoder +#endif // CONFIG_EXT_REFS +#if !(CONFIG_ONE_SIDED_COMPOUND || \ + CONFIG_EXT_COMP_REFS) // Normative in encoder } -#endif +#endif // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS) } else { cpi->allow_comp_inter_inter = 0; } @@ -4998,7 +5423,12 @@ void av1_encode_frame(AV1_COMP *cpi) { else cm->reference_mode = REFERENCE_MODE_SELECT; #else +#if CONFIG_BGSPRITE + (void)is_alt_ref; + if (!cpi->allow_comp_inter_inter) +#else if (is_alt_ref || !cpi->allow_comp_inter_inter) +#endif // CONFIG_BGSPRITE cm->reference_mode = SINGLE_REFERENCE; else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] && mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] && @@ -5017,25 +5447,23 @@ void av1_encode_frame(AV1_COMP *cpi) { #if CONFIG_EXT_INTER make_consistent_compound_tools(cm); #endif // CONFIG_EXT_INTER + + rdc->single_ref_used_flag = 0; + rdc->compound_ref_used_flag = 0; + encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2; if (cm->reference_mode == REFERENCE_MODE_SELECT) { - int single_count_zero = 0; - int comp_count_zero = 0; - - for (i = 0; i < COMP_INTER_CONTEXTS; i++) { - single_count_zero += counts->comp_inter[i][0]; - comp_count_zero += counts->comp_inter[i][1]; - } - - if (comp_count_zero == 0) { + // Use a flag that includes 4x4 blocks + if (rdc->compound_ref_used_flag == 0) { cm->reference_mode = SINGLE_REFERENCE; av1_zero(counts->comp_inter); #if !CONFIG_REF_ADAPT - } else if (single_count_zero == 0) { + // Use a flag that includes 4x4 blocks + } else if (rdc->single_ref_used_flag == 0) { cm->reference_mode = COMPOUND_REFERENCE; av1_zero(counts->comp_inter); #endif // !CONFIG_REF_ADAPT @@ -5046,10 +5474,15 @@ void av1_encode_frame(AV1_COMP *cpi) { #endif // CONFIG_EXT_INTER #if CONFIG_VAR_TX +#if CONFIG_RECT_TX_EXT + if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0 && + counts->quarter_tx_size[1] == 0) +#else if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0) +#endif cm->tx_mode = ALLOW_32X32 + CONFIG_TX64X64; #else -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && CONFIG_EXT_TX if (cm->tx_mode == TX_MODE_SELECT && counts->quarter_tx_size[1] == 0) { #else if (cm->tx_mode == TX_MODE_SELECT) { @@ -5232,12 +5665,20 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, const MODE_INFO *left_mi, const int intraonly, const int mi_row, const int mi_col) { const MB_MODE_INFO *const mbmi = &mi->mbmi; +#if CONFIG_ENTROPY_STATS const PREDICTION_MODE y_mode = mbmi->mode; - const PREDICTION_MODE uv_mode = mbmi->uv_mode; + const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode; +#else // CONFIG_ENTROPY_STATS + (void)counts; + (void)above_mi; + (void)left_mi; + (void)intraonly; 
+#endif // CONFIG_ENTROPY_STATS const BLOCK_SIZE bsize = mbmi->sb_type; const int unify_bsize = CONFIG_CB4X4; if (bsize < BLOCK_8X8 && !unify_bsize) { +#if CONFIG_ENTROPY_STATS int idx, idy; const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; @@ -5253,7 +5694,9 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, ++counts->y_mode[0][bmode]; } } +#endif // CONFIG_ENTROPY_STATS } else { +#if CONFIG_ENTROPY_STATS if (intraonly) { const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, 0); const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, 0); @@ -5261,6 +5704,7 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, } else { ++counts->y_mode[size_group_lookup[bsize]][y_mode]; } +#endif // CONFIG_ENTROPY_STATS #if CONFIG_FILTER_INTRA if (mbmi->mode == DC_PRED #if CONFIG_PALETTE @@ -5271,7 +5715,7 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, mbmi->filter_intra_mode_info.use_filter_intra_mode[0]; ++counts->filter_intra[0][use_filter_intra_mode]; } - if (mbmi->uv_mode == DC_PRED + if (mbmi->uv_mode == UV_DC_PRED #if CONFIG_CB4X4 && is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x, @@ -5306,7 +5750,9 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, (void)mi_col; (void)xd; #endif +#if CONFIG_ENTROPY_STATS ++counts->uv_mode[y_mode][uv_mode]; +#endif // CONFIG_ENTROPY_STATS } #if CONFIG_VAR_TX @@ -5325,9 +5771,17 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd, if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; +#if CONFIG_RECT_TX_EXT + if (tx_size == plane_tx_size || + mbmi->tx_size == quarter_txsize_lookup[mbmi->sb_type]) { +#else if (tx_size == plane_tx_size) { +#endif ++counts->txfm_partition[ctx][0]; - mbmi->tx_size = tx_size; +#if CONFIG_RECT_TX_EXT + if (tx_size == plane_tx_size) +#endif + mbmi->tx_size = tx_size; txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size); } else { @@ -5438,18 +5892,22 @@ static void tx_partition_set_contexts(const AV1_COMMON *const cm, void av1_update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd, #if CONFIG_TXK_SEL - int block, int plane, + int blk_row, int blk_col, int block, int plane, #endif BLOCK_SIZE bsize, TX_SIZE tx_size, FRAME_COUNTS *counts) { MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; int is_inter = is_inter_block(mbmi); + #if !CONFIG_TXK_SEL TX_TYPE tx_type = mbmi->tx_type; #else + (void)blk_row; + (void)blk_col; // Only y plane's tx_type is updated if (plane > 0) return; - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size); + TX_TYPE tx_type = + av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, block, tx_size); #endif #if CONFIG_EXT_TX if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 && @@ -5509,7 +5967,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, x->pvq_coded = (dry_run == OUTPUT_ENABLED) ? 1 : 0; #endif #if CONFIG_CFL - x->cfl_store_y = (dry_run == OUTPUT_ENABLED) ? 
1 : 0; + x->cfl_store_y = 1; #endif if (!is_inter) { @@ -5526,13 +5984,8 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_PALETTE if (bsize >= BLOCK_8X8 && !dry_run) { for (plane = 0; plane <= 1; ++plane) { - if (mbmi->palette_mode_info.palette_size[plane] > 0) { - mbmi->palette_mode_info.palette_first_color_idx[plane] = - xd->plane[plane].color_index_map[0]; - // TODO(huisu): this increases the use of token buffer. Needs stretch - // test to verify. + if (mbmi->palette_mode_info.palette_size[plane] > 0) av1_tokenize_palette_sb(cpi, td, plane, t, dry_run, bsize, rate); - } } } #endif // CONFIG_PALETTE @@ -5559,9 +6012,21 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); } - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, block_size); +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Single ref compound mode + if (!is_compound && is_inter_singleref_comp_mode(mbmi->mode)) { + xd->block_refs[1] = xd->block_refs[0]; + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[0]); +#if CONFIG_INTRABC + assert(IMPLIES(!is_intrabc_block(mbmi), cfg)); +#else + assert(cfg != NULL); +#endif // !CONFIG_INTRABC + av1_setup_pre_planes(xd, 1, cfg, mi_row, mi_col, &xd->block_refs[1]->sf); + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF - av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, NULL, block_size); + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, block_size); #if CONFIG_MOTION_VAR if (mbmi->motion_mode == OBMC_CAUSAL) { #if CONFIG_NCOBMC @@ -5587,10 +6052,11 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, #endif } -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize < BLOCK_8X8) { - daala_dist_set_sub8x8_dst(x, x->decoded_8x8, bsize, block_size_wide[bsize], - block_size_high[bsize], mi_row, mi_col); + dist_8x8_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize, + block_size_wide[bsize], block_size_high[bsize], + mi_row, mi_col); } #endif @@ -5629,13 +6095,16 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, ++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth]; #endif -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) if (is_quarter_tx_allowed(xd, mbmi, is_inter) && - mbmi->tx_size != txsize_sqr_up_map[mbmi->tx_size]) { - ++td->counts->quarter_tx_size[mbmi->tx_size == - quarter_txsize_lookup[mbmi->sb_type]]; + quarter_txsize_lookup[bsize] != max_txsize_rect_lookup[bsize] && + (mbmi->tx_size == quarter_txsize_lookup[bsize] || + mbmi->tx_size == max_txsize_rect_lookup[bsize])) { + ++td->counts + ->quarter_tx_size[mbmi->tx_size == quarter_txsize_lookup[bsize]]; } -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif #if CONFIG_EXT_TX && CONFIG_RECT_TX assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi))); #endif // CONFIG_EXT_TX && CONFIG_RECT_TX @@ -5673,8 +6142,8 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, } ++td->counts->tx_size_totals[txsize_sqr_map[tx_size]]; - ++td->counts - ->tx_size_totals[txsize_sqr_map[get_uv_tx_size(mbmi, &xd->plane[1])]]; + ++td->counts->tx_size_totals[txsize_sqr_map[av1_get_uv_tx_size( + mbmi, &xd->plane[1])]]; #if !CONFIG_TXK_SEL av1_update_tx_type_count(cm, xd, bsize, tx_size, td->counts); #endif @@ -5837,7 +6306,7 @@ static void 
predict_superblock(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_EXT_INTER int mi_row_ori, int mi_col_ori, #endif // CONFIG_EXT_INTER - int mi_row_pred, int mi_col_pred, + int mi_row_pred, int mi_col_pred, int plane, BLOCK_SIZE bsize_pred, int b_sub8x8, int block) { // Used in supertx // (mi_row_ori, mi_col_ori): location for mv @@ -5859,28 +6328,39 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td, &xd->block_refs[ref]->sf); } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Single ref compound mode + if (!is_compound && is_inter_singleref_comp_mode(mbmi->mode)) { + xd->block_refs[1] = xd->block_refs[0]; + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[0]); + av1_setup_pre_planes(xd, 1, cfg, mi_row_pred, mi_col_pred, + &xd->block_refs[1]->sf); + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!b_sub8x8) - av1_build_inter_predictors_sb_extend(cm, xd, + av1_build_inter_predictor_sb_extend(cm, xd, #if CONFIG_EXT_INTER - mi_row_ori, mi_col_ori, + mi_row_ori, mi_col_ori, #endif // CONFIG_EXT_INTER - mi_row_pred, mi_col_pred, bsize_pred); + mi_row_pred, mi_col_pred, plane, + bsize_pred); else - av1_build_inter_predictors_sb_sub8x8_extend(cm, xd, + av1_build_inter_predictor_sb_sub8x8_extend(cm, xd, #if CONFIG_EXT_INTER - mi_row_ori, mi_col_ori, + mi_row_ori, mi_col_ori, #endif // CONFIG_EXT_INTER - mi_row_pred, mi_col_pred, - bsize_pred, block); + mi_row_pred, mi_col_pred, plane, + bsize_pred, block); } static void predict_b_extend(const AV1_COMP *const cpi, ThreadData *td, const TileInfo *const tile, int block, int mi_row_ori, int mi_col_ori, int mi_row_pred, int mi_col_pred, int mi_row_top, int mi_col_top, - uint8_t *dst_buf[3], int dst_stride[3], + int plane, uint8_t *dst_buf, int dst_stride, BLOCK_SIZE bsize_top, BLOCK_SIZE bsize_pred, - RUN_TYPE dry_run, int b_sub8x8, int bextend) { + RUN_TYPE dry_run, int b_sub8x8) { // Used in supertx // (mi_row_ori, mi_col_ori): location for mv // (mi_row_pred, mi_col_pred, bsize_pred): region to predict @@ -5905,34 +6385,27 @@ static void predict_b_extend(const AV1_COMP *const cpi, ThreadData *td, set_offsets_extend(cpi, td, tile, mi_row_pred, mi_col_pred, mi_row_ori, mi_col_ori, bsize_pred); - xd->plane[0].dst.stride = dst_stride[0]; - xd->plane[1].dst.stride = dst_stride[1]; - xd->plane[2].dst.stride = dst_stride[2]; - xd->plane[0].dst.buf = dst_buf[0] + - (r >> xd->plane[0].subsampling_y) * dst_stride[0] + - (c >> xd->plane[0].subsampling_x); - xd->plane[1].dst.buf = dst_buf[1] + - (r >> xd->plane[1].subsampling_y) * dst_stride[1] + - (c >> xd->plane[1].subsampling_x); - xd->plane[2].dst.buf = dst_buf[2] + - (r >> xd->plane[2].subsampling_y) * dst_stride[2] + - (c >> xd->plane[2].subsampling_x); + xd->plane[plane].dst.stride = dst_stride; + xd->plane[plane].dst.buf = + dst_buf + (r >> xd->plane[plane].subsampling_y) * dst_stride + + (c >> xd->plane[plane].subsampling_x); predict_superblock(cpi, td, #if CONFIG_EXT_INTER mi_row_ori, mi_col_ori, #endif // CONFIG_EXT_INTER - mi_row_pred, mi_col_pred, bsize_pred, b_sub8x8, block); + mi_row_pred, mi_col_pred, plane, bsize_pred, b_sub8x8, + block); - if (!dry_run && !bextend) + if (!dry_run && (plane == 0) && (block == 0 || !b_sub8x8)) update_stats(&cpi->common, td, mi_row_pred, mi_col_pred, 1); } static void extend_dir(const AV1_COMP *const cpi, ThreadData *td, const TileInfo *const tile, int block, BLOCK_SIZE bsize, - BLOCK_SIZE top_bsize, int mi_row, int mi_col, - int mi_row_top, int mi_col_top, RUN_TYPE dry_run, - uint8_t 
*dst_buf[3], int dst_stride[3], int dir) { + BLOCK_SIZE top_bsize, int mi_row_ori, int mi_col_ori, + int mi_row, int mi_col, int mi_row_top, int mi_col_top, + int plane, uint8_t *dst_buf, int dst_stride, int dir) { // dir: 0-lower, 1-upper, 2-left, 3-right // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright MACROBLOCKD *xd = &td->mb.e_mbd; @@ -5973,10 +6446,10 @@ static void extend_dir(const AV1_COMP *const cpi, ThreadData *td, for (j = 0; j < mi_height + ext_offset; j += high_unit) for (i = 0; i < mi_width + ext_offset; i += wide_unit) - predict_b_extend(cpi, td, tile, block, mi_row, mi_col, mi_row_pred + j, - mi_col_pred + i, mi_row_top, mi_col_top, dst_buf, - dst_stride, top_bsize, extend_bsize, dry_run, b_sub8x8, - 1); + predict_b_extend(cpi, td, tile, block, mi_row_ori, mi_col_ori, + mi_row_pred + j, mi_col_pred + i, mi_row_top, + mi_col_top, plane, dst_buf, dst_stride, top_bsize, + extend_bsize, 1, b_sub8x8); } else if (dir == 2 || dir == 3) { // left and right extend_bsize = (mi_height == mi_size_high[BLOCK_8X8] || bsize < BLOCK_8X8 || yss < xss) @@ -5996,10 +6469,10 @@ static void extend_dir(const AV1_COMP *const cpi, ThreadData *td, for (j = 0; j < mi_height + ext_offset; j += high_unit) for (i = 0; i < mi_width + ext_offset; i += wide_unit) - predict_b_extend(cpi, td, tile, block, mi_row, mi_col, mi_row_pred + j, - mi_col_pred + i, mi_row_top, mi_col_top, dst_buf, - dst_stride, top_bsize, extend_bsize, dry_run, b_sub8x8, - 1); + predict_b_extend(cpi, td, tile, block, mi_row_ori, mi_col_ori, + mi_row_pred + j, mi_col_pred + i, mi_row_top, + mi_col_top, plane, dst_buf, dst_stride, top_bsize, + extend_bsize, 1, b_sub8x8); } else { extend_bsize = BLOCK_8X8; #if CONFIG_CB4X4 @@ -6018,35 +6491,24 @@ static void extend_dir(const AV1_COMP *const cpi, ThreadData *td, for (j = 0; j < mi_height + ext_offset; j += high_unit) for (i = 0; i < mi_width + ext_offset; i += wide_unit) - predict_b_extend(cpi, td, tile, block, mi_row, mi_col, mi_row_pred + j, - mi_col_pred + i, mi_row_top, mi_col_top, dst_buf, - dst_stride, top_bsize, extend_bsize, dry_run, b_sub8x8, - 1); + predict_b_extend(cpi, td, tile, block, mi_row_ori, mi_col_ori, + mi_row_pred + j, mi_col_pred + i, mi_row_top, + mi_col_top, plane, dst_buf, dst_stride, top_bsize, + extend_bsize, 1, b_sub8x8); } } static void extend_all(const AV1_COMP *const cpi, ThreadData *td, const TileInfo *const tile, int block, BLOCK_SIZE bsize, - BLOCK_SIZE top_bsize, int mi_row, int mi_col, - int mi_row_top, int mi_col_top, RUN_TYPE dry_run, - uint8_t *dst_buf[3], int dst_stride[3]) { + BLOCK_SIZE top_bsize, int mi_row_ori, int mi_col_ori, + int mi_row, int mi_col, int mi_row_top, int mi_col_top, + int plane, uint8_t *dst_buf, int dst_stride) { assert(block >= 0 && block < 4); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 0); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 1); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 2); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 3); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 4); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 5); - 
extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 6); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 7); + for (int i = 0; i < 8; ++i) { + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row_ori, mi_col_ori, + mi_row, mi_col, mi_row_top, mi_col_top, plane, dst_buf, + dst_stride, i); + } } // This function generates prediction for multiple blocks, between which @@ -6140,29 +6602,36 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, switch (partition) { case PARTITION_NONE: assert(bsize < top_bsize); - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - bsize, dry_run, 0, 0); - extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride); + for (i = 0; i < MAX_MB_PLANE; ++i) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i], + top_bsize, bsize, dry_run, 0); + extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, mi_row, + mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + } break; case PARTITION_HORZ: if (bsize == BLOCK_8X8 && !unify_bsize) { - // Fisrt half - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - BLOCK_8X8, dry_run, 1, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - - // Second half - predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf1, dst_stride1, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf1, dst_stride1); + for (i = 0; i < MAX_MB_PLANE; ++i) { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i], + top_bsize, BLOCK_8X8, dry_run, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + + // Second half + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i], top_bsize, BLOCK_8X8, dry_run, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i]); + } // Smooth xd->plane[0].dst.buf = dst_buf[0]; @@ -6172,60 +6641,89 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ, 0); } else { - // First half - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - subsize, dry_run, 0, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - else - extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride, 0); - - if (mi_row + hbs < cm->mi_rows) { - // Second half - predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, 
mi_row + hbs, - mi_col, mi_row_top, mi_col_top, dst_buf1, - dst_stride1, top_bsize, subsize, dry_run, 0, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, - mi_col, mi_row_top, mi_col_top, dry_run, dst_buf1, - dst_stride1); - else - extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, - mi_col, mi_row_top, mi_col_top, dry_run, dst_buf1, - dst_stride1, 1); - - // Smooth - for (i = 0; i < MAX_MB_PLANE; i++) { + for (i = 0; i < MAX_MB_PLANE; ++i) { +#if CONFIG_CB4X4 + const struct macroblockd_plane *pd = &xd->plane[i]; + int handle_chroma_sub8x8 = need_handle_chroma_sub8x8( + subsize, pd->subsampling_x, pd->subsampling_y); + + if (handle_chroma_sub8x8) { + int mode_offset_row = CONFIG_CHROMA_SUB8X8 ? hbs : 0; + + predict_b_extend(cpi, td, tile, 0, mi_row + mode_offset_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, + dst_buf[i], dst_stride[i], top_bsize, bsize, + dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, bsize, top_bsize, + mi_row + mode_offset_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i]); + } else { +#endif + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], top_bsize, subsize, dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], 0); xd->plane[i].dst.buf = dst_buf[i]; xd->plane[i].dst.stride = dst_stride[i]; - av1_build_masked_inter_predictor_complex( - xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], - mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, - PARTITION_HORZ, i); + + if (mi_row + hbs < cm->mi_rows) { + // Second half + predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, + mi_row + hbs, mi_col, mi_row_top, mi_col_top, i, + dst_buf1[i], dst_stride1[i], top_bsize, subsize, + dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row + hbs, mi_col, mi_row_top, mi_col_top, + i, dst_buf1[i], dst_stride1[i]); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row + hbs, mi_col, mi_row_top, mi_col_top, + i, dst_buf1[i], dst_stride1[i], 1); + // Smooth + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + av1_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, + PARTITION_HORZ, i); + } +#if CONFIG_CB4X4 } +#endif } } break; case PARTITION_VERT: if (bsize == BLOCK_8X8 && !unify_bsize) { - // First half - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - BLOCK_8X8, dry_run, 1, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - - // Second half - predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf1, dst_stride1, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf1, dst_stride1); + for (i = 0; i < 
MAX_MB_PLANE; ++i) { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i], + top_bsize, BLOCK_8X8, dry_run, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + + // Second half + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i], top_bsize, BLOCK_8X8, dry_run, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i]); + } // Smooth xd->plane[0].dst.buf = dst_buf[0]; @@ -6235,66 +6733,160 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT, 0); } else { - // bsize: not important, not useful - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - subsize, dry_run, 0, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - else - extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride, 3); - - if (mi_col + hbs < cm->mi_cols) { - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row, - mi_col + hbs, mi_row_top, mi_col_top, dst_buf1, - dst_stride1, top_bsize, subsize, dry_run, 0, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, - mi_col + hbs, mi_row_top, mi_col_top, dry_run, dst_buf1, - dst_stride1); - else - extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, - mi_col + hbs, mi_row_top, mi_col_top, dry_run, dst_buf1, - dst_stride1, 2); - - for (i = 0; i < MAX_MB_PLANE; i++) { + for (i = 0; i < MAX_MB_PLANE; ++i) { +#if CONFIG_CB4X4 + const struct macroblockd_plane *pd = &xd->plane[i]; + int handle_chroma_sub8x8 = need_handle_chroma_sub8x8( + subsize, pd->subsampling_x, pd->subsampling_y); + + if (handle_chroma_sub8x8) { + int mode_offset_col = CONFIG_CHROMA_SUB8X8 ? 
hbs : 0; + + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + mode_offset_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, + dst_buf[i], dst_stride[i], top_bsize, bsize, + dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, + mi_col + mode_offset_col, mi_row, mi_col, mi_row_top, + mi_col_top, i, dst_buf[i], dst_stride[i]); + } else { +#endif + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], top_bsize, subsize, dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], 3); xd->plane[i].dst.buf = dst_buf[i]; xd->plane[i].dst.stride = dst_stride[i]; - av1_build_masked_inter_predictor_complex( - xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], - mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, - PARTITION_VERT, i); + + if (mi_col + hbs < cm->mi_cols) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, i, + dst_buf1[i], dst_stride1[i], top_bsize, subsize, + dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row, mi_col + hbs, mi_row_top, + mi_col_top, i, dst_buf1[i], dst_stride1[i]); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row, mi_col + hbs, mi_row_top, + mi_col_top, i, dst_buf1[i], dst_stride1[i], 2); + + // smooth + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + av1_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, + PARTITION_VERT, i); + } +#if CONFIG_CB4X4 } +#endif } } break; case PARTITION_SPLIT: if (bsize == BLOCK_8X8 && !unify_bsize) { - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - BLOCK_8X8, dry_run, 1, 0); - predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf1, dst_stride1, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf2, dst_stride2, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf3, dst_stride3, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - - if (bsize < top_bsize) { - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf1, dst_stride1); - extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf2, dst_stride2); - extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf3, dst_stride3); + for (i = 0; i < MAX_MB_PLANE; i++) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i], + top_bsize, BLOCK_8X8, dry_run, 1); + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, 
dst_buf1[i], + dst_stride1[i], top_bsize, BLOCK_8X8, dry_run, 1); + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf2[i], + dst_stride2[i], top_bsize, BLOCK_8X8, dry_run, 1); + predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf3[i], + dst_stride3[i], top_bsize, BLOCK_8X8, dry_run, 1); + + if (bsize < top_bsize) { + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i]); + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf2[i], + dst_stride2[i]); + extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf3[i], + dst_stride3[i]); + } + } +#if CONFIG_CB4X4 + } else if (bsize == BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + const struct macroblockd_plane *pd = &xd->plane[i]; + int handle_chroma_sub8x8 = need_handle_chroma_sub8x8( + subsize, pd->subsampling_x, pd->subsampling_y); + + if (handle_chroma_sub8x8) { + int mode_offset_row = + CONFIG_CHROMA_SUB8X8 && mi_row + hbs < cm->mi_rows ? hbs : 0; + int mode_offset_col = + CONFIG_CHROMA_SUB8X8 && mi_col + hbs < cm->mi_cols ? hbs : 0; + + predict_b_extend(cpi, td, tile, 0, mi_row + mode_offset_row, + mi_col + mode_offset_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], top_bsize, BLOCK_8X8, dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, BLOCK_8X8, top_bsize, + mi_row + mode_offset_row, mi_col + mode_offset_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + } else { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], top_bsize, subsize, dry_run, 0); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, i, + dst_buf1[i], dst_stride1[i], top_bsize, subsize, + dry_run, 0); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, + mi_row + hbs, mi_col, mi_row_top, mi_col_top, i, + dst_buf2[i], dst_stride2[i], top_bsize, subsize, + dry_run, 0); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col + hbs, + mi_row + hbs, mi_col + hbs, mi_row_top, + mi_col_top, i, dst_buf3[i], dst_stride3[i], + top_bsize, subsize, dry_run, 0); + + if (bsize < top_bsize) { + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row, mi_col + hbs, mi_row_top, + mi_col_top, i, dst_buf1[i], dst_stride1[i]); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row + hbs, mi_col, mi_row_top, mi_col_top, + i, dst_buf2[i], dst_stride2[i]); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col + hbs, mi_row + hbs, mi_col + hbs, mi_row_top, + 
mi_col_top, i, dst_buf3[i], dst_stride3[i]); + } + } } +#endif } else { predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row_top, mi_col_top, dry_run, subsize, top_bsize, dst_buf, @@ -6314,10 +6906,16 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, pc_tree->split[3]); } for (i = 0; i < MAX_MB_PLANE; i++) { -#if !CONFIG_CB4X4 +#if CONFIG_CB4X4 + const struct macroblockd_plane *pd = &xd->plane[i]; + int handle_chroma_sub8x8 = need_handle_chroma_sub8x8( + subsize, pd->subsampling_x, pd->subsampling_y); + if (handle_chroma_sub8x8) continue; // Skip <4x4 chroma smoothing +#else if (bsize == BLOCK_8X8 && i != 0) continue; // Skip <4x4 chroma smoothing #endif + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { av1_build_masked_inter_predictor_complex( xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], @@ -6334,9 +6932,6 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, PARTITION_HORZ, i); } } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { - if (bsize == BLOCK_8X8 && i != 0) - continue; // Skip <4x4 chroma smoothing - av1_build_masked_inter_predictor_complex( xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, @@ -6660,8 +7255,7 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td, *tmp_rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1); x->skip = 1; } else { - if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) < - RDCOST(x->rdmult, x->rddiv, 0, sse)) { + if (RDCOST(x->rdmult, *tmp_rate, *tmp_dist) < RDCOST(x->rdmult, 0, sse)) { *tmp_rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); x->skip = 0; } else { @@ -6671,7 +7265,7 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td, } } *tmp_rate += base_rate; - rd_tx = RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist); + rd_tx = RDCOST(x->rdmult, *tmp_rate, *tmp_dist); if (rd_tx < bestrd_tx * 0.99 || tx_type == DCT_DCT) { *best_tx = tx_type; bestrd_tx = rd_tx; diff --git a/third_party/aom/av1/encoder/encodeframe.h b/third_party/aom/av1/encoder/encodeframe.h index 46a99e1cf..569ec9f72 100644 --- a/third_party/aom/av1/encoder/encodeframe.h +++ b/third_party/aom/av1/encoder/encodeframe.h @@ -37,7 +37,7 @@ void av1_encode_tile(struct AV1_COMP *cpi, struct ThreadData *td, int tile_row, void av1_update_tx_type_count(const struct AV1Common *cm, MACROBLOCKD *xd, #if CONFIG_TXK_SEL - int block, int plane, + int blk_row, int blk_col, int block, int plane, #endif BLOCK_SIZE bsize, TX_SIZE tx_size, FRAME_COUNTS *counts); diff --git a/third_party/aom/av1/encoder/encodemb.c b/third_party/aom/av1/encoder/encodemb.c index 7c9781533..e7f4d313d 100644 --- a/third_party/aom/av1/encoder/encodemb.c +++ b/third_party/aom/av1/encoder/encodemb.c @@ -112,19 +112,9 @@ void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { // These numbers are empirically obtained. 
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { -#if CONFIG_EC_ADAPT { 10, 7 }, { 8, 5 }, -#else - { 10, 6 }, { 8, 6 }, -#endif }; -#define UPDATE_RD_COST() \ - { \ - rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \ - rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \ - } - static INLINE unsigned int get_token_bit_costs( unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], int skip_eob, int ctx, int token) { @@ -133,23 +123,14 @@ static INLINE unsigned int get_token_bit_costs( } #if !CONFIG_LV_MAP -#define USE_GREEDY_OPTIMIZE_B 0 - -#if USE_GREEDY_OPTIMIZE_B - -typedef struct av1_token_state_greedy { - int16_t token; - tran_low_t qc; - tran_low_t dqc; -} av1_token_state_greedy; static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, - int block, TX_SIZE tx_size, int ctx) { + int blk_row, int blk_col, int block, + TX_SIZE tx_size, int ctx) { MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *const p = &mb->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; const int ref = is_inter_block(&xd->mi[0]->mbmi); - av1_token_state_greedy tokens[MAX_TX_SQUARE + 1][2]; uint8_t token_cache[MAX_TX_SQUARE]; const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); @@ -158,23 +139,27 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, const PLANE_TYPE plane_type = pd->plane_type; const int16_t *const dequant_ptr = pd->dequant; const uint8_t *const band_translate = get_band_translate(tx_size); - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); const int16_t *const scan = scan_order->scan; const int16_t *const nb = scan_order->neighbors; int dqv; const int shift = av1_get_tx_scale(tx_size); #if CONFIG_AOM_QM int seg_id = xd->mi[0]->mbmi.segment_id; - const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size]; + // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms + const qm_val_t *iqmatrix = + IS_2D_TRANSFORM(tx_type) + ? 
pd->seg_iqmatrix[seg_id][!ref][tx_size] + : cm->giqmatrix[NUM_QM_LEVELS - 1][0][0][tx_size]; #endif #if CONFIG_NEW_QUANT int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type); const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq]; #endif // CONFIG_NEW_QUANT int sz = 0; - const int64_t rddiv = mb->rddiv; int64_t rd_cost0, rd_cost1; int16_t t0, t1; int i, final_eob; @@ -193,19 +178,8 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int64_t rate0, rate1; for (i = 0; i < eob; i++) { const int rc = scan[i]; - int x = qcoeff[rc]; - t0 = av1_get_token(x); - - tokens[i][0].qc = x; - tokens[i][0].token = t0; - tokens[i][0].dqc = dqcoeff[rc]; - - token_cache[rc] = av1_pt_energy_class[t0]; + token_cache[rc] = av1_pt_energy_class[av1_get_token(qcoeff[rc])]; } - tokens[eob][0].token = EOB_TOKEN; - tokens[eob][0].qc = 0; - tokens[eob][0].dqc = 0; - tokens[eob][1] = tokens[eob][0]; unsigned int(*token_costs_ptr)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = token_costs; @@ -213,20 +187,22 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, final_eob = 0; int64_t eob_cost0, eob_cost1; + tran_low_t before_best_eob_qc = 0; + tran_low_t before_best_eob_dqc = 0; const int ctx0 = ctx; /* Record the r-d cost */ int64_t accu_rate = 0; - int64_t accu_error = 0; + // Initialized to the worst possible error for the largest transform size. + // This ensures that it never goes negative. + int64_t accu_error = ((int64_t)1) << 50; rate0 = get_token_bit_costs(*(token_costs_ptr + band_translate[0]), 0, ctx0, EOB_TOKEN); - int64_t best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error); + int64_t best_block_rd_cost = RDCOST(rdmult, rate0, accu_error); // int64_t best_block_rd_cost_all0 = best_block_rd_cost; - int x_prev = 1; - for (i = 0; i < eob; i++) { const int rc = scan[i]; int x = qcoeff[rc]; @@ -238,9 +214,9 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, if (x == 0) { // no need to search when x == 0 - rate0 = - get_token_bit_costs(*(token_costs_ptr + band_cur), token_tree_sel_cur, - ctx_cur, tokens[i][0].token); + int token = av1_get_token(x); + rate0 = get_token_bit_costs(*(token_costs_ptr + band_cur), + token_tree_sel_cur, ctx_cur, token); accu_rate += rate0; x_prev = 0; // accu_error does not change when x==0 @@ -249,7 +225,7 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, */ // compute the distortion for the first candidate // and the distortion for quantizing to 0. - int dx0 = (-coeff[rc]) * (1 << shift); + int dx0 = abs(coeff[rc]) * (1 << shift); #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { dx0 >>= xd->bd - 8; @@ -273,7 +249,9 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift); #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - dx >>= xd->bd - 8; + int dx_sign = dx < 0 ? 1 : 0; + dx = abs(dx) >> (xd->bd - 8); + if (dx_sign) dx = -dx; } #endif // CONFIG_HIGHBITDEPTH d2 = (int64_t)dx * dx; @@ -329,14 +307,16 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, if (i < default_eob - 1) { int ctx_next, token_tree_sel_next; int band_next = band_translate[i + 1]; + int token_next = + i + 1 != eob ? 
av1_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN; token_cache[rc] = av1_pt_energy_class[t0]; ctx_next = get_coef_context(nb, token_cache, i + 1); token_tree_sel_next = (x == 0); - next_bits0 = get_token_bit_costs(*(token_costs_ptr + band_next), - token_tree_sel_next, ctx_next, - tokens[i + 1][0].token); + next_bits0 = + get_token_bit_costs(*(token_costs_ptr + band_next), + token_tree_sel_next, ctx_next, token_next); next_eob_bits0 = get_token_bit_costs(*(token_costs_ptr + band_next), token_tree_sel_next, ctx_next, EOB_TOKEN); @@ -345,9 +325,9 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, ctx_next = get_coef_context(nb, token_cache, i + 1); token_tree_sel_next = (x_a == 0); - next_bits1 = get_token_bit_costs(*(token_costs_ptr + band_next), - token_tree_sel_next, ctx_next, - tokens[i + 1][0].token); + next_bits1 = + get_token_bit_costs(*(token_costs_ptr + band_next), + token_tree_sel_next, ctx_next, token_next); if (x_a != 0) { next_eob_bits1 = @@ -356,16 +336,16 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, } } - rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), d2); - rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), d2_a); + rd_cost0 = RDCOST(rdmult, (rate0 + next_bits0), d2); + rd_cost1 = RDCOST(rdmult, (rate1 + next_bits1), d2_a); best_x = (rd_cost1 < rd_cost0); - eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0), + eob_cost0 = RDCOST(rdmult, (accu_rate + rate0 + next_eob_bits0), (accu_error + d2 - d0)); eob_cost1 = eob_cost0; if (x_a != 0) { - eob_cost1 = RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1), + eob_cost1 = RDCOST(rdmult, (accu_rate + rate1 + next_eob_bits1), (accu_error + d2_a - d0)); best_eob_x = (eob_cost1 < eob_cost0); } else { @@ -410,38 +390,35 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, token_cache[rc] = av1_pt_energy_class[t0]; } + assert(accu_error >= 0); x_prev = qcoeff[rc]; // determine whether to move the eob position to i+1 - int64_t best_eob_cost_i = eob_cost0; - - tokens[i][1].token = t0; - tokens[i][1].qc = x; - tokens[i][1].dqc = dqc; - - if ((x_a != 0) && (best_eob_x)) { - best_eob_cost_i = eob_cost1; - - tokens[i][1].token = t1; - tokens[i][1].qc = x_a; - tokens[i][1].dqc = dqc_a; - } + int use_a = (x_a != 0) && (best_eob_x); + int64_t best_eob_cost_i = use_a ? 
eob_cost1 : eob_cost0; if (best_eob_cost_i < best_block_rd_cost) { best_block_rd_cost = best_eob_cost_i; final_eob = i + 1; + if (use_a) { + before_best_eob_qc = x_a; + before_best_eob_dqc = dqc_a; + } else { + before_best_eob_qc = x; + before_best_eob_dqc = dqc; + } } } // if (x==0) } // for (i) assert(final_eob <= eob); if (final_eob > 0) { - assert(tokens[final_eob - 1][1].qc != 0); + assert(before_best_eob_qc != 0); i = final_eob - 1; int rc = scan[i]; - qcoeff[rc] = tokens[i][1].qc; - dqcoeff[rc] = tokens[i][1].dqc; + qcoeff[rc] = before_best_eob_qc; + dqcoeff[rc] = before_best_eob_dqc; } for (i = final_eob; i < eob; i++) { @@ -453,366 +430,19 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, mb->plane[plane].eobs[block] = final_eob; return final_eob; } - -#else // USE_GREEDY_OPTIMIZE_B - -typedef struct av1_token_state_org { - int64_t error; - int rate; - int16_t next; - int16_t token; - tran_low_t qc; - tran_low_t dqc; - uint8_t best_index; -} av1_token_state_org; - -static int optimize_b_org(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, - int block, TX_SIZE tx_size, int ctx) { - MACROBLOCKD *const xd = &mb->e_mbd; - struct macroblock_plane *const p = &mb->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; - const int ref = is_inter_block(&xd->mi[0]->mbmi); - av1_token_state_org tokens[MAX_TX_SQUARE + 1][2]; - uint8_t token_cache[MAX_TX_SQUARE]; - const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); - tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - const int eob = p->eobs[block]; - const PLANE_TYPE plane_type = pd->plane_type; - const int default_eob = tx_size_2d[tx_size]; - const int16_t *const dequant_ptr = pd->dequant; - const uint8_t *const band_translate = get_band_translate(tx_size); - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); - const int16_t *const scan = scan_order->scan; - const int16_t *const nb = scan_order->neighbors; - int dqv; - const int shift = av1_get_tx_scale(tx_size); -#if CONFIG_AOM_QM - int seg_id = xd->mi[0]->mbmi.segment_id; - const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size]; -#endif -#if CONFIG_NEW_QUANT - int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type); - const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq]; -#endif // CONFIG_NEW_QUANT - int next = eob, sz = 0; - const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1; - const int64_t rddiv = mb->rddiv; - int64_t rd_cost0, rd_cost1; - int rate0, rate1; - int64_t error0, error1; - int16_t t0, t1; - int best, band = (eob < default_eob) ? band_translate[eob] - : band_translate[eob - 1]; - int pt, i, final_eob; - const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd); - unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = - mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref]; - const uint16_t *band_counts = &band_count_table[tx_size][band]; - uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1; - int shortcut = 0; - int next_shortcut = 0; - -#if CONFIG_EXT_DELTA_Q - const int qindex = cm->seg.enabled - ? 
av1_get_qindex(&cm->seg, xd->mi[0]->mbmi.segment_id, - cm->base_qindex) - : cm->base_qindex; - assert(qindex > 0); - (void)qindex; -#else - assert(mb->qindex > 0); -#endif - - token_costs += band; - - assert((!plane_type && !plane) || (plane_type && plane)); - assert(eob <= default_eob); - - /* Now set up a Viterbi trellis to evaluate alternative roundings. */ - /* Initialize the sentinel node of the trellis. */ - tokens[eob][0].rate = 0; - tokens[eob][0].error = 0; - tokens[eob][0].next = default_eob; - tokens[eob][0].token = EOB_TOKEN; - tokens[eob][0].qc = 0; - tokens[eob][1] = tokens[eob][0]; - - for (i = 0; i < eob; i++) { - const int rc = scan[i]; - tokens[i][0].rate = av1_get_token_cost(qcoeff[rc], &t0, cat6_bits); - tokens[i][0].token = t0; - token_cache[rc] = av1_pt_energy_class[t0]; - } - - for (i = eob; i-- > 0;) { - int base_bits, dx; - int64_t d2; - const int rc = scan[i]; - int x = qcoeff[rc]; -#if CONFIG_AOM_QM - int iwt = iqmatrix[rc]; - dqv = dequant_ptr[rc != 0]; - dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; -#else - dqv = dequant_ptr[rc != 0]; -#endif - next_shortcut = shortcut; - - /* Only add a trellis state for non-zero coefficients. */ - if (UNLIKELY(x)) { - error0 = tokens[next][0].error; - error1 = tokens[next][1].error; - /* Evaluate the first possibility for this state. */ - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - - if (next_shortcut) { - /* Consider both possible successor states. */ - if (next < default_eob) { - pt = get_coef_context(nb, token_cache, i + 1); - rate0 += - get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token); - rate1 += - get_token_bit_costs(*token_costs, 0, pt, tokens[next][1].token); - } - UPDATE_RD_COST(); - /* And pick the best. */ - best = rd_cost1 < rd_cost0; - } else { - if (next < default_eob) { - pt = get_coef_context(nb, token_cache, i + 1); - rate0 += - get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token); - } - best = 0; - } - - dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift); -#if CONFIG_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - dx >>= xd->bd - 8; - } -#endif // CONFIG_HIGHBITDEPTH - d2 = (int64_t)dx * dx; - tokens[i][0].rate += (best ? rate1 : rate0); - tokens[i][0].error = d2 + (best ? error1 : error0); - tokens[i][0].next = next; - tokens[i][0].qc = x; - tokens[i][0].dqc = dqcoeff[rc]; - tokens[i][0].best_index = best; - - /* Evaluate the second possibility for this state. */ - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - - // The threshold of 3 is empirically obtained. 
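The removed trellis path below only re-evaluates small coefficients whose current reconstruction brackets the original value within one quantization step. A standalone sketch of that bracket test (dq and shift stand for the per-coefficient dequant step and transform scale; this is an illustration, not the libaom code, and the QM and NEW_QUANT variants above add a weighting term):

#include <stdlib.h>

/* Lowering |x| by one is only worth trying when the current reconstruction
 * overshoots the original coefficient by less than one quantization step,
 * i.e. x and x-1 straddle the input value. */
static int rounding_is_candidate(int x, int coeff, int dq, int shift) {
  const int target = abs(coeff) << shift; /* original value on the dequant scale */
  const int recon = abs(x) * dq;          /* value implied by the quantized x    */
  return recon > target && recon < target + dq;
}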
- if (UNLIKELY(abs(x) > 3)) { - shortcut = 0; - } else { -#if CONFIG_NEW_QUANT - shortcut = ((av1_dequant_abscoeff_nuq(abs(x), dqv, - dequant_val[band_translate[i]]) > - (abs(coeff[rc]) << shift)) && - (av1_dequant_abscoeff_nuq(abs(x) - 1, dqv, - dequant_val[band_translate[i]]) < - (abs(coeff[rc]) << shift))); -#else // CONFIG_NEW_QUANT -#if CONFIG_AOM_QM - if ((abs(x) * dequant_ptr[rc != 0] * iwt > - ((abs(coeff[rc]) << shift) << AOM_QM_BITS)) && - (abs(x) * dequant_ptr[rc != 0] * iwt < - (((abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]) - << AOM_QM_BITS))) -#else - if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) && - (abs(x) * dequant_ptr[rc != 0] < - (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])) -#endif // CONFIG_AOM_QM - shortcut = 1; - else - shortcut = 0; -#endif // CONFIG_NEW_QUANT - } - - if (shortcut) { - sz = -(x < 0); - x -= 2 * sz + 1; - } else { - tokens[i][1] = tokens[i][0]; - next = i; - - if (UNLIKELY(!(--band_left))) { - --band_counts; - band_left = *band_counts; - --token_costs; - } - continue; - } - - /* Consider both possible successor states. */ - if (!x) { - /* If we reduced this coefficient to zero, check to see if - * we need to move the EOB back here. - */ - t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; - t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; - base_bits = 0; - } else { - base_bits = av1_get_token_cost(x, &t0, cat6_bits); - t1 = t0; - } - - if (next_shortcut) { - if (LIKELY(next < default_eob)) { - if (t0 != EOB_TOKEN) { - token_cache[rc] = av1_pt_energy_class[t0]; - pt = get_coef_context(nb, token_cache, i + 1); - rate0 += get_token_bit_costs(*token_costs, !x, pt, - tokens[next][0].token); - } - if (t1 != EOB_TOKEN) { - token_cache[rc] = av1_pt_energy_class[t1]; - pt = get_coef_context(nb, token_cache, i + 1); - rate1 += get_token_bit_costs(*token_costs, !x, pt, - tokens[next][1].token); - } - } - - UPDATE_RD_COST(); - /* And pick the best. */ - best = rd_cost1 < rd_cost0; - } else { - // The two states in next stage are identical. - if (next < default_eob && t0 != EOB_TOKEN) { - token_cache[rc] = av1_pt_energy_class[t0]; - pt = get_coef_context(nb, token_cache, i + 1); - rate0 += - get_token_bit_costs(*token_costs, !x, pt, tokens[next][0].token); - } - best = 0; - } - -#if CONFIG_NEW_QUANT - dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) - - (coeff[rc] << shift); -#if CONFIG_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - dx >>= xd->bd - 8; - } -#endif // CONFIG_HIGHBITDEPTH -#else // CONFIG_NEW_QUANT -#if CONFIG_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz; - } else { - dx -= (dqv + sz) ^ sz; - } -#else - dx -= (dqv + sz) ^ sz; -#endif // CONFIG_HIGHBITDEPTH -#endif // CONFIG_NEW_QUANT - d2 = (int64_t)dx * dx; - - tokens[i][1].rate = base_bits + (best ? rate1 : rate0); - tokens[i][1].error = d2 + (best ? error1 : error0); - tokens[i][1].next = next; - tokens[i][1].token = best ? t1 : t0; - tokens[i][1].qc = x; - - if (x) { -#if CONFIG_NEW_QUANT - tokens[i][1].dqc = av1_dequant_abscoeff_nuq( - abs(x), dqv, dequant_val[band_translate[i]]); - tokens[i][1].dqc = shift ? 
ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift) - : tokens[i][1].dqc; - if (sz) tokens[i][1].dqc = -tokens[i][1].dqc; -#else - if (x < 0) - tokens[i][1].dqc = -((-x * dqv) >> shift); - else - tokens[i][1].dqc = (x * dqv) >> shift; -#endif // CONFIG_NEW_QUANT - } else { - tokens[i][1].dqc = 0; - } - - tokens[i][1].best_index = best; - /* Finally, make this the new head of the trellis. */ - next = i; - } else { - /* There's no choice to make for a zero coefficient, so we don't - * add a new trellis node, but we do need to update the costs. - */ - t0 = tokens[next][0].token; - t1 = tokens[next][1].token; - pt = get_coef_context(nb, token_cache, i + 1); - /* Update the cost of each path if we're past the EOB token. */ - if (t0 != EOB_TOKEN) { - tokens[next][0].rate += get_token_bit_costs(*token_costs, 1, pt, t0); - tokens[next][0].token = ZERO_TOKEN; - } - if (t1 != EOB_TOKEN) { - tokens[next][1].rate += get_token_bit_costs(*token_costs, 1, pt, t1); - tokens[next][1].token = ZERO_TOKEN; - } - tokens[i][0].best_index = tokens[i][1].best_index = 0; - shortcut = (tokens[next][0].rate != tokens[next][1].rate); - /* Don't update next, because we didn't add a new node. */ - } - - if (UNLIKELY(!(--band_left))) { - --band_counts; - band_left = *band_counts; - --token_costs; - } - } - - /* Now pick the best path through the whole trellis. */ - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - error0 = tokens[next][0].error; - error1 = tokens[next][1].error; - t0 = tokens[next][0].token; - t1 = tokens[next][1].token; - rate0 += get_token_bit_costs(*token_costs, 0, ctx, t0); - rate1 += get_token_bit_costs(*token_costs, 0, ctx, t1); - UPDATE_RD_COST(); - best = rd_cost1 < rd_cost0; - - final_eob = -1; - - for (i = next; i < eob; i = next) { - const int x = tokens[i][best].qc; - const int rc = scan[i]; - if (x) final_eob = i; - qcoeff[rc] = x; - dqcoeff[rc] = tokens[i][best].dqc; - - next = tokens[i][best].next; - best = tokens[i][best].best_index; - } - final_eob++; - - mb->plane[plane].eobs[block] = final_eob; - assert(final_eob <= default_eob); - return final_eob; -} - -#endif // USE_GREEDY_OPTIMIZE_B #endif // !CONFIG_LV_MAP -int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) { +int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row, + int blk_col, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, const ENTROPY_CONTEXT *a, + const ENTROPY_CONTEXT *l) { MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *const p = &mb->plane[plane]; const int eob = p->eobs[block]; assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)); if (eob == 0) return eob; if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return eob; + #if CONFIG_PVQ (void)cm; (void)tx_size; @@ -823,26 +453,24 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, #if !CONFIG_LV_MAP (void)plane_bsize; + (void)blk_row; + (void)blk_col; #if CONFIG_VAR_TX int ctx = get_entropy_context(tx_size, a, l); #else int ctx = combine_entropy_contexts(*a, *l); -#endif - -#if USE_GREEDY_OPTIMIZE_B - return optimize_b_greedy(cm, mb, plane, block, tx_size, ctx); -#else // USE_GREEDY_OPTIMIZE_B - return optimize_b_org(cm, mb, plane, block, tx_size, ctx); -#endif // USE_GREEDY_OPTIMIZE_B +#endif // CONFIG_VAR_TX + return optimize_b_greedy(cm, mb, plane, blk_row, blk_col, block, tx_size, + ctx); #else // !CONFIG_LV_MAP TXB_CTX txb_ctx; 
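The greedy optimizer above no longer carries a per-position token array; it only remembers the quantized and dequantized values at the best end-of-block position (before_best_eob_qc / before_best_eob_dqc) and restores them once the scan is done. A minimal sketch of that selection, with the accumulated eob costs treated as a precomputed input (illustrative only, not the libaom code):

#include <stdint.h>

/* Pick the cheapest place to terminate the block, restore the remembered
 * last coefficient, and zero everything past it. */
static int choose_final_eob(const int64_t *cost_if_eob_here,
                            int64_t cost_all_zero, int32_t *qcoeff,
                            const int16_t *scan, int eob) {
  int64_t best_cost = cost_all_zero;  /* cost of coding the block as all-zero */
  int final_eob = 0;
  int32_t best_last_qc = 0;
  for (int i = 0; i < eob; ++i) {
    if (cost_if_eob_here[i] < best_cost) {
      best_cost = cost_if_eob_here[i];
      final_eob = i + 1;
      best_last_qc = qcoeff[scan[i]];  /* the only value that must be restored */
    }
  }
  if (final_eob > 0) qcoeff[scan[final_eob - 1]] = best_last_qc;
  for (int i = final_eob; i < eob; ++i) qcoeff[scan[i]] = 0; /* trim the tail */
  return final_eob;
}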
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx); - return av1_optimize_txb(cm, mb, plane, block, tx_size, &txb_ctx); + return av1_optimize_txb(cm, mb, plane, blk_row, blk_col, block, tx_size, + &txb_ctx); #endif // !CONFIG_LV_MAP } #if !CONFIG_PVQ -#if CONFIG_HIGHBITDEPTH typedef enum QUANT_FUNC { QUANT_FUNC_LOWBD = 0, QUANT_FUNC_HIGHBD = 1, @@ -862,29 +490,12 @@ static AV1_QUANT_FACADE #endif // !CONFIG_NEW_QUANT { NULL, NULL } }; +#endif // !CONFIG_PVQ -#else - -typedef enum QUANT_FUNC { - QUANT_FUNC_LOWBD = 0, - QUANT_FUNC_TYPES = 1 -} QUANT_FUNC; - -static AV1_QUANT_FACADE quant_func_list[AV1_XFORM_QUANT_TYPES] - [QUANT_FUNC_TYPES] = { -#if !CONFIG_NEW_QUANT - { av1_quantize_fp_facade }, - { av1_quantize_b_facade }, - { av1_quantize_dc_facade }, -#else // !CONFIG_NEW_QUANT - { av1_quantize_fp_nuq_facade }, - { av1_quantize_b_nuq_facade }, - { av1_quantize_dc_nuq_facade }, -#endif // !CONFIG_NEW_QUANT - { NULL } - }; -#endif // CONFIG_HIGHBITDEPTH -#endif // CONFIG_PVQ +typedef void (*fwdTxfmFunc)(const int16_t *diff, tran_low_t *coeff, int stride, + TxfmParam *txfm_param); +static const fwdTxfmFunc fwd_txfm_func[2] = { av1_fwd_txfm, + av1_highbd_fwd_txfm }; void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, @@ -892,7 +503,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, AV1_XFORM_QUANT xform_quant_idx) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; -#if !(CONFIG_PVQ || CONFIG_DAALA_DIST) +#if !(CONFIG_PVQ || CONFIG_DIST_8X8) const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; #else @@ -900,9 +511,14 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, struct macroblockd_plane *const pd = &xd->plane[plane]; #endif PLANE_TYPE plane_type = get_plane_type(plane); - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); + +#if CONFIG_AOM_QM || CONFIG_NEW_QUANT const int is_inter = is_inter_block(mbmi); - const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, is_inter); +#endif + + const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi); tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); @@ -910,19 +526,28 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, const int diff_stride = block_size_wide[plane_bsize]; #if CONFIG_AOM_QM int seg_id = mbmi->segment_id; - const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][!is_inter][tx_size]; - const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!is_inter][tx_size]; + // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms + const qm_val_t *qmatrix = + IS_2D_TRANSFORM(tx_type) ? pd->seg_qmatrix[seg_id][!is_inter][tx_size] + : cm->gqmatrix[NUM_QM_LEVELS - 1][0][0][tx_size]; + const qm_val_t *iqmatrix = + IS_2D_TRANSFORM(tx_type) + ? 
pd->seg_iqmatrix[seg_id][!is_inter][tx_size] + : cm->giqmatrix[NUM_QM_LEVELS - 1][0][0][tx_size]; #endif - FWD_TXFM_PARAM fwd_txfm_param; + TxfmParam txfm_param; -#if CONFIG_PVQ || CONFIG_DAALA_DIST +#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX uint8_t *dst; - int16_t *pred; const int dst_stride = pd->dst.stride; - int tx_blk_size; +#if CONFIG_PVQ || CONFIG_DIST_8X8 + int16_t *pred; + const int txw = tx_size_wide[tx_size]; + const int txh = tx_size_high[tx_size]; int i, j; #endif +#endif #if !CONFIG_PVQ const int tx2d_size = tx_size_2d[tx_size]; @@ -960,79 +585,68 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, src_int16 = &p->src_int16[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]]; - // transform block size in pixels - tx_blk_size = tx_size_wide[tx_size]; #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - for (j = 0; j < tx_blk_size; j++) - for (i = 0; i < tx_blk_size; i++) + for (j = 0; j < txh; j++) + for (i = 0; i < txw; i++) src_int16[diff_stride * j + i] = CONVERT_TO_SHORTPTR(src)[src_stride * j + i]; } else { #endif // CONFIG_HIGHBITDEPTH - for (j = 0; j < tx_blk_size; j++) - for (i = 0; i < tx_blk_size; i++) + for (j = 0; j < txh; j++) + for (i = 0; i < txw; i++) src_int16[diff_stride * j + i] = src[src_stride * j + i]; #if CONFIG_HIGHBITDEPTH } #endif // CONFIG_HIGHBITDEPTH #endif -#if CONFIG_PVQ || CONFIG_DAALA_DIST +#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; +#if CONFIG_PVQ || CONFIG_DIST_8X8 pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]]; - // transform block size in pixels - tx_blk_size = tx_size_wide[tx_size]; - // copy uint8 orig and predicted block to int16 buffer // in order to use existing VP10 transform functions #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - for (j = 0; j < tx_blk_size; j++) - for (i = 0; i < tx_blk_size; i++) + for (j = 0; j < txh; j++) + for (i = 0; i < txw; i++) pred[diff_stride * j + i] = CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i]; } else { #endif // CONFIG_HIGHBITDEPTH - for (j = 0; j < tx_blk_size; j++) - for (i = 0; i < tx_blk_size; i++) + for (j = 0; j < txh; j++) + for (i = 0; i < txw; i++) pred[diff_stride * j + i] = dst[dst_stride * j + i]; #if CONFIG_HIGHBITDEPTH } #endif // CONFIG_HIGHBITDEPTH -#endif +#endif // CONFIG_PVQ || CONFIG_DIST_8X8 +#endif // CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX (void)ctx; - fwd_txfm_param.tx_type = tx_type; - fwd_txfm_param.tx_size = tx_size; - fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id]; + txfm_param.tx_type = tx_type; + txfm_param.tx_size = tx_size; + txfm_param.lossless = xd->lossless[mbmi->segment_id]; +#if CONFIG_MRC_TX || CONFIG_LGT + txfm_param.dst = dst; + txfm_param.stride = dst_stride; +#endif // CONFIG_MRC_TX || CONFIG_LGT +#if CONFIG_LGT + txfm_param.is_inter = is_inter_block(mbmi); + txfm_param.mode = get_prediction_mode(xd->mi[0], plane, tx_size, block); +#endif #if !CONFIG_PVQ -#if CONFIG_HIGHBITDEPTH - fwd_txfm_param.bd = xd->bd; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) { - if (LIKELY(!x->skip_block)) { - quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD]( - coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam); - } else { - av1_quantize_skip(tx2d_size, qcoeff, 
dqcoeff, eob); - } - } -#if CONFIG_LV_MAP - p->txb_entropy_ctx[block] = - (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob); -#endif // CONFIG_LV_MAP - return; - } -#endif // CONFIG_HIGHBITDEPTH - av1_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + txfm_param.bd = xd->bd; + const int is_hbd = get_bitdepth_data_path_index(xd); + fwd_txfm_func[is_hbd](src_diff, coeff, diff_stride, &txfm_param); + if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) { if (LIKELY(!x->skip_block)) { - quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD]( + quant_func_list[xform_quant_idx][is_hbd]( coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam); } else { av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob); @@ -1042,17 +656,18 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, p->txb_entropy_ctx[block] = (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob); #endif // CONFIG_LV_MAP -#else // #if !CONFIG_PVQ + return; +#else // CONFIG_PVQ (void)xform_quant_idx; #if CONFIG_HIGHBITDEPTH - fwd_txfm_param.bd = xd->bd; + txfm_param.bd = xd->bd; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - av1_highbd_fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param); - av1_highbd_fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param); + av1_highbd_fwd_txfm(src_int16, coeff, diff_stride, &txfm_param); + av1_highbd_fwd_txfm(pred, ref_coeff, diff_stride, &txfm_param); } else { #endif - av1_fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param); - av1_fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param); + av1_fwd_txfm(src_int16, coeff, diff_stride, &txfm_param); + av1_fwd_txfm(pred, ref_coeff, diff_stride, &txfm_param); #if CONFIG_HIGHBITDEPTH } #endif @@ -1130,7 +745,8 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, #endif #if !CONFIG_PVQ - av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); + av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, a, + l); av1_set_txb_context(x, plane, block, tx_size, a, l); @@ -1143,9 +759,16 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, if (x->pvq_skip[plane]) return; #endif - TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size); + TX_TYPE tx_type = + av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, block, tx_size); +#if CONFIG_LGT + PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block); + av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, dst, + pd->dst.stride, p->eobs[block]); +#else av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst, pd->dst.stride, p->eobs[block]); +#endif } #if CONFIG_VAR_TX @@ -1174,16 +797,32 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col, encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg); } else { assert(tx_size < TX_SIZES_ALL); +#if CONFIG_RECT_TX_EXT + int is_qttx = plane_tx_size == quarter_txsize_lookup[plane_bsize]; + const TX_SIZE sub_txs = is_qttx ? plane_tx_size : sub_tx_size_map[tx_size]; + if (is_qttx) assert(blk_row == 0 && blk_col == 0 && block == 0); +#else const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; assert(sub_txs < tx_size); +#endif // This is the square transform block partition entry point. int bsl = tx_size_wide_unit[sub_txs]; int i; assert(bsl > 0); for (i = 0; i < 4; ++i) { +#if CONFIG_RECT_TX_EXT + int is_wide_tx = tx_size_wide_unit[sub_txs] > tx_size_high_unit[sub_txs]; + const int offsetr = + is_qttx ? (is_wide_tx ? 
i * tx_size_high_unit[sub_txs] : 0) + : blk_row + ((i >> 1) * bsl); + const int offsetc = + is_qttx ? (is_wide_tx ? 0 : i * tx_size_wide_unit[sub_txs]) + : blk_col + ((i & 0x01) * bsl); +#else const int offsetr = blk_row + ((i >> 1) * bsl); const int offsetc = blk_col + ((i & 0x01) * bsl); +#endif int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs]; if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; @@ -1211,6 +850,7 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col, struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + TxfmParam txfm_param; uint8_t *dst; int ctx = 0; dst = &pd->dst @@ -1246,22 +886,20 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col, #endif // CONFIG_HIGHBITDEPTH } #endif // !CONFIG_PVQ + txfm_param.bd = xd->bd; + txfm_param.tx_type = DCT_DCT; + txfm_param.eob = p->eobs[block]; + txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id]; #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { - av1_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], - xd->bd); - } else { - av1_highbd_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], - xd->bd); - } + av1_highbd_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, &txfm_param); return; } #endif // CONFIG_HIGHBITDEPTH if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { - av1_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + av1_iwht4x4_add(dqcoeff, dst, pd->dst.stride, &txfm_param); } else { - av1_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + av1_idct4x4_add(dqcoeff, dst, pd->dst.stride, &txfm_param); } } } @@ -1316,7 +954,7 @@ void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, av1_get_entropy_contexts(bsize, 0, pd, ctx.ta[plane], ctx.tl[plane]); #else const struct macroblockd_plane *const pd = &xd->plane[plane]; - const TX_SIZE tx_size = get_tx_size(plane, xd); + const TX_SIZE tx_size = av1_get_tx_size(plane, xd); av1_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]); #endif @@ -1327,11 +965,27 @@ void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, arg.tl = ctx.tl[plane]; #if CONFIG_VAR_TX - for (idy = 0; idy < mi_height; idy += bh) { - for (idx = 0; idx < mi_width; idx += bw) { - encode_block_inter(plane, block, idy, idx, plane_bsize, max_tx_size, - &arg); - block += step; + const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, pd); + int mu_blocks_wide = + block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; + int mu_blocks_high = + block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; + + mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide); + mu_blocks_high = AOMMIN(mi_height, mu_blocks_high); + + for (idy = 0; idy < mi_height; idy += mu_blocks_high) { + for (idx = 0; idx < mi_width; idx += mu_blocks_wide) { + int blk_row, blk_col; + const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height); + const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width); + for (blk_row = idy; blk_row < unit_height; blk_row += bh) { + for (blk_col = idx; blk_col < unit_width; blk_col += bw) { + encode_block_inter(plane, block, blk_row, blk_col, plane_bsize, + max_tx_size, &arg); + block += step; + } + } } } #else @@ -1357,7 +1011,7 @@ void av1_encode_sb_supertx(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) { 
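The rewritten loop in av1_encode_sb() above now covers each plane in processing units of at most 64x64 pixels and visits the variable-size transform blocks inside each unit. A self-contained sketch of that traversal order, with sizes in 4x4 mode-info units and a visitor callback standing in for encode_block_inter() (illustrative only):

/* Walk the plane in clamped 64x64 units, then the transform blocks inside
 * each unit, mirroring the mu_blocks_wide/mu_blocks_high loops above. */
static void walk_in_64x64_units(int mi_width, int mi_height,
                                int unit_w, int unit_h, /* clamped to the plane */
                                int tx_w, int tx_h,     /* max transform size   */
                                void (*visit)(int blk_row, int blk_col, void *ctx),
                                void *ctx) {
  for (int idy = 0; idy < mi_height; idy += unit_h) {
    for (int idx = 0; idx < mi_width; idx += unit_w) {
      const int row_end = idy + unit_h < mi_height ? idy + unit_h : mi_height;
      const int col_end = idx + unit_w < mi_width ? idx + unit_w : mi_width;
      for (int r = idy; r < row_end; r += tx_h)
        for (int c = idx; c < col_end; c += tx_w)
          visit(r, c, ctx); /* one square transform partition entry point */
    }
  }
}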
#if CONFIG_VAR_TX const TX_SIZE tx_size = TX_4X4; #else - const TX_SIZE tx_size = get_tx_size(plane, xd); + const TX_SIZE tx_size = av1_get_tx_size(plane, xd); #endif av1_subtract_plane(x, bsize, plane); av1_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]); @@ -1435,6 +1089,24 @@ static void quantize_scaler(int coeff, int16_t zbin, int16_t round_value, } } +#if CONFIG_HIGHBITDEPTH +typedef void (*hbd_dpcm_fwd_tx_func)(const int16_t *input, int stride, + TX_TYPE_1D tx_type, tran_low_t *output, + int dir); + +static hbd_dpcm_fwd_tx_func get_hbd_dpcm_fwd_tx_func(int tx_length) { + switch (tx_length) { + case 4: return av1_hbd_dpcm_ft4_c; + case 8: return av1_hbd_dpcm_ft8_c; + case 16: return av1_hbd_dpcm_ft16_c; + case 32: + return av1_hbd_dpcm_ft32_c; + // TODO(huisu): add support for TX_64X64. + default: assert(0); return NULL; + } +} +#endif // CONFIG_HIGHBITDEPTH + typedef void (*dpcm_fwd_tx_func)(const int16_t *input, int stride, TX_TYPE_1D tx_type, tran_low_t *output); @@ -1539,7 +1211,7 @@ static void hbd_process_block_dpcm_vert( int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff, tran_low_t *dqcoeff) { const int tx1d_width = tx_size_wide[tx_size]; - dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_width); + hbd_dpcm_fwd_tx_func forward_tx = get_hbd_dpcm_fwd_tx_func(tx1d_width); hbd_dpcm_inv_txfm_add_func inverse_tx = av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_width); uint16_t *src = CONVERT_TO_SHORTPTR(src8); @@ -1553,7 +1225,7 @@ static void hbd_process_block_dpcm_vert( // Subtraction. for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c]; // Forward transform. - forward_tx(src_diff, 1, tx_type_1d, coeff); + forward_tx(src_diff, 1, tx_type_1d, coeff, 1); // Quantization. for (int c = 0; c < tx1d_width; ++c) { quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx], @@ -1562,7 +1234,7 @@ static void hbd_process_block_dpcm_vert( q_idx = 1; } // Inverse transform. - inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst); + inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst, 1); // Move to the next row. coeff += tx1d_width; qcoeff += tx1d_width; @@ -1580,7 +1252,7 @@ static void hbd_process_block_dpcm_horz( int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff, tran_low_t *dqcoeff) { const int tx1d_height = tx_size_high[tx_size]; - dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_height); + hbd_dpcm_fwd_tx_func forward_tx = get_hbd_dpcm_fwd_tx_func(tx1d_height); hbd_dpcm_inv_txfm_add_func inverse_tx = av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_height); uint16_t *src = CONVERT_TO_SHORTPTR(src8); @@ -1597,7 +1269,7 @@ static void hbd_process_block_dpcm_horz( } // Forward transform. tran_low_t tx_buff[64]; - forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff); + forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff, 0); for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r]; // Quantization. for (int r = 0; r < tx1d_height; ++r) { @@ -1609,7 +1281,7 @@ static void hbd_process_block_dpcm_horz( } // Inverse transform. for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width]; - inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst); + inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst, 0); // Move to the next column. 
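The hbd_process_block_dpcm_* helpers above now route through the high-bit-depth 1-D transforms with an explicit direction flag, and each row or column is transformed, quantized and reconstructed before the next one is touched. A toy, runnable illustration of why that ordering matters in DPCM coding, with an identity "transform" and a flat quantization step as stand-ins for the real 1-D transforms and block quantizer (generic DPCM, not the libaom code):

#include <stdint.h>

/* Each row's prediction depends on the reconstruction of the row before it,
 * so rows must be coded strictly in order. */
#define TOY_W 4
#define TOY_H 4
static void toy_dpcm_vert(const int16_t src[TOY_H][TOY_W],
                          int16_t rec[TOY_H][TOY_W], int step) {
  for (int r = 0; r < TOY_H; ++r) {
    for (int c = 0; c < TOY_W; ++c) {
      const int pred = (r == 0) ? 128 : rec[r - 1][c]; /* previous reconstruction */
      const int diff = src[r][c] - pred;               /* residual                */
      const int q = (diff >= 0 ? diff + step / 2 : diff - step / 2) / step;
      rec[r][c] = (int16_t)(pred + q * step);          /* dequantize and add back */
    }
  }
}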
++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src; } @@ -1631,7 +1303,8 @@ void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x, const int dst_stride = pd->dst.stride; const int tx1d_width = tx_size_wide[tx_size]; const int tx1d_height = tx_size_high[tx_size]; - const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, 0); + const SCAN_ORDER *const scan_order = + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); uint8_t *dst = @@ -1711,30 +1384,20 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); PLANE_TYPE plane_type = get_plane_type(plane); - const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); uint16_t *eob = &p->eobs[block]; const int dst_stride = pd->dst.stride; uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; -#if CONFIG_CFL - -#if CONFIG_EC_ADAPT - FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *const ec_ctx = cm->fc; -#endif // CONFIG_EC_ADAPT - av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col, - blk_row, tx_size, plane_bsize); -#else av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size); -#endif +#if CONFIG_DPCM_INTRA || CONFIG_LGT + const PREDICTION_MODE mode = + get_prediction_mode(xd->mi[0], plane, tx_size, block); #if CONFIG_DPCM_INTRA - const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block); const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const PREDICTION_MODE mode = - (plane == 0) ? 
get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode; if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) { av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type, args->ta, @@ -1742,6 +1405,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, return; } #endif // CONFIG_DPCM_INTRA +#endif // CONFIG_DPCM_INTRA || CONFIG_LGT av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size); @@ -1751,7 +1415,8 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, if (args->enable_optimize_b) { av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, ctx, AV1_XFORM_QUANT_FP); - av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); + av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, + a, l); } else { av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, ctx, AV1_XFORM_QUANT_B); @@ -1763,220 +1428,25 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, if (x->pvq_skip[plane]) return; #endif // CONFIG_PVQ - av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst, dst_stride, - *eob); + av1_inverse_transform_block(xd, dqcoeff, +#if CONFIG_LGT + mode, +#endif + tx_type, tx_size, dst, dst_stride, *eob); #if !CONFIG_PVQ if (*eob) *(args->skip) = 0; #else // Note : *(args->skip) == mbmi->skip #endif #if CONFIG_CFL - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; if (plane == AOM_PLANE_Y && x->cfl_store_y) { - cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size); - } - - if (mbmi->uv_mode == DC_PRED) { - // TODO(ltrudeau) find a cleaner way to detect last transform block - if (plane == AOM_PLANE_U) { - xd->cfl->num_tx_blk[CFL_PRED_U] = - (blk_row == 0 && blk_col == 0) ? 1 - : xd->cfl->num_tx_blk[CFL_PRED_U] + 1; - } - - if (plane == AOM_PLANE_V) { - xd->cfl->num_tx_blk[CFL_PRED_V] = - (blk_row == 0 && blk_col == 0) ? 1 - : xd->cfl->num_tx_blk[CFL_PRED_V] + 1; - - if (mbmi->skip && - xd->cfl->num_tx_blk[CFL_PRED_U] == xd->cfl->num_tx_blk[CFL_PRED_V]) { - assert(plane_bsize != BLOCK_INVALID); - const int block_width = block_size_wide[plane_bsize]; - const int block_height = block_size_high[plane_bsize]; - - // if SKIP is chosen at the block level, and ind != 0, we must change - // the prediction - if (mbmi->cfl_alpha_idx != 0) { - const struct macroblockd_plane *const pd_cb = &xd->plane[AOM_PLANE_U]; - uint8_t *const dst_cb = pd_cb->dst.buf; - const int dst_stride_cb = pd_cb->dst.stride; - uint8_t *const dst_cr = pd->dst.buf; - const int dst_stride_cr = pd->dst.stride; - for (int j = 0; j < block_height; j++) { - for (int i = 0; i < block_width; i++) { - dst_cb[dst_stride_cb * j + i] = - (uint8_t)(xd->cfl->dc_pred[CFL_PRED_U] + 0.5); - dst_cr[dst_stride_cr * j + i] = - (uint8_t)(xd->cfl->dc_pred[CFL_PRED_V] + 0.5); - } - } - mbmi->cfl_alpha_idx = 0; - mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS; - mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS; - } - } - } + // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is + // intra predicted. 
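The cfl_store() call just below keeps the reconstructed luma pixels of each transform block so that chroma-from-luma prediction can read them later. A minimal sketch of such a store; the 4-pixel block unit and staging-buffer layout are illustrative assumptions rather than the libaom CFL_CTX layout:

#include <stdint.h>

/* Copy the reconstructed luma transform block into a superblock-sized
 * staging buffer at its (blk_row, blk_col) offset. */
static void store_luma_for_cfl(uint8_t *cfl_buf, int cfl_stride,
                               const uint8_t *recon, int recon_stride,
                               int blk_row, int blk_col, int tx_w, int tx_h) {
  uint8_t *out = cfl_buf + ((blk_row * cfl_stride + blk_col) << 2);
  for (int j = 0; j < tx_h; ++j)
    for (int i = 0; i < tx_w; ++i)
      out[j * cfl_stride + i] = recon[j * recon_stride + i];
}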
+ cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize); } #endif } -#if CONFIG_CFL -static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, - const uint8_t *src, int src_stride, int blk_width, - int blk_height, double dc_pred, double alpha, - int *dist_neg_out) { - const double dc_pred_bias = dc_pred + 0.5; - int dist = 0; - int diff; - - if (alpha == 0.0) { - const int dc_pred_i = (int)dc_pred_bias; - for (int j = 0; j < blk_height; j++) { - for (int i = 0; i < blk_width; i++) { - diff = src[i] - dc_pred_i; - dist += diff * diff; - } - src += src_stride; - } - - if (dist_neg_out) *dist_neg_out = dist; - - return dist; - } - - int dist_neg = 0; - for (int j = 0; j < blk_height; j++) { - for (int i = 0; i < blk_width; i++) { - const double scaled_luma = alpha * (y_pix[i] - y_avg); - const int uv = src[i]; - diff = uv - (int)(scaled_luma + dc_pred_bias); - dist += diff * diff; - diff = uv + (int)(scaled_luma - dc_pred_bias); - dist_neg += diff * diff; - } - y_pix += y_stride; - src += src_stride; - } - - if (dist_neg_out) *dist_neg_out = dist_neg; - - return dist; -} - -static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl, - BLOCK_SIZE bsize, - CFL_SIGN_TYPE signs_out[CFL_SIGNS]) { - const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U]; - const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V]; - const uint8_t *const src_u = p_u->src.buf; - const uint8_t *const src_v = p_v->src.buf; - const int src_stride_u = p_u->src.stride; - const int src_stride_v = p_v->src.stride; - const int block_width = block_size_wide[bsize]; - const int block_height = block_size_high[bsize]; - const double dc_pred_u = cfl->dc_pred[CFL_PRED_U]; - const double dc_pred_v = cfl->dc_pred[CFL_PRED_V]; - - // Temporary pixel buffer used to store the CfL prediction when we compute the - // alpha index. 
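The encoder-side alpha search being removed here (cfl_alpha_dist / cfl_compute_alpha_ind) chose the scaling factor by measuring SSE between the chroma source and dc_pred + alpha * (luma - luma_avg) for each candidate, then folding in the signalling cost through RDCOST(). A distortion-only sketch of that selection; the paired U/V codebook, sign handling and rate term of the real search are omitted, so this is illustrative only:

#include <stdint.h>

/* Try every candidate alpha and keep the one with the smallest SSE against
 * the chroma source. */
static double cfl_pick_alpha_sketch(const uint8_t *luma, const uint8_t *chroma,
                                    int n, double luma_avg, double dc_pred,
                                    const double *cand, int num_cand) {
  double best_alpha = 0.0;
  int64_t best_sse = -1;
  for (int c = 0; c < num_cand; ++c) {
    int64_t sse = 0;
    for (int i = 0; i < n; ++i) {
      const int pred = (int)(dc_pred + cand[c] * (luma[i] - luma_avg) + 0.5);
      const int diff = chroma[i] - pred;
      sse += (int64_t)diff * diff;
    }
    if (best_sse < 0 || sse < best_sse) {
      best_sse = sse;
      best_alpha = cand[c];
    }
  }
  return best_alpha;
}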
- uint8_t tmp_pix[MAX_SB_SQUARE]; - // Load CfL Prediction over the entire block - const double y_avg = - cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, block_width, block_height); - - int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE]; - sse[CFL_PRED_U][0] = - cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, - block_width, block_height, dc_pred_u, 0, NULL); - sse[CFL_PRED_V][0] = - cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, - block_width, block_height, dc_pred_v, 0, NULL); - for (int m = 1; m < CFL_MAGS_SIZE; m += 2) { - assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]); - sse[CFL_PRED_U][m] = cfl_alpha_dist( - tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, block_width, - block_height, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]); - sse[CFL_PRED_V][m] = cfl_alpha_dist( - tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, block_width, - block_height, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]); - } - - int dist; - int64_t cost; - int64_t best_cost; - - // Compute least squares parameter of the entire block - // IMPORTANT: We assume that the first code is 0,0 - int ind = 0; - signs_out[CFL_PRED_U] = CFL_SIGN_POS; - signs_out[CFL_PRED_V] = CFL_SIGN_POS; - - dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0]; - dist *= 16; - best_cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[0], dist); - - for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { - const int idx_u = cfl_alpha_codes[c][CFL_PRED_U]; - const int idx_v = cfl_alpha_codes[c][CFL_PRED_V]; - for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) { - for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) { - dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] + - sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)]; - dist *= 16; - cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[c], dist); - if (cost < best_cost) { - best_cost = cost; - ind = c; - signs_out[CFL_PRED_U] = sign_u; - signs_out[CFL_PRED_V] = sign_v; - } - } - } - } - - return ind; -} - -static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) { - assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] == - AOM_ICDF(CDF_PROB_TOP)); - const int prob_den = CDF_PROB_TOP; - - int prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[0]); - cfl->costs[0] = av1_cost_zero(get_prob(prob_num, prob_den)); - - for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { - int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) + - (cfl_alpha_codes[c][CFL_PRED_V] != 0); - prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - - AOM_ICDF(ec_ctx->cfl_alpha_cdf[c - 1]); - cfl->costs[c] = av1_cost_zero(get_prob(prob_num, prob_den)) + - av1_cost_literal(sign_bit_cost); - } -} - -void av1_predict_intra_block_encoder_facade(MACROBLOCK *x, - FRAME_CONTEXT *ec_ctx, int plane, - int block_idx, int blk_col, - int blk_row, TX_SIZE tx_size, - BLOCK_SIZE plane_bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) { - if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U) { - CFL_CTX *const cfl = xd->cfl; - cfl_update_costs(cfl, ec_ctx); - cfl_dc_pred(xd, plane_bsize, tx_size); - mbmi->cfl_alpha_idx = - cfl_compute_alpha_ind(x, cfl, plane_bsize, mbmi->cfl_alpha_signs); - } - } - av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row, - tx_size); -} -#endif - void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int plane, int enable_optimize_b, int mi_row, @@ -2001,7 +1471,7 @@ void av1_encode_intra_block_plane(AV1_COMMON 
*cm, MACROBLOCK *x, if (enable_optimize_b) { const struct macroblockd_plane *const pd = &xd->plane[plane]; - const TX_SIZE tx_size = get_tx_size(plane, xd); + const TX_SIZE tx_size = av1_get_tx_size(plane, xd); av1_get_entropy_contexts(bsize, tx_size, pd, ta, tl); } av1_foreach_transformed_block_in_plane( diff --git a/third_party/aom/av1/encoder/encodemb.h b/third_party/aom/av1/encoder/encodemb.h index 35a2c1570..65476bcae 100644 --- a/third_party/aom/av1/encoder/encodemb.h +++ b/third_party/aom/av1/encoder/encodemb.h @@ -53,9 +53,10 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx, AV1_XFORM_QUANT xform_quant_idx); -int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l); +int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row, + int blk_col, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, const ENTROPY_CONTEXT *a, + const ENTROPY_CONTEXT *l); void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize, int blk_col, int blk_row, TX_SIZE tx_size); @@ -86,14 +87,6 @@ void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k, int *size, int skip_rest, int skip_dir, int bs); #endif -#if CONFIG_CFL -void av1_predict_intra_block_encoder_facade(MACROBLOCK *x, - FRAME_CONTEXT *ec_ctx, int plane, - int block_idx, int blk_col, - int blk_row, TX_SIZE tx_size, - BLOCK_SIZE plane_bsize); -#endif - #if CONFIG_DPCM_INTRA void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x, PREDICTION_MODE mode, int plane, int block, diff --git a/third_party/aom/av1/encoder/encodemv.c b/third_party/aom/av1/encoder/encodemv.c index eb0ff88c4..fd61fe6b2 100644 --- a/third_party/aom/av1/encoder/encodemv.c +++ b/third_party/aom/av1/encoder/encodemv.c @@ -31,7 +31,7 @@ void av1_entropy_mv_init(void) { } static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp, - int usehp) { + MvSubpelPrecision precision) { int offset; const int sign = comp < 0; const int mag = sign ? -comp : comp; @@ -42,34 +42,53 @@ static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp, assert(comp != 0); - // Sign +// Sign +#if CONFIG_NEW_MULTISYMBOL + aom_write_bit(w, sign); +#else aom_write(w, sign, mvcomp->sign); +#endif // Class aom_write_symbol(w, mv_class, mvcomp->class_cdf, MV_CLASSES); // Integer bits if (mv_class == MV_CLASS_0) { +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE); +#else aom_write(w, d, mvcomp->class0[0]); +#endif } else { int i; const int n = mv_class + CLASS0_BITS - 1; // number of bits for (i = 0; i < n; ++i) aom_write(w, (d >> i) & 1, mvcomp->bits[i]); } - // Fractional bits - aom_write_symbol( - w, fr, mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf, - MV_FP_SIZE); +// Fractional bits +#if CONFIG_INTRABC + if (precision > MV_SUBPEL_NONE) +#endif // CONFIG_INTRABC + { + aom_write_symbol(w, fr, mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] + : mvcomp->fp_cdf, + MV_FP_SIZE); + } // High precision bit - if (usehp) + if (precision > MV_SUBPEL_LOW_PRECISION) +#if CONFIG_NEW_MULTISYMBOL + aom_write_symbol( + w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf, + 2); +#else aom_write(w, hp, mv_class == MV_CLASS_0 ? 
mvcomp->class0_hp : mvcomp->hp); +#endif } static void build_nmv_component_cost_table(int *mvcost, const nmv_component *const mvcomp, - int usehp) { + MvSubpelPrecision precision) { int i, v; int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE]; int bits_cost[MV_OFFSET_BITS][2]; @@ -89,7 +108,7 @@ static void build_nmv_component_cost_table(int *mvcost, av1_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], av1_mv_fp_tree); av1_cost_tokens(fp_cost, mvcomp->fp, av1_mv_fp_tree); - if (usehp) { + if (precision > MV_SUBPEL_LOW_PRECISION) { class0_hp_cost[0] = av1_cost_zero(mvcomp->class0_hp); class0_hp_cost[1] = av1_cost_one(mvcomp->class0_hp); hp_cost[0] = av1_cost_zero(mvcomp->hp); @@ -110,16 +129,21 @@ static void build_nmv_component_cost_table(int *mvcost, const int b = c + CLASS0_BITS - 1; /* number of bits */ for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)]; } - if (c == MV_CLASS_0) { - cost += class0_fp_cost[d][f]; - } else { - cost += fp_cost[f]; - } - if (usehp) { +#if CONFIG_INTRABC + if (precision > MV_SUBPEL_NONE) +#endif // CONFIG_INTRABC + { if (c == MV_CLASS_0) { - cost += class0_hp_cost[e]; + cost += class0_fp_cost[d][f]; } else { - cost += hp_cost[e]; + cost += fp_cost[f]; + } + if (precision > MV_SUBPEL_LOW_PRECISION) { + if (c == MV_CLASS_0) { + cost += class0_hp_cost[e]; + } else { + cost += hp_cost[e]; + } } } mvcost[v] = cost + sign_cost[0]; @@ -127,36 +151,16 @@ static void build_nmv_component_cost_table(int *mvcost, } } +#if !CONFIG_NEW_MULTISYMBOL static void update_mv(aom_writer *w, const unsigned int ct[2], aom_prob *cur_p, aom_prob upd_p) { (void)upd_p; -#if CONFIG_TILE_GROUPS // Just use the default maximum number of tile groups to avoid passing in the // actual // number av1_cond_prob_diff_update(w, cur_p, ct, DEFAULT_MAX_NUM_TG); -#else - av1_cond_prob_diff_update(w, cur_p, ct, 1); -#endif } -#if !CONFIG_EC_ADAPT -static void write_mv_update(const aom_tree_index *tree, - aom_prob probs[/*n - 1*/], - const unsigned int counts[/*n - 1*/], int n, - aom_writer *w) { - int i; - unsigned int branch_ct[32][2]; - - // Assuming max number of probabilities <= 32 - assert(n <= 32); - - av1_tree_probs_from_distribution(tree, branch_ct, counts); - for (i = 0; i < n - 1; ++i) - update_mv(w, branch_ct[i], &probs[i], MV_UPDATE_PROB); -} -#endif - void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w, nmv_context_counts *const nmv_counts) { int i; @@ -164,34 +168,6 @@ void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w, for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) { nmv_context *const mvc = &cm->fc->nmvc[nmv_ctx]; nmv_context_counts *const counts = &nmv_counts[nmv_ctx]; -#if !CONFIG_EC_ADAPT - write_mv_update(av1_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, - w); - - for (i = 0; i < 2; ++i) { - int j; - nmv_component *comp = &mvc->comps[i]; - nmv_component_counts *comp_counts = &counts->comps[i]; - - update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB); - write_mv_update(av1_mv_class_tree, comp->classes, comp_counts->classes, - MV_CLASSES, w); - write_mv_update(av1_mv_class0_tree, comp->class0, comp_counts->class0, - CLASS0_SIZE, w); - for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB); - } - - for (i = 0; i < 2; ++i) { - int j; - for (j = 0; j < CLASS0_SIZE; ++j) - write_mv_update(av1_mv_fp_tree, mvc->comps[i].class0_fp[j], - counts->comps[i].class0_fp[j], MV_FP_SIZE, w); - - write_mv_update(av1_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp, - 
MV_FP_SIZE, w); - } -#endif if (usehp) { for (i = 0; i < 2; ++i) { @@ -202,6 +178,7 @@ void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w, } } } +#endif void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref, nmv_context *mvctx, int usehp) { @@ -230,18 +207,19 @@ void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref, aom_write_symbol(w, j, mvctx->joint_cdf, MV_JOINTS); if (mv_joint_vertical(j)) - encode_mv_component(w, diff.row, &mvctx->comps[0], 0); + encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE); if (mv_joint_horizontal(j)) - encode_mv_component(w, diff.col, &mvctx->comps[1], 0); + encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE); } #endif // CONFIG_INTRABC void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2], - const nmv_context *ctx, int usehp) { + const nmv_context *ctx, + MvSubpelPrecision precision) { av1_cost_tokens(mvjoint, ctx->joints, av1_mv_joint_tree); - build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp); - build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp); + build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision); + build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision); } #if CONFIG_EXT_INTER @@ -284,6 +262,27 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext, mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx); nmv_context_counts *counts = &nmv_counts[nmv_ctx]; av1_inc_mv(&diff, counts, 1); +#if CONFIG_COMPOUND_SINGLEREF + } else { + assert( // mode == SR_NEAREST_NEWMV || + mode == SR_NEAR_NEWMV || mode == SR_ZERO_NEWMV || mode == SR_NEW_NEWMV); + const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv; + int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); + int nmv_ctx = + av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], + mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx); + nmv_context_counts *counts = &nmv_counts[nmv_ctx]; + (void)pred_mvs; + MV diff; + if (mode == SR_NEW_NEWMV) { + diff.row = mvs[0].as_mv.row - ref->row; + diff.col = mvs[0].as_mv.col - ref->col; + av1_inc_mv(&diff, counts, 1); + } + diff.row = mvs[1].as_mv.row - ref->row; + diff.col = mvs[1].as_mv.col - ref->col; + av1_inc_mv(&diff, counts, 1); +#endif // CONFIG_COMPOUND_SINGLEREF } } @@ -328,7 +327,7 @@ static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2], av1_inc_mv(&diff, counts, 1); } } -#else +#else // !CONFIG_EXT_INTER static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext, const int_mv mvs[2], const int_mv pred_mvs[2], nmv_context_counts *nmv_counts) { diff --git a/third_party/aom/av1/encoder/encodemv.h b/third_party/aom/av1/encoder/encodemv.h index 6d442147f..8689cec27 100644 --- a/third_party/aom/av1/encoder/encodemv.h +++ b/third_party/aom/av1/encoder/encodemv.h @@ -20,14 +20,17 @@ extern "C" { void av1_entropy_mv_init(void); +#if !CONFIG_NEW_MULTISYMBOL void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w, nmv_context_counts *const counts); +#endif void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref, nmv_context *mvctx, int usehp); void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2], - const nmv_context *mvctx, int usehp); + const nmv_context *mvctx, + MvSubpelPrecision precision); void av1_update_mv_count(ThreadData *td); diff --git a/third_party/aom/av1/encoder/encoder.c b/third_party/aom/av1/encoder/encoder.c index 4782ce2b7..943e2c6a0 100644 --- a/third_party/aom/av1/encoder/encoder.c +++ 
b/third_party/aom/av1/encoder/encoder.c @@ -18,7 +18,6 @@ #include "av1/common/alloccommon.h" #if CONFIG_CDEF #include "av1/common/cdef.h" -#include "av1/common/clpf.h" #endif // CONFIG_CDEF #include "av1/common/filter.h" #include "av1/common/idct.h" @@ -31,6 +30,9 @@ #include "av1/encoder/aq_cyclicrefresh.h" #include "av1/encoder/aq_variance.h" #include "av1/encoder/bitstream.h" +#if CONFIG_BGSPRITE +#include "av1/encoder/bgsprite.h" +#endif // CONFIG_BGSPRITE #if CONFIG_ANS #include "aom_dsp/buf_ans.h" #endif @@ -73,6 +75,8 @@ #if CONFIG_ENTROPY_STATS FRAME_COUNTS aggregate_fc; +// Aggregate frame counts per frame context type +FRAME_COUNTS aggregate_fc_per_type[FRAME_CONTEXTS]; #endif // CONFIG_ENTROPY_STATS #define AM_SEGMENT_ID_INACTIVE 7 @@ -421,7 +425,6 @@ void av1_initialize_enc(void) { static void dealloc_compressor_data(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; - int i; aom_free(cpi->mbmi_ext_base); cpi->mbmi_ext_base = NULL; @@ -467,10 +470,6 @@ static void dealloc_compressor_data(AV1_COMP *cpi) { cpi->td.mb.mask_buf = NULL; #endif - // Free up-sampled reference buffers. - for (i = 0; i < (REF_FRAMES + 1); i++) - aom_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf); - av1_free_ref_frame_buffers(cm->buffer_pool); #if CONFIG_LV_MAP av1_free_txb_buf(cpi); @@ -483,8 +482,11 @@ static void dealloc_compressor_data(AV1_COMP *cpi) { aom_free_frame_buffer(&cpi->last_frame_db); aom_free_frame_buffer(&cpi->trial_frame_rst); aom_free(cpi->extra_rstbuf); - for (i = 0; i < MAX_MB_PLANE; ++i) - av1_free_restoration_struct(&cpi->rst_search[i]); + { + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) + av1_free_restoration_struct(&cpi->rst_search[i]); + } #endif // CONFIG_LOOP_RESTORATION aom_free_frame_buffer(&cpi->scaled_source); aom_free_frame_buffer(&cpi->scaled_last_source); @@ -497,8 +499,7 @@ static void dealloc_compressor_data(AV1_COMP *cpi) { av1_free_pc_tree(&cpi->td); #if CONFIG_PALETTE - if (cpi->common.allow_screen_content_tools) - aom_free(cpi->td.mb.palette_buffer); + aom_free(cpi->td.mb.palette_buffer); #endif // CONFIG_PALETTE #if CONFIG_ANS @@ -735,13 +736,18 @@ static void alloc_util_frame_buffers(AV1_COMP *cpi) { NULL, NULL)) aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate last frame deblocked buffer"); - if (aom_realloc_frame_buffer(&cpi->trial_frame_rst, cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, + if (aom_realloc_frame_buffer( + &cpi->trial_frame_rst, +#if CONFIG_FRAME_SUPERRES + cm->superres_upscaled_width, cm->superres_upscaled_height, +#else + cm->width, cm->height, +#endif // CONFIG_FRAME_SUPERRES + cm->subsampling_x, cm->subsampling_y, #if CONFIG_HIGHBITDEPTH - cm->use_highbitdepth, + cm->use_highbitdepth, #endif - AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, - NULL, NULL)) + AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate trial restored frame buffer"); int extra_rstbuf_sz = RESTORATION_EXTBUF_SIZE; @@ -821,93 +827,107 @@ void av1_new_framerate(AV1_COMP *cpi, double framerate) { static void set_tile_info(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; -#if CONFIG_TILE_GROUPS && CONFIG_DEPENDENT_HORZTILES +#if CONFIG_DEPENDENT_HORZTILES int tile_row, tile_col, num_tiles_in_tg; int tg_row_start, tg_col_start; #endif #if CONFIG_EXT_TILE + if (cpi->oxcf.large_scale_tile) { #if CONFIG_EXT_PARTITION - if (cpi->oxcf.superblock_size != AOM_SUPERBLOCK_SIZE_64X64) { - cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 32); - 
cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 32); - cm->tile_width <<= MAX_MIB_SIZE_LOG2; - cm->tile_height <<= MAX_MIB_SIZE_LOG2; - } else { + if (cpi->oxcf.superblock_size != AOM_SUPERBLOCK_SIZE_64X64) { + cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 32); + cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 32); + cm->tile_width <<= MAX_MIB_SIZE_LOG2; + cm->tile_height <<= MAX_MIB_SIZE_LOG2; + } else { + cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64); + cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64); + cm->tile_width <<= MAX_MIB_SIZE_LOG2 - 1; + cm->tile_height <<= MAX_MIB_SIZE_LOG2 - 1; + } +#else cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64); cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64); - cm->tile_width <<= MAX_MIB_SIZE_LOG2 - 1; - cm->tile_height <<= MAX_MIB_SIZE_LOG2 - 1; - } -#else - cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64); - cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64); - cm->tile_width <<= MAX_MIB_SIZE_LOG2; - cm->tile_height <<= MAX_MIB_SIZE_LOG2; + cm->tile_width <<= MAX_MIB_SIZE_LOG2; + cm->tile_height <<= MAX_MIB_SIZE_LOG2; #endif // CONFIG_EXT_PARTITION - cm->tile_width = AOMMIN(cm->tile_width, cm->mi_cols); - cm->tile_height = AOMMIN(cm->tile_height, cm->mi_rows); + cm->tile_width = AOMMIN(cm->tile_width, cm->mi_cols); + cm->tile_height = AOMMIN(cm->tile_height, cm->mi_rows); - assert(cm->tile_width >> MAX_MIB_SIZE <= 32); - assert(cm->tile_height >> MAX_MIB_SIZE <= 32); + assert(cm->tile_width >> MAX_MIB_SIZE <= 32); + assert(cm->tile_height >> MAX_MIB_SIZE <= 32); - // Get the number of tiles - cm->tile_cols = 1; - while (cm->tile_cols * cm->tile_width < cm->mi_cols) ++cm->tile_cols; + // Get the number of tiles + cm->tile_cols = 1; + while (cm->tile_cols * cm->tile_width < cm->mi_cols) ++cm->tile_cols; - cm->tile_rows = 1; - while (cm->tile_rows * cm->tile_height < cm->mi_rows) ++cm->tile_rows; -#else - int min_log2_tile_cols, max_log2_tile_cols; - av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); + cm->tile_rows = 1; + while (cm->tile_rows * cm->tile_height < cm->mi_rows) ++cm->tile_rows; + } else { +#endif // CONFIG_EXT_TILE + int min_log2_tile_cols, max_log2_tile_cols; + av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - cm->log2_tile_cols = - clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols); - cm->log2_tile_rows = cpi->oxcf.tile_rows; + cm->log2_tile_cols = + clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols); + cm->log2_tile_rows = cpi->oxcf.tile_rows; - cm->tile_cols = 1 << cm->log2_tile_cols; - cm->tile_rows = 1 << cm->log2_tile_rows; + cm->tile_cols = 1 << cm->log2_tile_cols; + cm->tile_rows = 1 << cm->log2_tile_rows; - cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2); - cm->tile_width >>= cm->log2_tile_cols; - cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2); - cm->tile_height >>= cm->log2_tile_rows; + cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2); + cm->tile_width >>= cm->log2_tile_cols; + cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2); + cm->tile_height >>= cm->log2_tile_rows; - // round to integer multiples of max superblock size - cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2); - cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2); + // round to integer multiples of max superblock size + cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2); + cm->tile_height = 
ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2); +#if CONFIG_EXT_TILE + } #endif // CONFIG_EXT_TILE #if CONFIG_DEPENDENT_HORZTILES cm->dependent_horz_tiles = cpi->oxcf.dependent_horz_tiles; #if CONFIG_EXT_TILE - if (cm->tile_rows <= 1) cm->dependent_horz_tiles = 0; -#else - if (cm->log2_tile_rows == 0) cm->dependent_horz_tiles = 0; -#endif -#if CONFIG_TILE_GROUPS - if (cpi->oxcf.mtu == 0) { - cm->num_tg = cpi->oxcf.num_tile_groups; + if (cm->large_scale_tile) { + // May not needed since cpi->oxcf.dependent_horz_tiles is already adjusted. + cm->dependent_horz_tiles = 0; } else { - // Use a default value for the purposes of weighting costs in probability - // updates - cm->num_tg = DEFAULT_MAX_NUM_TG; +#endif // CONFIG_EXT_TILE + if (cm->log2_tile_rows == 0) cm->dependent_horz_tiles = 0; +#if CONFIG_EXT_TILE } - num_tiles_in_tg = - (cm->tile_cols * cm->tile_rows + cm->num_tg - 1) / cm->num_tg; - tg_row_start = 0; - tg_col_start = 0; - for (tile_row = 0; tile_row < cm->tile_rows; ++tile_row) { - for (tile_col = 0; tile_col < cm->tile_cols; ++tile_col) { - if ((tile_row * cm->tile_cols + tile_col) % num_tiles_in_tg == 0) { - tg_row_start = tile_row; - tg_col_start = tile_col; +#endif // CONFIG_EXT_TILE + +#if CONFIG_EXT_TILE + if (!cm->large_scale_tile) { +#endif // CONFIG_EXT_TILE + if (cpi->oxcf.mtu == 0) { + cm->num_tg = cpi->oxcf.num_tile_groups; + } else { + // Use a default value for the purposes of weighting costs in probability + // updates + cm->num_tg = DEFAULT_MAX_NUM_TG; + } + num_tiles_in_tg = + (cm->tile_cols * cm->tile_rows + cm->num_tg - 1) / cm->num_tg; + tg_row_start = 0; + tg_col_start = 0; + for (tile_row = 0; tile_row < cm->tile_rows; ++tile_row) { + for (tile_col = 0; tile_col < cm->tile_cols; ++tile_col) { + if ((tile_row * cm->tile_cols + tile_col) % num_tiles_in_tg == 0) { + tg_row_start = tile_row; + tg_col_start = tile_col; + } + cm->tile_group_start_row[tile_row][tile_col] = tg_row_start; + cm->tile_group_start_col[tile_row][tile_col] = tg_col_start; } - cm->tile_group_start_row[tile_row][tile_col] = tg_row_start; - cm->tile_group_start_col[tile_row][tile_col] = tg_col_start; } +#if CONFIG_EXT_TILE } -#endif +#endif // CONFIG_EXT_TILE #endif #if CONFIG_LOOPFILTERING_ACROSS_TILES @@ -965,6 +985,10 @@ static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) { cm->use_highbitdepth = oxcf->use_highbitdepth; #endif cm->color_space = oxcf->color_space; +#if CONFIG_COLORSPACE_HEADERS + cm->transfer_function = oxcf->transfer_function; + cm->chroma_sample_position = oxcf->chroma_sample_position; +#endif cm->color_range = oxcf->color_range; cm->width = oxcf->width; @@ -1175,6 +1199,21 @@ MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad4x4x3) MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad4x4x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x4x4d) +#if CONFIG_EXT_PARTITION_TYPES +MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x16) +MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x16_avg) +MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x16x4d) +MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x4) +MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x4_avg) +MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x4x4d) +MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x32) +MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x32_avg) +MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x32x4d) +MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x8) +MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x8_avg) +MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x8x4d) +#endif + #if CONFIG_EXT_INTER #define HIGHBD_MBFP(BT, MCSDF, MCSVF) \ cpi->fn_ptr[BT].msdf = MCSDF; \ @@ -1223,6 +1262,13 @@ 
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x8) MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x4) MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x8) MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x4) + +#if CONFIG_EXT_PARTITION_TYPES +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x16) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x4) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x32) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x8) +#endif #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR @@ -1266,6 +1312,13 @@ MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x8) MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x4) MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x8) MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x4) + +#if CONFIG_EXT_PARTITION_TYPES +MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x16) +MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x4) +MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x32) +MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x8) +#endif #endif // CONFIG_MOTION_VAR static void highbd_set_var_fns(AV1_COMP *const cpi) { @@ -1273,6 +1326,32 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { if (cm->use_highbitdepth) { switch (cm->bit_depth) { case AOM_BITS_8: +#if CONFIG_EXT_PARTITION_TYPES + HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits8, + aom_highbd_sad32x8_avg_bits8, aom_highbd_8_variance32x8, + aom_highbd_8_sub_pixel_variance32x8, + aom_highbd_8_sub_pixel_avg_variance32x8, NULL, NULL, + aom_highbd_sad32x8x4d_bits8) + + HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits8, + aom_highbd_sad8x32_avg_bits8, aom_highbd_8_variance8x32, + aom_highbd_8_sub_pixel_variance8x32, + aom_highbd_8_sub_pixel_avg_variance8x32, NULL, NULL, + aom_highbd_sad8x32x4d_bits8) + + HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits8, + aom_highbd_sad16x4_avg_bits8, aom_highbd_8_variance16x4, + aom_highbd_8_sub_pixel_variance16x4, + aom_highbd_8_sub_pixel_avg_variance16x4, NULL, NULL, + aom_highbd_sad16x4x4d_bits8) + + HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits8, + aom_highbd_sad4x16_avg_bits8, aom_highbd_8_variance4x16, + aom_highbd_8_sub_pixel_variance4x16, + aom_highbd_8_sub_pixel_avg_variance4x16, NULL, NULL, + aom_highbd_sad4x16x4d_bits8) +#endif + HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits8, aom_highbd_sad32x16_avg_bits8, aom_highbd_8_variance32x16, aom_highbd_8_sub_pixel_variance32x16, @@ -1354,7 +1433,7 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { aom_highbd_8_sub_pixel_avg_variance4x4, aom_highbd_sad4x4x3_bits8, aom_highbd_sad4x4x8_bits8, aom_highbd_sad4x4x4d_bits8) -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 HIGHBD_BFP(BLOCK_2X2, NULL, NULL, aom_highbd_8_variance2x2, NULL, NULL, NULL, NULL, NULL) HIGHBD_BFP(BLOCK_4X2, NULL, NULL, aom_highbd_8_variance4x2, NULL, NULL, @@ -1420,6 +1499,19 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { aom_highbd_8_masked_sub_pixel_variance8x4) HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8, aom_highbd_8_masked_sub_pixel_variance4x4) +#if CONFIG_EXT_PARTITION_TYPES + HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits8, + aom_highbd_8_masked_sub_pixel_variance32x8) + + HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits8, + aom_highbd_8_masked_sub_pixel_variance8x32) + + HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits8, + aom_highbd_8_masked_sub_pixel_variance16x4) + + HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits8, + aom_highbd_8_masked_sub_pixel_variance4x16) +#endif #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR #if CONFIG_EXT_PARTITION @@ 
-1472,10 +1564,53 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits8, aom_highbd_obmc_variance4x4, aom_highbd_obmc_sub_pixel_variance4x4) +#if CONFIG_EXT_PARTITION_TYPES + HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits8, + aom_highbd_obmc_variance32x8, + aom_highbd_obmc_sub_pixel_variance32x8) + + HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits8, + aom_highbd_obmc_variance8x32, + aom_highbd_obmc_sub_pixel_variance8x32) + + HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits8, + aom_highbd_obmc_variance16x4, + aom_highbd_obmc_sub_pixel_variance16x4) + + HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits8, + aom_highbd_obmc_variance4x16, + aom_highbd_obmc_sub_pixel_variance4x16) +#endif #endif // CONFIG_MOTION_VAR break; case AOM_BITS_10: +#if CONFIG_EXT_PARTITION_TYPES + HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits10, + aom_highbd_sad32x8_avg_bits10, aom_highbd_10_variance32x8, + aom_highbd_10_sub_pixel_variance32x8, + aom_highbd_10_sub_pixel_avg_variance32x8, NULL, NULL, + aom_highbd_sad32x8x4d_bits10) + + HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits10, + aom_highbd_sad8x32_avg_bits10, aom_highbd_10_variance8x32, + aom_highbd_10_sub_pixel_variance8x32, + aom_highbd_10_sub_pixel_avg_variance8x32, NULL, NULL, + aom_highbd_sad8x32x4d_bits10) + + HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits10, + aom_highbd_sad16x4_avg_bits10, aom_highbd_10_variance16x4, + aom_highbd_10_sub_pixel_variance16x4, + aom_highbd_10_sub_pixel_avg_variance16x4, NULL, NULL, + aom_highbd_sad16x4x4d_bits10) + + HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits10, + aom_highbd_sad4x16_avg_bits10, aom_highbd_10_variance4x16, + aom_highbd_10_sub_pixel_variance4x16, + aom_highbd_10_sub_pixel_avg_variance4x16, NULL, NULL, + aom_highbd_sad4x16x4d_bits10) +#endif + HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits10, aom_highbd_sad32x16_avg_bits10, aom_highbd_10_variance32x16, aom_highbd_10_sub_pixel_variance32x16, @@ -1559,7 +1694,7 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { aom_highbd_10_sub_pixel_avg_variance4x4, aom_highbd_sad4x4x3_bits10, aom_highbd_sad4x4x8_bits10, aom_highbd_sad4x4x4d_bits10) -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 HIGHBD_BFP(BLOCK_2X2, NULL, NULL, aom_highbd_10_variance2x2, NULL, NULL, NULL, NULL, NULL) HIGHBD_BFP(BLOCK_4X2, NULL, NULL, aom_highbd_10_variance4x2, NULL, NULL, @@ -1627,6 +1762,19 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { aom_highbd_10_masked_sub_pixel_variance8x4) HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10, aom_highbd_10_masked_sub_pixel_variance4x4) +#if CONFIG_EXT_PARTITION_TYPES + HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits10, + aom_highbd_10_masked_sub_pixel_variance32x8) + + HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits10, + aom_highbd_10_masked_sub_pixel_variance8x32) + + HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits10, + aom_highbd_10_masked_sub_pixel_variance16x4) + + HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits10, + aom_highbd_10_masked_sub_pixel_variance4x16) +#endif #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR #if CONFIG_EXT_PARTITION @@ -1679,10 +1827,53 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits10, aom_highbd_10_obmc_variance4x4, aom_highbd_10_obmc_sub_pixel_variance4x4) +#if CONFIG_EXT_PARTITION_TYPES + HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits10, + aom_highbd_10_obmc_variance32x8, + 
aom_highbd_10_obmc_sub_pixel_variance32x8) + + HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits10, + aom_highbd_10_obmc_variance8x32, + aom_highbd_10_obmc_sub_pixel_variance8x32) + + HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits10, + aom_highbd_10_obmc_variance16x4, + aom_highbd_10_obmc_sub_pixel_variance16x4) + + HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits10, + aom_highbd_10_obmc_variance4x16, + aom_highbd_10_obmc_sub_pixel_variance4x16) +#endif #endif // CONFIG_MOTION_VAR break; case AOM_BITS_12: +#if CONFIG_EXT_PARTITION_TYPES + HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits12, + aom_highbd_sad32x8_avg_bits12, aom_highbd_12_variance32x8, + aom_highbd_12_sub_pixel_variance32x8, + aom_highbd_12_sub_pixel_avg_variance32x8, NULL, NULL, + aom_highbd_sad32x8x4d_bits12) + + HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits12, + aom_highbd_sad8x32_avg_bits12, aom_highbd_12_variance8x32, + aom_highbd_12_sub_pixel_variance8x32, + aom_highbd_12_sub_pixel_avg_variance8x32, NULL, NULL, + aom_highbd_sad8x32x4d_bits12) + + HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits12, + aom_highbd_sad16x4_avg_bits12, aom_highbd_12_variance16x4, + aom_highbd_12_sub_pixel_variance16x4, + aom_highbd_12_sub_pixel_avg_variance16x4, NULL, NULL, + aom_highbd_sad16x4x4d_bits12) + + HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits12, + aom_highbd_sad4x16_avg_bits12, aom_highbd_12_variance4x16, + aom_highbd_12_sub_pixel_variance4x16, + aom_highbd_12_sub_pixel_avg_variance4x16, NULL, NULL, + aom_highbd_sad4x16x4d_bits12) +#endif + HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits12, aom_highbd_sad32x16_avg_bits12, aom_highbd_12_variance32x16, aom_highbd_12_sub_pixel_variance32x16, @@ -1766,7 +1957,7 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { aom_highbd_12_sub_pixel_avg_variance4x4, aom_highbd_sad4x4x3_bits12, aom_highbd_sad4x4x8_bits12, aom_highbd_sad4x4x4d_bits12) -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 HIGHBD_BFP(BLOCK_2X2, NULL, NULL, aom_highbd_12_variance2x2, NULL, NULL, NULL, NULL, NULL) HIGHBD_BFP(BLOCK_4X2, NULL, NULL, aom_highbd_12_variance4x2, NULL, NULL, @@ -1834,6 +2025,19 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { aom_highbd_12_masked_sub_pixel_variance8x4) HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12, aom_highbd_12_masked_sub_pixel_variance4x4) +#if CONFIG_EXT_PARTITION_TYPES + HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits12, + aom_highbd_12_masked_sub_pixel_variance32x8) + + HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits12, + aom_highbd_12_masked_sub_pixel_variance8x32) + + HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits12, + aom_highbd_12_masked_sub_pixel_variance16x4) + + HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits12, + aom_highbd_12_masked_sub_pixel_variance4x16) +#endif #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR @@ -1887,6 +2091,23 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits12, aom_highbd_12_obmc_variance4x4, aom_highbd_12_obmc_sub_pixel_variance4x4) +#if CONFIG_EXT_PARTITION_TYPES + HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits12, + aom_highbd_12_obmc_variance32x8, + aom_highbd_12_obmc_sub_pixel_variance32x8) + + HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits12, + aom_highbd_12_obmc_variance8x32, + aom_highbd_12_obmc_sub_pixel_variance8x32) + + HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits12, + aom_highbd_12_obmc_variance16x4, + aom_highbd_12_obmc_sub_pixel_variance16x4) + + HIGHBD_OBFP(BLOCK_4X16, 
aom_highbd_obmc_sad4x16_bits12, + aom_highbd_12_obmc_variance4x16, + aom_highbd_12_obmc_sub_pixel_variance4x16) +#endif #endif // CONFIG_MOTION_VAR break; @@ -1933,10 +2154,15 @@ void set_compound_tools(AV1_COMMON *cm) { void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) { AV1_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; + MACROBLOCK *const x = &cpi->td.mb; if (cm->profile != oxcf->profile) cm->profile = oxcf->profile; cm->bit_depth = oxcf->bit_depth; cm->color_space = oxcf->color_space; +#if CONFIG_COLORSPACE_HEADERS + cm->transfer_function = oxcf->transfer_function; + cm->chroma_sample_position = oxcf->chroma_sample_position; +#endif cm->color_range = oxcf->color_range; if (cm->profile <= PROFILE_1) @@ -1945,9 +2171,9 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) { assert(cm->bit_depth > AOM_BITS_8); cpi->oxcf = *oxcf; - cpi->td.mb.e_mbd.bd = (int)cm->bit_depth; + x->e_mbd.bd = (int)cm->bit_depth; #if CONFIG_GLOBAL_MOTION - cpi->td.mb.e_mbd.global_motion = cm->global_motion; + x->e_mbd.global_motion = cm->global_motion; #endif // CONFIG_GLOBAL_MOTION if ((oxcf->pass == 0) && (oxcf->rc_mode == AOM_Q)) { @@ -1969,17 +2195,9 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) { cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE; #if CONFIG_PALETTE - cm->allow_screen_content_tools = (cpi->oxcf.content == AOM_CONTENT_SCREEN); - if (cm->allow_screen_content_tools) { - MACROBLOCK *x = &cpi->td.mb; - if (x->palette_buffer == 0) { - CHECK_MEM_ERROR(cm, x->palette_buffer, - aom_memalign(16, sizeof(*x->palette_buffer))); - } - // Reallocate the pc_tree, as it's contents depends on - // the state of cm->allow_screen_content_tools - av1_free_pc_tree(&cpi->td); - av1_setup_pc_tree(&cpi->common, &cpi->td); + if (x->palette_buffer == NULL) { + CHECK_MEM_ERROR(cm, x->palette_buffer, + aom_memalign(16, sizeof(*x->palette_buffer))); } #endif // CONFIG_PALETTE #if CONFIG_EXT_INTER @@ -2058,15 +2276,6 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) { #endif // CONFIG_ANS && ANS_MAX_SYMBOLS } -static INLINE void init_upsampled_ref_frame_bufs(AV1_COMP *cpi) { - int i; - - for (i = 0; i < (REF_FRAMES + 1); ++i) { - cpi->upsampled_ref_bufs[i].ref_count = 0; - cpi->upsampled_ref_idx[i] = INVALID_IDX; - } -} - AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, BufferPool *const pool) { unsigned int i; @@ -2099,10 +2308,6 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, cpi->resize_state = 0; cpi->resize_avg_qp = 0; cpi->resize_buffer_underflow = 0; - cpi->resize_scale_num = 16; - cpi->resize_scale_den = 16; - cpi->resize_next_scale_num = 16; - cpi->resize_next_scale_den = 16; cpi->common.buffer_pool = pool; @@ -2197,6 +2402,7 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, #endif #if CONFIG_ENTROPY_STATS av1_zero(aggregate_fc); + av1_zero_array(aggregate_fc_per_type, FRAME_CONTEXTS); #endif // CONFIG_ENTROPY_STATS cpi->first_time_stamp_ever = INT64_MAX; @@ -2278,8 +2484,6 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, #endif - init_upsampled_ref_frame_bufs(cpi); - av1_set_speed_features_framesize_independent(cpi); av1_set_speed_features_framesize_dependent(cpi); @@ -2293,6 +2497,24 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, cpi->fn_ptr[BT].sdx8f = SDX8F; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; +#if CONFIG_EXT_PARTITION_TYPES + BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16, + aom_sub_pixel_variance4x16, 
aom_sub_pixel_avg_variance4x16, NULL, NULL, + aom_sad4x16x4d) + + BFP(BLOCK_16X4, aom_sad16x4, aom_sad16x4_avg, aom_variance16x4, + aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4, NULL, NULL, + aom_sad16x4x4d) + + BFP(BLOCK_8X32, aom_sad8x32, aom_sad8x32_avg, aom_variance8x32, + aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32, NULL, NULL, + aom_sad8x32x4d) + + BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8, + aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8, NULL, NULL, + aom_sad32x8x4d) +#endif + #if CONFIG_EXT_PARTITION BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128, aom_sub_pixel_variance128x128, aom_sub_pixel_avg_variance128x128, @@ -2359,7 +2581,7 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x3, aom_sad4x4x8, aom_sad4x4x4d) -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 BFP(BLOCK_2X2, NULL, NULL, aom_variance2x2, NULL, NULL, NULL, NULL, NULL) BFP(BLOCK_2X4, NULL, NULL, aom_variance2x4, NULL, NULL, NULL, NULL, NULL) BFP(BLOCK_4X2, NULL, NULL, aom_variance4x2, NULL, NULL, NULL, NULL, NULL) @@ -2405,6 +2627,20 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, aom_obmc_sub_pixel_variance8x4) OBFP(BLOCK_4X4, aom_obmc_sad4x4, aom_obmc_variance4x4, aom_obmc_sub_pixel_variance4x4) + +#if CONFIG_EXT_PARTITION_TYPES + OBFP(BLOCK_4X16, aom_obmc_sad4x16, aom_obmc_variance4x16, + aom_obmc_sub_pixel_variance4x16) + + OBFP(BLOCK_16X4, aom_obmc_sad16x4, aom_obmc_variance16x4, + aom_obmc_sub_pixel_variance16x4) + + OBFP(BLOCK_8X32, aom_obmc_sad8x32, aom_obmc_variance8x32, + aom_obmc_sub_pixel_variance8x32) + + OBFP(BLOCK_32X8, aom_obmc_sad32x8, aom_obmc_variance32x8, + aom_obmc_sub_pixel_variance32x8) +#endif #endif // CONFIG_MOTION_VAR #if CONFIG_EXT_INTER @@ -2431,6 +2667,16 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_sub_pixel_variance4x8) MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_sub_pixel_variance8x4) MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_sub_pixel_variance4x4) + +#if CONFIG_EXT_PARTITION_TYPES + MBFP(BLOCK_4X16, aom_masked_sad4x16, aom_masked_sub_pixel_variance4x16) + + MBFP(BLOCK_16X4, aom_masked_sad16x4, aom_masked_sub_pixel_variance16x4) + + MBFP(BLOCK_8X32, aom_masked_sad8x32, aom_masked_sub_pixel_variance8x32) + + MBFP(BLOCK_32X8, aom_masked_sad32x8, aom_masked_sub_pixel_variance32x8) +#endif #endif // CONFIG_EXT_INTER #if CONFIG_HIGHBITDEPTH @@ -2449,7 +2695,9 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, av1_loop_filter_init(cm); #if CONFIG_FRAME_SUPERRES - cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR; + cm->superres_scale_numerator = SCALE_DENOMINATOR; + cm->superres_upscaled_width = oxcf->width; + cm->superres_upscaled_height = oxcf->height; #endif // CONFIG_FRAME_SUPERRES #if CONFIG_LOOP_RESTORATION av1_loop_restoration_precal(); @@ -2479,6 +2727,8 @@ void av1_remove_compressor(AV1_COMP *cpi) { fprintf(stderr, "Writing counts.stt\n"); FILE *f = fopen("counts.stt", "wb"); fwrite(&aggregate_fc, sizeof(aggregate_fc), 1, f); + fwrite(aggregate_fc_per_type, sizeof(aggregate_fc_per_type[0]), + FRAME_CONTEXTS, f); fclose(f); } #endif // CONFIG_ENTROPY_STATS @@ -2566,8 +2816,7 @@ void av1_remove_compressor(AV1_COMP *cpi) { // Deallocate allocated thread data. 
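The BFP/MBFP/OBFP registrations added above for the CONFIG_EXT_PARTITION_TYPES sizes (4x16, 16x4, 8x32, 32x8) all fill the same per-block-size table of function pointers. A reduced, hypothetical sketch of that table-plus-macro pattern follows; libaom's fn_ptr entries carry many more slots and different signatures, so the names and struct here are illustrative only.

    typedef unsigned int (*sad_fn_t)(const unsigned char *src, int src_stride,
                                     const unsigned char *ref, int ref_stride);
    typedef struct { sad_fn_t sdf; sad_fn_t sdaf; } fn_entry_t;  /* reduced slot set */
    enum { SKETCH_BLOCK_4X16, SKETCH_BLOCK_16X4, SKETCH_BLOCK_COUNT };
    static fn_entry_t sketch_fn_ptr[SKETCH_BLOCK_COUNT];
    /* Same shape as the BFP macro above: one invocation per block size fills
       every slot of that block size's entry. */
    #define SKETCH_BFP(BT, SDF, SDAF) \
      do { sketch_fn_ptr[BT].sdf = (SDF); sketch_fn_ptr[BT].sdaf = (SDAF); } while (0)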
if (t < cpi->num_workers - 1) { #if CONFIG_PALETTE - if (cpi->common.allow_screen_content_tools) - aom_free(thread_data->td->palette_buffer); + aom_free(thread_data->td->palette_buffer); #endif // CONFIG_PALETTE #if CONFIG_MOTION_VAR aom_free(thread_data->td->above_pred_buf); @@ -2835,71 +3084,6 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) { } #endif // OUTPUT_YUV_REC -#if CONFIG_HIGHBITDEPTH -static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, int planes, - int bd) { -#else -static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, int planes) { -#endif // CONFIG_HIGHBITDEPTH - const int src_w = src->y_crop_width; - const int src_h = src->y_crop_height; - const int dst_w = dst->y_crop_width; - const int dst_h = dst->y_crop_height; - const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer, - src->v_buffer }; - const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; - uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer }; - const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride }; - const InterpFilterParams interp_filter_params = - av1_get_interp_filter_params(EIGHTTAP_REGULAR); - const int16_t *kernel = interp_filter_params.filter_ptr; - const int taps = interp_filter_params.taps; - int x, y, i; - - assert(planes <= 3); - for (y = 0; y < dst_h; y += 16) { - for (x = 0; x < dst_w; x += 16) { - for (i = 0; i < planes; ++i) { - const int factor = (i == 0 || i == 3 ? 1 : 2); - const int x_q4 = x * (16 / factor) * src_w / dst_w; - const int y_q4 = y * (16 / factor) * src_h / dst_h; - const int src_stride = src_strides[i]; - const int dst_stride = dst_strides[i]; - const uint8_t *src_ptr = srcs[i] + - (y / factor) * src_h / dst_h * src_stride + - (x / factor) * src_w / dst_w; - uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor); - -#if CONFIG_HIGHBITDEPTH - if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - aom_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride, - &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w, - &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h, - 16 / factor, 16 / factor, bd); - } else { - aom_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, - &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w, - &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h, - 16 / factor, 16 / factor); - } -#else - aom_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, - &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w, - &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h, - 16 / factor, 16 / factor); -#endif // CONFIG_HIGHBITDEPTH - } - } - } - - if (planes == 1) - aom_extend_frame_borders_y(dst); - else - aom_extend_frame_borders(dst); -} - #if CONFIG_GLOBAL_MOTION #define GM_RECODE_LOOP_NUM4X4_FACTOR 192 static int recode_loop_test_global_motion(AV1_COMP *cpi) { @@ -2949,52 +3133,6 @@ static int recode_loop_test(AV1_COMP *cpi, int high_limit, int low_limit, int q, return force_recode; } -static INLINE int get_free_upsampled_ref_buf(EncRefCntBuffer *ubufs) { - int i; - - for (i = 0; i < (REF_FRAMES + 1); i++) { - if (!ubufs[i].ref_count) { - return i; - } - } - return INVALID_IDX; -} - -// Up-sample 1 reference frame. 
-static INLINE int upsample_ref_frame(AV1_COMP *cpi, - const YV12_BUFFER_CONFIG *const ref) { - AV1_COMMON *const cm = &cpi->common; - EncRefCntBuffer *ubufs = cpi->upsampled_ref_bufs; - int new_uidx = get_free_upsampled_ref_buf(ubufs); - - if (new_uidx == INVALID_IDX) { - return INVALID_IDX; - } else { - YV12_BUFFER_CONFIG *upsampled_ref = &ubufs[new_uidx].buf; - - // Can allocate buffer for Y plane only. - if (upsampled_ref->buffer_alloc_sz < (ref->buffer_alloc_sz << 6)) - if (aom_realloc_frame_buffer(upsampled_ref, (cm->width << 3), - (cm->height << 3), cm->subsampling_x, - cm->subsampling_y, -#if CONFIG_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - (AOM_BORDER_IN_PIXELS << 3), - cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate up-sampled frame buffer"); - -// Currently, only Y plane is up-sampled, U, V are not used. -#if CONFIG_HIGHBITDEPTH - scale_and_extend_frame(ref, upsampled_ref, 1, (int)cm->bit_depth); -#else - scale_and_extend_frame(ref, upsampled_ref, 1); -#endif - return new_uidx; - } -} - #define DUMP_REF_FRAME_IMAGES 0 #if DUMP_REF_FRAME_IMAGES == 1 @@ -3068,34 +3206,50 @@ static INLINE void shift_last_ref_frames(AV1_COMP *cpi) { } #endif // CONFIG_EXT_REFS +#if CONFIG_VAR_REFS +static void enc_check_valid_ref_frames(AV1_COMP *const cpi) { + AV1_COMMON *const cm = &cpi->common; + MV_REFERENCE_FRAME ref_frame; + + // TODO(zoeliu): To handle ALTREF_FRAME the same way as do with other + // reference frames. Current encoder invalid ALTREF when ALTREF + // is the same as LAST, but invalid all the other references + // when they are the same as ALTREF. + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + int ref_buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); + RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - LAST_FRAME]; + + if (ref_buf_idx != INVALID_IDX) { + ref_buf->is_valid = 1; + + MV_REFERENCE_FRAME ref; + for (ref = LAST_FRAME; ref < ref_frame; ++ref) { + int buf_idx = get_ref_frame_buf_idx(cpi, ref); + RefBuffer *const buf = &cm->frame_refs[ref - LAST_FRAME]; + if (buf->is_valid && buf_idx == ref_buf_idx) { + if (ref_frame != ALTREF_FRAME || ref == LAST_FRAME) { + ref_buf->is_valid = 0; + break; + } else { + buf->is_valid = 0; + } + } + } + } else { + ref_buf->is_valid = 0; + } + } +} +#endif // CONFIG_VAR_REFS + void av1_update_reference_frames(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; BufferPool *const pool = cm->buffer_pool; - const int use_upsampled_ref = cpi->sf.use_upsampled_references; - int new_uidx = 0; // NOTE: Save the new show frame buffer index for --test-code=warn, i.e., // for the purpose to verify no mismatch between encoder and decoder. if (cm->show_frame) cpi->last_show_frame_buf_idx = cm->new_fb_idx; - if (use_upsampled_ref) { -#if CONFIG_EXT_REFS - if (cm->show_existing_frame) { - new_uidx = cpi->upsampled_ref_idx[cpi->existing_fb_idx_to_show]; - // TODO(zoeliu): Once following is confirmed, remove it. - assert(cpi->upsampled_ref_bufs[new_uidx].ref_count > 0); - } else { -#endif // CONFIG_EXT_REFS - // Up-sample the current encoded frame. - RefCntBuffer *bufs = pool->frame_bufs; - const YV12_BUFFER_CONFIG *const ref = &bufs[cm->new_fb_idx].buf; - - new_uidx = upsample_ref_frame(cpi, ref); -#if CONFIG_EXT_REFS - assert(new_uidx != INVALID_IDX); - } -#endif // CONFIG_EXT_REFS - } // At this point the new frame has been encoded. // If any buffer copy / swapping is signaled it should be done here. 
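The new enc_check_valid_ref_frames() under CONFIG_VAR_REFS walks the references in order and drops later duplicates, except that ALTREF wins over everything but LAST. A self-contained sketch of the same rule over plain index arrays; the names and the seven-reference count are assumptions for illustration, not the encoder's types.

    #define SKETCH_NUM_REFS 7                 /* assumed: LAST .. ALTREF */
    #define SKETCH_ALTREF (SKETCH_NUM_REFS - 1)
    #define SKETCH_LAST 0
    /* buf_idx[i] is the frame-buffer index backing reference i, -1 if unset. */
    static void sketch_dedup_refs(const int buf_idx[SKETCH_NUM_REFS],
                                  int is_valid[SKETCH_NUM_REFS]) {
      for (int r = 0; r < SKETCH_NUM_REFS; ++r) {
        is_valid[r] = (buf_idx[r] >= 0);
        if (!is_valid[r]) continue;
        for (int p = 0; p < r; ++p) {
          if (!is_valid[p] || buf_idx[p] != buf_idx[r]) continue;
          if (r != SKETCH_ALTREF || p == SKETCH_LAST) {
            is_valid[r] = 0;                  /* duplicate of an earlier reference */
            break;
          }
          is_valid[p] = 0;                    /* earlier reference yields to ALTREF */
        }
      }
    }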
if (cm->frame_type == KEY_FRAME) { @@ -3107,17 +3261,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) { #endif // CONFIG_EXT_REFS ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); - - if (use_upsampled_ref) { - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx); -#if CONFIG_EXT_REFS - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->bwd_fb_idx], new_uidx); -#endif // CONFIG_EXT_REFS - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx); - } } else if (av1_preserve_existing_gf(cpi)) { // We have decided to preserve the previously existing golden frame as our // new ARF frame. However, in the short term in function @@ -3131,10 +3274,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); - if (use_upsampled_ref) - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx); - tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; cpi->gld_fb_idx = tmp; @@ -3146,19 +3285,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) { // TODO(zoeliu): Do we need to copy cpi->interp_filter_selected[0] over to // cpi->interp_filter_selected[GOLDEN_FRAME]? #if CONFIG_EXT_REFS - } else if (cpi->rc.is_last_bipred_frame) { - // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the LAST3_FRAME - // by updating the virtual indices. Note that the frame BWDREF_FRAME points - // to now should be retired, and it should not be used before refreshed. - int tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]; - - shift_last_ref_frames(cpi); - cpi->lst_fb_idxes[0] = cpi->bwd_fb_idx; - cpi->bwd_fb_idx = tmp; - - memcpy(cpi->interp_filter_selected[LAST_FRAME], - cpi->interp_filter_selected[BWDREF_FRAME], - sizeof(cpi->interp_filter_selected[BWDREF_FRAME])); } else if (cpi->rc.is_src_frame_ext_arf && cm->show_existing_frame) { // Deal with the special case for showing existing internal ALTREF_FRAME // Refresh the LAST_FRAME with the ALTREF_FRAME and retire the LAST3_FRAME @@ -3195,9 +3321,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) { } #endif // CONFIG_EXT_REFS ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); - if (use_upsampled_ref) - uref_cnt_fb(cpi->upsampled_ref_bufs, &cpi->upsampled_ref_idx[arf_idx], - new_uidx); memcpy(cpi->interp_filter_selected[ALTREF_FRAME + which_arf], cpi->interp_filter_selected[0], @@ -3207,9 +3330,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) { if (cpi->refresh_golden_frame) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); - if (use_upsampled_ref) - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx); #if !CONFIG_EXT_REFS if (!cpi->rc.is_src_frame_alt_ref) @@ -3234,9 +3354,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->bwd_fb_idx], cm->new_fb_idx); - if (use_upsampled_ref) - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->bwd_fb_idx], new_uidx); memcpy(cpi->interp_filter_selected[BWDREF_FRAME], cpi->interp_filter_selected[0], @@ -3293,11 +3410,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idxes[ref_frame]], cm->new_fb_idx); - - if (use_upsampled_ref) - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->lst_fb_idxes[ref_frame]], - new_uidx); } } else { int tmp; @@ 
-3306,30 +3418,39 @@ void av1_update_reference_frames(AV1_COMP *cpi) { &cm->ref_frame_map[cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]], cm->new_fb_idx); - if (use_upsampled_ref) - uref_cnt_fb( - cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]], - new_uidx); - tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]; shift_last_ref_frames(cpi); cpi->lst_fb_idxes[0] = tmp; assert(cm->show_existing_frame == 0); - // NOTE: Currently only LF_UPDATE and INTNL_OVERLAY_UPDATE frames are to - // refresh the LAST_FRAME. memcpy(cpi->interp_filter_selected[LAST_FRAME], cpi->interp_filter_selected[0], sizeof(cpi->interp_filter_selected[0])); + + if (cpi->rc.is_last_bipred_frame) { + // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the + // LAST3_FRAME by updating the virtual indices. + // + // NOTE: The source frame for BWDREF does not have a holding position as + // the OVERLAY frame for ALTREF's. Hence, to resolve the reference + // virtual index reshuffling for BWDREF, the encoder always + // specifies a LAST_BIPRED right before BWDREF and completes the + // reshuffling job accordingly. + tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]; + + shift_last_ref_frames(cpi); + cpi->lst_fb_idxes[0] = cpi->bwd_fb_idx; + cpi->bwd_fb_idx = tmp; + + memcpy(cpi->interp_filter_selected[LAST_FRAME], + cpi->interp_filter_selected[BWDREF_FRAME], + sizeof(cpi->interp_filter_selected[BWDREF_FRAME])); + } } #else ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); - if (use_upsampled_ref) - uref_cnt_fb(cpi->upsampled_ref_bufs, - &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx); if (!cpi->rc.is_src_frame_alt_ref) { memcpy(cpi->interp_filter_selected[LAST_FRAME], cpi->interp_filter_selected[0], @@ -3344,61 +3465,8 @@ void av1_update_reference_frames(AV1_COMP *cpi) { #endif // DUMP_REF_FRAME_IMAGES } -static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) { - MACROBLOCKD *xd = &cpi->td.mb.e_mbd; - struct loopfilter *lf = &cm->lf; - if (is_lossless_requested(&cpi->oxcf)) { - lf->filter_level = 0; - } else { - struct aom_usec_timer timer; - - aom_clear_system_state(); - - aom_usec_timer_start(&timer); - - av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_pick); - - aom_usec_timer_mark(&timer); - cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer); - } - - if (lf->filter_level > 0) { -#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4 - av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); -#else - if (cpi->num_workers > 1) - av1_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane, - lf->filter_level, 0, 0, cpi->workers, - cpi->num_workers, &cpi->lf_row_sync); - else - av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); -#endif - } -#if CONFIG_CDEF - if (is_lossless_requested(&cpi->oxcf)) { - cm->cdef_bits = 0; - cm->cdef_strengths[0] = 0; - cm->nb_cdef_strengths = 1; - } else { - // Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v - av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd); - - // Apply the filter - av1_cdef_frame(cm->frame_to_show, cm, xd); - } -#endif -#if CONFIG_LOOP_RESTORATION - av1_pick_filter_restoration(cpi->source, cpi, cpi->sf.lpf_pick); - if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE || - cm->rst_info[1].frame_restoration_type != RESTORE_NONE || - cm->rst_info[2].frame_restoration_type != RESTORE_NONE) { - av1_loop_restoration_frame(cm->frame_to_show, cm, cm->rst_info, 7, 0, NULL); - } -#endif // CONFIG_LOOP_RESTORATION - 
aom_extend_frame_inner_borders(cm->frame_to_show); -} - static INLINE void alloc_frame_mvs(AV1_COMMON *const cm, int buffer_idx) { + assert(buffer_idx != INVALID_IDX); RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx]; if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows || new_fb_ptr->mi_cols < cm->mi_cols) { @@ -3458,8 +3526,8 @@ void av1_scale_references(AV1_COMP *cpi) { cm->byte_alignment, NULL, NULL, NULL)) aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); - scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE, - (int)cm->bit_depth); + av1_resize_and_extend_frame(ref, &new_fb_ptr->buf, + (int)cm->bit_depth); cpi->scaled_ref_idx[ref_frame - 1] = new_fb; alloc_frame_mvs(cm, new_fb); } @@ -3482,36 +3550,11 @@ void av1_scale_references(AV1_COMP *cpi) { NULL, NULL, NULL)) aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); - scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE); + av1_resize_and_extend_frame(ref, &new_fb_ptr->buf); cpi->scaled_ref_idx[ref_frame - 1] = new_fb; alloc_frame_mvs(cm, new_fb); } #endif // CONFIG_HIGHBITDEPTH - - if (cpi->sf.use_upsampled_references && - (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width || - new_fb_ptr->buf.y_crop_height != cm->height)) { - const int map_idx = get_ref_frame_map_idx(cpi, ref_frame); - EncRefCntBuffer *ubuf = - &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[map_idx]]; - - if (aom_realloc_frame_buffer(&ubuf->buf, (cm->width << 3), - (cm->height << 3), cm->subsampling_x, - cm->subsampling_y, -#if CONFIG_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - (AOM_BORDER_IN_PIXELS << 3), - cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate up-sampled frame buffer"); -#if CONFIG_HIGHBITDEPTH - scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1, - (int)cm->bit_depth); -#else - scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1); -#endif - } } else { const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); RefCntBuffer *const buf = &pool->frame_bufs[buf_idx]; @@ -3742,66 +3785,38 @@ static void init_motion_estimation(AV1_COMP *cpi) { } #if CONFIG_LOOP_RESTORATION -static void set_restoration_tilesize(int width, int height, +#define COUPLED_CHROMA_FROM_LUMA_RESTORATION 0 +static void set_restoration_tilesize(int width, int height, int sx, int sy, RestorationInfo *rst) { (void)width; (void)height; + (void)sx; + (void)sy; +#if COUPLED_CHROMA_FROM_LUMA_RESTORATION + int s = AOMMIN(sx, sy); +#else + int s = 0; +#endif // !COUPLED_CHROMA_FROM_LUMA_RESTORATION + rst[0].restoration_tilesize = (RESTORATION_TILESIZE_MAX >> 1); - rst[1].restoration_tilesize = rst[0].restoration_tilesize; - rst[2].restoration_tilesize = rst[0].restoration_tilesize; + rst[1].restoration_tilesize = rst[0].restoration_tilesize >> s; + rst[2].restoration_tilesize = rst[1].restoration_tilesize; } #endif // CONFIG_LOOP_RESTORATION -static void set_scaled_size(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - AV1EncoderConfig *const oxcf = &cpi->oxcf; - - // TODO(afergs): Replace with call to av1_resize_pending? Could replace - // scaled_size_set as well. - // TODO(afergs): Realistically, if resize_pending is true, then the other - // conditions must already be satisfied. - // Try this first: - // av1_resize_pending && - // (DYNAMIC && (1 Pass CBR || 2 Pass VBR) - // STATIC && FIRST_FRAME) - // Really, av1_resize_pending should just reflect the above. 
- // TODO(afergs): Allow fixed resizing in AOM_CBR mode? - // 2 Pass VBR: Resize if fixed resize and first frame, or dynamic resize and - // a resize is pending. - // 1 Pass CBR: Resize if dynamic resize and resize pending. - if ((oxcf->pass == 2 && oxcf->rc_mode == AOM_VBR && - ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) || - (oxcf->resize_mode == RESIZE_DYNAMIC && av1_resize_pending(cpi)))) || - (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR && - oxcf->resize_mode == RESIZE_DYNAMIC && av1_resize_pending(cpi))) { - // TODO(afergs): This feels hacky... Should it just set? Should - // av1_set_next_scaled_size be a library function? - av1_calculate_next_scaled_size(cpi, &oxcf->scaled_frame_width, - &oxcf->scaled_frame_height); - } -} - static void set_frame_size(AV1_COMP *cpi, int width, int height) { - int ref_frame; AV1_COMMON *const cm = &cpi->common; - AV1EncoderConfig *const oxcf = &cpi->oxcf; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; + int ref_frame; if (width != cm->width || height != cm->height) { // There has been a change in the encoded frame size av1_set_size_literal(cpi, width, height); - - // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. - // TODO(afergs): Make condition just (pass == 0) or (rc_mode == CBR) - - // UNLESS CBR starts allowing FIXED resizing. Then the resize - // mode will need to get checked too. - if (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR && - oxcf->resize_mode == RESIZE_DYNAMIC) - set_mv_search_params(cpi); // TODO(afergs): Needed? Caller calls after... + set_mv_search_params(cpi); } #if !CONFIG_XIPHRC - if (oxcf->pass == 2) { + if (cpi->oxcf.pass == 2) { av1_set_target_rate(cpi); } #endif @@ -3820,18 +3835,29 @@ static void set_frame_size(AV1_COMP *cpi, int width, int height) { "Failed to allocate frame buffer"); #if CONFIG_LOOP_RESTORATION - set_restoration_tilesize(cm->width, cm->height, cm->rst_info); + set_restoration_tilesize( +#if CONFIG_FRAME_SUPERRES + cm->superres_upscaled_width, cm->superres_upscaled_height, +#else + cm->width, cm->height, +#endif // CONFIG_FRAME_SUPERRES + cm->subsampling_x, cm->subsampling_y, cm->rst_info); for (int i = 0; i < MAX_MB_PLANE; ++i) cm->rst_info[i].frame_restoration_type = RESTORE_NONE; av1_alloc_restoration_buffers(cm); for (int i = 0; i < MAX_MB_PLANE; ++i) { cpi->rst_search[i].restoration_tilesize = cm->rst_info[i].restoration_tilesize; - av1_alloc_restoration_struct(cm, &cpi->rst_search[i], cm->width, - cm->height); + av1_alloc_restoration_struct(cm, &cpi->rst_search[i], +#if CONFIG_FRAME_SUPERRES + cm->superres_upscaled_width, + cm->superres_upscaled_height); +#else + cm->width, cm->height); +#endif // CONFIG_FRAME_SUPERRES } -#endif // CONFIG_LOOP_RESTORATION - alloc_util_frame_buffers(cpi); +#endif // CONFIG_LOOP_RESTORATION + alloc_util_frame_buffers(cpi); // TODO(afergs): Remove? Gets called anyways. 
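The set_restoration_tilesize() change above only shrinks the chroma restoration tiles relative to luma when COUPLED_CHROMA_FROM_LUMA_RESTORATION is turned on; as committed the shift s stays 0 and all three planes match. A small sketch of the relation, with the maximum tile size passed in rather than taken from the libaom constant:

    /* out[0] = luma tile size, out[1]/out[2] = chroma; coupled mirrors the
       COUPLED_CHROMA_FROM_LUMA_RESTORATION switch, sx/sy the subsampling shifts. */
    static void sketch_restoration_tilesize(int tilesize_max, int sx, int sy,
                                            int coupled, int out[3]) {
      const int s = coupled ? (sx < sy ? sx : sy) : 0;
      out[0] = tilesize_max >> 1;
      out[1] = out[0] >> s;
      out[2] = out[1];
    }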
init_motion_estimation(cpi); for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { @@ -3857,6 +3883,12 @@ static void set_frame_size(AV1_COMP *cpi, int width, int height) { ref_buf->buf = NULL; } } + +#if CONFIG_VAR_REFS + // Check duplicate reference frames + enc_check_valid_ref_frames(cpi); +#endif // CONFIG_VAR_REFS + #if CONFIG_INTRABC #if CONFIG_HIGHBITDEPTH av1_setup_scale_factors_for_frame(&xd->sf_identity, cm->width, cm->height, @@ -3872,84 +3904,167 @@ static void set_frame_size(AV1_COMP *cpi, int width, int height) { } static void setup_frame_size(AV1_COMP *cpi) { - set_scaled_size(cpi); + int encode_width = cpi->oxcf.width; + int encode_height = cpi->oxcf.height; + + uint8_t resize_num = av1_calculate_next_resize_scale(cpi); + av1_calculate_scaled_size(&encode_width, &encode_height, resize_num); + #if CONFIG_FRAME_SUPERRES - int encode_width; - int encode_height; - av1_calculate_superres_size(cpi, &encode_width, &encode_height); + AV1_COMMON *cm = &cpi->common; + cm->superres_upscaled_width = encode_width; + cm->superres_upscaled_height = encode_height; + cm->superres_scale_numerator = + av1_calculate_next_superres_scale(cpi, encode_width, encode_width); + av1_calculate_scaled_size(&encode_width, &encode_height, + cm->superres_scale_numerator); +#endif // CONFIG_FRAME_SUPERRES + set_frame_size(cpi, encode_width, encode_height); +} + +#if CONFIG_FRAME_SUPERRES +static void superres_post_encode(AV1_COMP *cpi) { + AV1_COMMON *cm = &cpi->common; + + if (av1_superres_unscaled(cm)) return; + + av1_superres_upscale(cm, NULL); + + // If regular resizing is occurring the source will need to be downscaled to + // match the upscaled superres resolution. Otherwise the original source is + // used. + if (av1_resize_unscaled(cm)) { + cpi->source = cpi->unscaled_source; + if (cpi->last_source != NULL) cpi->last_source = cpi->unscaled_last_source; + } else { + assert(cpi->unscaled_source->y_crop_width != cm->superres_upscaled_width); + assert(cpi->unscaled_source->y_crop_height != cm->superres_upscaled_height); + // Do downscale. cm->(width|height) has been updated by av1_superres_upscale + if (aom_realloc_frame_buffer( + &cpi->scaled_source, cm->superres_upscaled_width, + cm->superres_upscaled_height, cm->subsampling_x, cm->subsampling_y, +#if CONFIG_HIGHBITDEPTH + cm->use_highbitdepth, +#endif // CONFIG_HIGHBITDEPTH + AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) + aom_internal_error( + &cm->error, AOM_CODEC_MEM_ERROR, + "Failed to reallocate scaled source buffer for superres"); + assert(cpi->scaled_source.y_crop_width == cm->superres_upscaled_width); + assert(cpi->scaled_source.y_crop_height == cm->superres_upscaled_height); +#if CONFIG_HIGHBITDEPTH + av1_resize_and_extend_frame(cpi->unscaled_source, &cpi->scaled_source, + (int)cm->bit_depth); #else - set_frame_size(cpi, cpi->oxcf.scaled_frame_width, - cpi->oxcf.scaled_frame_height); -#endif // CONFIG_FRAME_SUPERRES + av1_resize_and_extend_frame(cpi->unscaled_source, &cpi->scaled_source); +#endif // CONFIG_HIGHBITDEPTH + cpi->source = &cpi->scaled_source; + } } +#endif // CONFIG_FRAME_SUPERRES -static void reset_use_upsampled_references(AV1_COMP *cpi) { - MV_REFERENCE_FRAME ref_frame; +static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) { + MACROBLOCKD *xd = &cpi->td.mb.e_mbd; + struct loopfilter *lf = &cm->lf; + int no_loopfilter = 0; - // reset up-sampled reference buffer structure. 
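setup_frame_size() above now sizes the frame in two stages: a resize scale first, then a superres scale applied to the resized dimensions (the dimensions the commit stores as superres_upscaled_width/height). A hedged sketch of that arithmetic, assuming each stage is simply dim * numerator / denominator; the real helpers may round or force even sizes.

    static int sketch_scale_dim(int dim, int num, int den) {
      return dim * num / den;
    }
    /* enc_w/enc_h come out as the actual coded size; resized_w/resized_h are the
       pre-superres size that the in-loop upscaler restores after encoding. */
    static void sketch_setup_frame_size(int src_w, int src_h, int resize_num,
                                        int superres_num, int den,
                                        int *enc_w, int *enc_h) {
      const int resized_w = sketch_scale_dim(src_w, resize_num, den);
      const int resized_h = sketch_scale_dim(src_h, resize_num, den);
      *enc_w = sketch_scale_dim(resized_w, superres_num, den);
      *enc_h = sketch_scale_dim(resized_h, superres_num, den);
    }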
- init_upsampled_ref_frame_bufs(cpi); + if (is_lossless_requested(&cpi->oxcf)) no_loopfilter = 1; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, ref_frame); - int new_uidx = upsample_ref_frame(cpi, ref); +#if CONFIG_EXT_TILE + // 0 loopfilter level is only necessary if individual tile + // decoding is required. + if (cm->single_tile_decoding) no_loopfilter = 1; +#endif // CONFIG_EXT_TILE + + if (no_loopfilter) { + lf->filter_level = 0; + } else { + struct aom_usec_timer timer; + + aom_clear_system_state(); + + aom_usec_timer_start(&timer); + + av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_pick); + + aom_usec_timer_mark(&timer); + cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer); + } - // Update the up-sampled reference index. - cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)] = new_uidx; - cpi->upsampled_ref_bufs[new_uidx].ref_count++; + if (lf->filter_level > 0) { +#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4 +#if CONFIG_UV_LVL + av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); + av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_u, 1, 0); + av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_v, 2, 0); +#else + av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); +#endif // CONFIG_UV_LVL +#else + if (cpi->num_workers > 1) + av1_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane, + lf->filter_level, 0, 0, cpi->workers, + cpi->num_workers, &cpi->lf_row_sync); + else + av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); +#endif + } +#if CONFIG_CDEF + if (is_lossless_requested(&cpi->oxcf)) { + cm->cdef_bits = 0; + cm->cdef_strengths[0] = 0; + cm->nb_cdef_strengths = 1; + } else { + // Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v + av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd, + cpi->oxcf.speed > 0); + + // Apply the filter + av1_cdef_frame(cm->frame_to_show, cm, xd); } +#endif + +#if CONFIG_FRAME_SUPERRES + superres_post_encode(cpi); +#endif // CONFIG_FRAME_SUPERRES + +#if CONFIG_LOOP_RESTORATION + av1_pick_filter_restoration(cpi->source, cpi, cpi->sf.lpf_pick); + if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE || + cm->rst_info[1].frame_restoration_type != RESTORE_NONE || + cm->rst_info[2].frame_restoration_type != RESTORE_NONE) { + av1_loop_restoration_frame(cm->frame_to_show, cm, cm->rst_info, 7, 0, NULL); + } +#endif // CONFIG_LOOP_RESTORATION + // TODO(debargha): Fix mv search range on encoder side + // aom_extend_frame_inner_borders(cm->frame_to_show); + aom_extend_frame_borders(cm->frame_to_show); } static void encode_without_recode_loop(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; int q = 0, bottom_index = 0, top_index = 0; // Dummy variables. - const int use_upsampled_ref = cpi->sf.use_upsampled_references; aom_clear_system_state(); -#if CONFIG_FRAME_SUPERRES - // TODO(afergs): Figure out when is actually a good time to do superres - cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR; - // (uint8_t)(rand() % 9 + SUPERRES_SCALE_NUMERATOR_MIN); - cpi->superres_pending = cpi->oxcf.superres_enabled && 0; -#endif // CONFIG_FRAME_SUPERRES - + set_size_independent_vars(cpi); setup_frame_size(cpi); - av1_resize_step(cpi); - - // For 1 pass CBR under dynamic resize mode: use faster scaling for source. - // Only for 2x2 scaling for now. 
- if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == AOM_CBR && - cpi->oxcf.resize_mode == RESIZE_DYNAMIC && - cpi->un_scaled_source->y_width == (cm->width << 1) && - cpi->un_scaled_source->y_height == (cm->height << 1)) { - cpi->source = av1_scale_if_required_fast(cm, cpi->un_scaled_source, - &cpi->scaled_source); - if (cpi->unscaled_last_source != NULL) - cpi->last_source = av1_scale_if_required_fast( - cm, cpi->unscaled_last_source, &cpi->scaled_last_source); - } else { - cpi->source = - av1_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source); - if (cpi->unscaled_last_source != NULL) - cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source, - &cpi->scaled_last_source); - } + assert(cm->width == cpi->scaled_source.y_crop_width); + assert(cm->height == cpi->scaled_source.y_crop_height); + + set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); + + cpi->source = + av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source); + if (cpi->unscaled_last_source != NULL) + cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source, + &cpi->scaled_last_source); if (frame_is_intra_only(cm) == 0) { av1_scale_references(cpi); } - set_size_independent_vars(cpi); - set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); - - // cpi->sf.use_upsampled_references can be different from frame to frame. - // Every time when cpi->sf.use_upsampled_references is changed from 0 to 1. - // The reference frames for this frame have to be up-sampled before encoding. - if (!use_upsampled_ref && cpi->sf.use_upsampled_references && - cm->frame_type != KEY_FRAME) - reset_use_upsampled_references(cpi); - av1_set_quantizer(cm, q); setup_frame(cpi); suppress_active_map(cpi); @@ -3968,11 +4083,6 @@ static void encode_without_recode_loop(AV1_COMP *cpi) { // transform / motion compensation build reconstruction frame av1_encode_frame(cpi); -#if CONFIG_FRAME_SUPERRES - // TODO(afergs): Upscale the frame to show - cpi->superres_pending = 0; -#endif // CONFIG_FRAME_SUPERRES - // Update some stats from cyclic refresh, and check if we should not update // golden reference, for 1 pass CBR. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME && @@ -4000,7 +4110,6 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size, int frame_over_shoot_limit; int frame_under_shoot_limit; int q = 0, q_low = 0, q_high = 0; - const int use_upsampled_ref = cpi->sf.use_upsampled_references; set_size_independent_vars(cpi); @@ -4009,22 +4118,9 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size, setup_frame_size(cpi); -#if CONFIG_FRAME_SUPERRES - if (loop_count == 0 || av1_resize_pending(cpi) || cpi->superres_pending) { -#else - if (loop_count == 0 || av1_resize_pending(cpi)) { -#endif // CONFIG_FRAME_SUPERRES + if (loop_count == 0) { set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); - // cpi->sf.use_upsampled_references can be different from frame to frame. - // Every time when cpi->sf.use_upsampled_references is changed from 0 to - // 1. - // The reference frames for this frame have to be up-sampled before - // encoding. - if (!use_upsampled_ref && cpi->sf.use_upsampled_references && - cm->frame_type != KEY_FRAME) - reset_use_upsampled_references(cpi); - // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. 
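The simplified encode_without_recode_loop() above leans on the usual "scale only if required" pattern for the source and last-source buffers. A tiny sketch of that pattern on a hypothetical frame type; the encoder operates on YV12 buffers and performs a real resample plus border extension rather than the stand-in assignment shown here.

    typedef struct { int w, h; } sketch_frame_t;
    static sketch_frame_t *sketch_scale_if_required(int enc_w, int enc_h,
                                                    sketch_frame_t *unscaled,
                                                    sketch_frame_t *scaled) {
      if (unscaled->w == enc_w && unscaled->h == enc_h) return unscaled;
      scaled->w = enc_w;  /* stand-in for resampling into the scaled buffer */
      scaled->h = enc_h;
      return scaled;
    }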
set_mv_search_params(cpi); @@ -4034,9 +4130,6 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size, undershoot_seen = 0; #endif - // Advance resize to next state now that updates are done - av1_resize_step(cpi); - q_low = bottom_index; q_high = top_index; @@ -4051,8 +4144,7 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size, } cpi->source = - av1_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source); - + av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source); if (cpi->unscaled_last_source != NULL) cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source, &cpi->scaled_last_source); @@ -4174,8 +4266,6 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size, #if !CONFIG_XIPHRC int retries = 0; - // TODO(afergs): Replace removed recode when av1_resize_pending is true - // Frame size out of permitted range: // Update correction factor & compute new Q to try... // Frame is too large @@ -4285,7 +4375,7 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) { map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[1]]; const int gld_is_last2 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[1]]; const int gld_is_last3 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[2]]; -#else +#else // !CONFIG_ONE_SIDED_COMPOUND const int bwd_is_last = map[cpi->bwd_fb_idx] == map[cpi->lst_fb_idxes[0]]; const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[0]]; @@ -4299,12 +4389,12 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) { const int bwd_is_gld = map[cpi->bwd_fb_idx] == map[cpi->gld_fb_idx]; -#endif +#endif // CONFIG_ONE_SIDED_COMPOUND const int last2_is_alt = map[cpi->lst_fb_idxes[1]] == map[cpi->alt_fb_idx]; const int last3_is_alt = map[cpi->lst_fb_idxes[2]] == map[cpi->alt_fb_idx]; const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx]; const int bwd_is_alt = map[cpi->bwd_fb_idx] == map[cpi->alt_fb_idx]; -#else +#else // !CONFIG_EXT_REFS const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx]; const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx]; const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx]; @@ -4476,11 +4566,14 @@ static void dump_filtered_recon_frames(AV1_COMP *cpi) { } printf( "\nFrame=%5d, encode_update_type[%5d]=%1d, show_existing_frame=%d, " - "y_stride=%4d, uv_stride=%4d, width=%4d, height=%4d\n", + "source_alt_ref_active=%d, refresh_alt_ref_frame=%d, rf_level=%d, " + "y_stride=%4d, uv_stride=%4d, cm->width=%4d, cm->height=%4d\n", cm->current_video_frame, cpi->twopass.gf_group.index, cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index], - cm->show_existing_frame, recon_buf->y_stride, recon_buf->uv_stride, - cm->width, cm->height); + cm->show_existing_frame, cpi->rc.source_alt_ref_active, + cpi->refresh_alt_ref_frame, + cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index], + recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height); // --- Y --- for (h = 0; h < cm->height; ++h) { @@ -4502,8 +4595,6 @@ static void dump_filtered_recon_frames(AV1_COMP *cpi) { } #endif // DUMP_RECON_FRAMES -#if CONFIG_EC_ADAPT - static void make_update_tile_list_enc(AV1_COMP *cpi, const int tile_rows, const int tile_cols, FRAME_CONTEXT *ec_ctxs[]) { @@ -4512,7 +4603,6 @@ static void make_update_tile_list_enc(AV1_COMP *cpi, const int tile_rows, ec_ctxs[i] = &cpi->tile_data[i].tctx; } -#endif static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, uint8_t *dest, int skip_adapt, unsigned int *frame_flags) { @@ -4520,13 +4610,11 
@@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, const AV1EncoderConfig *const oxcf = &cpi->oxcf; struct segmentation *const seg = &cm->seg; TX_SIZE t; -#if CONFIG_EC_ADAPT FRAME_CONTEXT **tile_ctxs = aom_malloc(cm->tile_rows * cm->tile_cols * sizeof(&cpi->tile_data[0].tctx)); aom_cdf_prob **cdf_ptrs = aom_malloc(cm->tile_rows * cm->tile_cols * sizeof(&cpi->tile_data[0].tctx.partition_cdf[0][0])); -#endif #if CONFIG_XIPHRC int frame_type; int drop_this_frame = 0; @@ -4610,15 +4698,10 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, #endif } - cm->last_width = cm->width; - cm->last_height = cm->height; - ++cm->current_video_frame; -#if CONFIG_EC_ADAPT aom_free(tile_ctxs); aom_free(cdf_ptrs); -#endif return; } #endif // CONFIG_EXT_REFS @@ -4654,7 +4737,6 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, cm->reset_frame_context = RESET_FRAME_CONTEXT_CURRENT; } } -#if CONFIG_TILE_GROUPS if (cpi->oxcf.mtu == 0) { cm->num_tg = cpi->oxcf.num_tile_groups; } else { @@ -4662,20 +4744,18 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, // updates cm->num_tg = DEFAULT_MAX_NUM_TG; } -#endif #if CONFIG_EXT_TILE - cm->tile_encoding_mode = cpi->oxcf.tile_encoding_mode; + cm->large_scale_tile = cpi->oxcf.large_scale_tile; + cm->single_tile_decoding = cpi->oxcf.single_tile_decoding; #endif // CONFIG_EXT_TILE #if CONFIG_XIPHRC if (drop_this_frame) { av1_rc_postencode_update_drop_frame(cpi); ++cm->current_video_frame; -#if CONFIG_EC_ADAPT aom_free(tile_ctxs); aom_free(cdf_ptrs); -#endif return; } #else @@ -4686,10 +4766,8 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, if (av1_rc_drop_frame(cpi)) { av1_rc_postencode_update_drop_frame(cpi); ++cm->current_video_frame; -#if CONFIG_EC_ADAPT aom_free(tile_ctxs); aom_free(cdf_ptrs); -#endif return; } } @@ -4770,6 +4848,10 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, cm->frame_to_show = get_frame_new_buffer(cm); cm->frame_to_show->color_space = cm->color_space; +#if CONFIG_COLORSPACE_HEADERS + cm->frame_to_show->transfer_function = cm->transfer_function; + cm->frame_to_show->chroma_sample_position = cm->chroma_sample_position; +#endif cm->frame_to_show->color_range = cm->color_range; cm->frame_to_show->render_width = cm->render_width; cm->frame_to_show->render_height = cm->render_height; @@ -4786,10 +4868,8 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, av1_pack_bitstream(cpi, dest, size); if (skip_adapt) { -#if CONFIG_EC_ADAPT aom_free(tile_ctxs); aom_free(cdf_ptrs); -#endif return; } @@ -4823,11 +4903,13 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, cpi->td.rd_counts.coef_counts[t]); #if CONFIG_ENTROPY_STATS av1_accumulate_frame_counts(&aggregate_fc, &cm->counts); + assert(cm->frame_context_idx < FRAME_CONTEXTS); + av1_accumulate_frame_counts(&aggregate_fc_per_type[cm->frame_context_idx], + &cm->counts); #endif // CONFIG_ENTROPY_STATS if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { av1_adapt_coef_probs(cm); av1_adapt_intra_frame_probs(cm); -#if CONFIG_EC_ADAPT make_update_tile_list_enc(cpi, cm->tile_rows, cm->tile_cols, tile_ctxs); av1_average_tile_coef_cdfs(cpi->common.fc, tile_ctxs, cdf_ptrs, cm->tile_rows * cm->tile_cols); @@ -4837,7 +4919,6 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, av1_average_tile_pvq_cdfs(cpi->common.fc, tile_ctxs, cm->tile_rows * cm->tile_cols); #endif // CONFIG_PVQ -#endif // CONFIG_EC_ADAPT #if CONFIG_ADAPT_SCAN 
av1_adapt_scan_order(cm); #endif // CONFIG_ADAPT_SCAN @@ -4847,12 +4928,10 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { av1_adapt_inter_frame_probs(cm); av1_adapt_mv_probs(cm, cm->allow_high_precision_mv); -#if CONFIG_EC_ADAPT av1_average_tile_inter_cdfs(&cpi->common, cpi->common.fc, tile_ctxs, cdf_ptrs, cm->tile_rows * cm->tile_cols); av1_average_tile_mv_cdfs(cpi->common.fc, tile_ctxs, cdf_ptrs, cm->tile_rows * cm->tile_cols); -#endif } } @@ -4888,10 +4967,8 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, if (drop_this_frame) { av1_rc_postencode_update_drop_frame(cpi); ++cm->current_video_frame; -#if CONFIG_EC_ADAPT aom_free(tile_ctxs); aom_free(cdf_ptrs); -#endif return; } #else // !CONFIG_XIPHRC @@ -4915,13 +4992,6 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, cm->seg.update_data = 0; cm->lf.mode_ref_delta_update = 0; - // keep track of the last coded dimensions - cm->last_width = cm->width; - cm->last_height = cm->height; - - // reset to normal state now that we are done. - if (!cm->show_existing_frame) cm->last_show_frame = cm->show_frame; - if (cm->show_frame) { #if CONFIG_EXT_REFS // TODO(zoeliu): We may only swamp mi and prev_mi for those frames that are @@ -4935,13 +5005,20 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, #if CONFIG_EXT_REFS // NOTE: Shall not refer to any frame not used as reference. - if (cm->is_reference_frame) + if (cm->is_reference_frame) { #endif // CONFIG_EXT_REFS cm->prev_frame = cm->cur_frame; -#if CONFIG_EC_ADAPT + // keep track of the last coded dimensions + cm->last_width = cm->width; + cm->last_height = cm->height; + + // reset to normal state now that we are done. + cm->last_show_frame = cm->show_frame; +#if CONFIG_EXT_REFS + } +#endif // CONFIG_EXT_REFS aom_free(tile_ctxs); aom_free(cdf_ptrs); -#endif } static void Pass0Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest, @@ -5220,12 +5297,17 @@ static void adjust_image_stat(double y, double u, double v, double all, s->worst = AOMMIN(s->worst, all); } -static void compute_internal_stats(AV1_COMP *cpi) { +static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) { AV1_COMMON *const cm = &cpi->common; double samples = 0.0; uint32_t in_bit_depth = 8; uint32_t bit_depth = 8; +#if CONFIG_INTER_STATS_ONLY + if (cm->frame_type == KEY_FRAME) return; // skip key frame +#endif + cpi->bytes += frame_bytes; + #if CONFIG_HIGHBITDEPTH if (cm->use_highbitdepth) { in_bit_depth = cpi->oxcf.input_bit_depth; @@ -5413,8 +5495,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags, if (cpi->b_calculate_psnr) generate_psnr_packet(cpi); #if CONFIG_INTERNAL_STATS - compute_internal_stats(cpi); - cpi->bytes += (int)(*size); + compute_internal_stats(cpi, (int)(*size)); #endif // CONFIG_INTERNAL_STATS // Clear down mmx registers @@ -5448,8 +5529,17 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags, cpi->alt_ref_source = source; if (oxcf->arnr_max_frames > 0) { - // Produce the filtered ARF frame. - av1_temporal_filter(cpi, arf_src_index); +// Produce the filtered ARF frame. +#if CONFIG_BGSPRITE + int bgsprite_ret = av1_background_sprite(cpi, arf_src_index); + // Do temporal filter if bgsprite not generated. 
+ if (bgsprite_ret != 0) +#endif // CONFIG_BGSPRITE + av1_temporal_filter(cpi, +#if CONFIG_BGSPRITE + NULL, +#endif // CONFIG_BGSPRITE + arf_src_index); aom_extend_frame_borders(&cpi->alt_ref_buffer); force_src_buffer = &cpi->alt_ref_buffer; } @@ -5489,7 +5579,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags, if ((last_source = av1_lookahead_peek(cpi->lookahead, -1)) == NULL) return -1; } - + if (cm->current_video_frame > 0) assert(last_source != NULL); // Read in the source frame. source = av1_lookahead_pop(cpi->lookahead, flush); @@ -5501,11 +5591,9 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags, check_src_altref(cpi, source); } } - if (source) { - cpi->un_scaled_source = cpi->source = + cpi->unscaled_source = cpi->source = force_src_buffer ? force_src_buffer : &source->img; - cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL; *time_stamp = source->ts_start; @@ -5576,7 +5664,6 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags, av1_rc_get_second_pass_params(cpi); } else if (oxcf->pass == 1) { setup_frame_size(cpi); - av1_resize_step(cpi); } #endif @@ -5645,8 +5732,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags, #if CONFIG_INTERNAL_STATS if (oxcf->pass != 1) { - compute_internal_stats(cpi); - cpi->bytes += (int)(*size); + compute_internal_stats(cpi, (int)(*size)); } #endif // CONFIG_INTERNAL_STATS @@ -5712,9 +5798,10 @@ int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode, int av1_set_size_literal(AV1_COMP *cpi, int width, int height) { AV1_COMMON *cm = &cpi->common; #if CONFIG_HIGHBITDEPTH - check_initial_width(cpi, cm->use_highbitdepth, 1, 1); + check_initial_width(cpi, cm->use_highbitdepth, cm->subsampling_x, + cm->subsampling_y); #else - check_initial_width(cpi, 1, 1); + check_initial_width(cpi, cm->subsampling_x, cm->subsampling_y); #endif // CONFIG_HIGHBITDEPTH if (width <= 0 || height <= 0) return 1; diff --git a/third_party/aom/av1/encoder/encoder.h b/third_party/aom/av1/encoder/encoder.h index ee1257c2d..9b98975b7 100644 --- a/third_party/aom/av1/encoder/encoder.h +++ b/third_party/aom/av1/encoder/encoder.h @@ -21,6 +21,7 @@ #include "av1/common/entropymode.h" #include "av1/common/thread_common.h" #include "av1/common/onyxc_int.h" +#include "av1/common/resize.h" #include "av1/encoder/aq_cyclicrefresh.h" #if CONFIG_ANS #include "aom_dsp/ans.h" @@ -52,6 +53,10 @@ extern "C" { #endif +#if CONFIG_SPEED_REFS +#define MIN_SPEED_REFS_BLKSIZE BLOCK_16X16 +#endif // CONFIG_SPEED_REFS + typedef struct { int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS]; int nmv_costs[NMV_CONTEXTS][2][MV_VALS]; @@ -128,7 +133,14 @@ typedef enum { RESIZE_NONE = 0, // No frame resizing allowed. RESIZE_FIXED = 1, // All frames are coded at the specified dimension. RESIZE_DYNAMIC = 2 // Coded size of each frame is determined by the codec. -} RESIZE_TYPE; +} RESIZE_MODE; +#if CONFIG_FRAME_SUPERRES +typedef enum { + SUPERRES_NONE = 0, + SUPERRES_FIXED = 1, + SUPERRES_DYNAMIC = 2 +} SUPERRES_MODE; +#endif // CONFIG_FRAME_SUPERRES typedef struct AV1EncoderConfig { BITSTREAM_PROFILE profile; @@ -190,22 +202,22 @@ typedef struct AV1EncoderConfig { int qm_minlevel; int qm_maxlevel; #endif -#if CONFIG_TILE_GROUPS unsigned int num_tile_groups; unsigned int mtu; -#endif #if CONFIG_TEMPMV_SIGNALING unsigned int disable_tempmv; #endif // Internal frame size scaling. 
- RESIZE_TYPE resize_mode; - int scaled_frame_width; - int scaled_frame_height; + RESIZE_MODE resize_mode; + uint8_t resize_scale_numerator; + uint8_t resize_kf_scale_numerator; #if CONFIG_FRAME_SUPERRES - // Frame Super-Resolution size scaling - int superres_enabled; + // Frame Super-Resolution size scaling. + SUPERRES_MODE superres_mode; + uint8_t superres_scale_numerator; + uint8_t superres_kf_scale_numerator; #endif // CONFIG_FRAME_SUPERRES // Enable feature to reduce the frame quantization every x frames. @@ -265,6 +277,10 @@ typedef struct AV1EncoderConfig { int use_highbitdepth; #endif aom_color_space_t color_space; +#if CONFIG_COLORSPACE_HEADERS + aom_transfer_function_t transfer_function; + aom_chroma_sample_position_t chroma_sample_position; +#endif int color_range; int render_width; int render_height; @@ -276,7 +292,8 @@ typedef struct AV1EncoderConfig { int ans_window_size_log2; #endif // CONFIG_ANS && ANS_MAX_SYMBOLS #if CONFIG_EXT_TILE - unsigned int tile_encoding_mode; + unsigned int large_scale_tile; + unsigned int single_tile_decoding; #endif // CONFIG_EXT_TILE unsigned int motion_vector_unit_test; @@ -289,8 +306,8 @@ static INLINE int is_lossless_requested(const AV1EncoderConfig *cfg) { // TODO(jingning) All spatially adaptive variables should go to TileDataEnc. typedef struct TileDataEnc { TileInfo tile_info; - int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; - int mode_map[BLOCK_SIZES][MAX_MODES]; + int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES]; + int mode_map[BLOCK_SIZES_ALL][MAX_MODES]; int m_search_count; int ex_search_count; #if CONFIG_PVQ @@ -299,9 +316,7 @@ typedef struct TileDataEnc { #if CONFIG_CFL CFL_CTX cfl; #endif -#if CONFIG_EC_ADAPT DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx); -#endif } TileDataEnc; typedef struct RD_COUNTS { @@ -311,6 +326,8 @@ typedef struct RD_COUNTS { // Stores number of 4x4 blocks using global motion per reference frame. int global_motion_used[TOTAL_REFS_PER_FRAME]; #endif // CONFIG_GLOBAL_MOTION + int single_ref_used_flag; + int compound_ref_used_flag; } RD_COUNTS; typedef struct ThreadData { @@ -372,18 +389,11 @@ typedef struct AV1_COMP { YV12_BUFFER_CONFIG *source; YV12_BUFFER_CONFIG *last_source; // NULL for first frame and alt_ref frames - YV12_BUFFER_CONFIG *un_scaled_source; + YV12_BUFFER_CONFIG *unscaled_source; YV12_BUFFER_CONFIG scaled_source; YV12_BUFFER_CONFIG *unscaled_last_source; YV12_BUFFER_CONFIG scaled_last_source; - // Up-sampled reference buffers - // NOTE(zoeliu): It is needed to allocate sufficient space to the up-sampled - // reference buffers, which should include the up-sampled version of all the - // possibly stored references plus the currently coded frame itself. - EncRefCntBuffer upsampled_ref_bufs[REF_FRAMES + 1]; - int upsampled_ref_idx[REF_FRAMES + 1]; - // For a still frame, this flag is set to 1 to skip partition search. int partition_search_skippable_frame; @@ -471,7 +481,7 @@ typedef struct AV1_COMP { fractional_mv_step_fp *find_fractional_mv_step; av1_full_search_fn_t full_search_sad; // It is currently unused. 
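The AV1EncoderConfig portion of this header now expresses both downscaling steps as scale numerators (resize_scale_numerator / resize_kf_scale_numerator and, under CONFIG_FRAME_SUPERRES, superres_scale_numerator / superres_kf_scale_numerator) instead of explicit scaled dimensions. Going by the helpers removed from firstpass.c later in this patch, which compute width * numerator / denominator with SUPERRES_SCALE_DENOMINATOR as the super-resolution denominator, a coded dimension would be derived roughly as sketched below; the resize denominator named here is a placeholder, not necessarily the in-tree constant.

/* Rough sketch of numerator-based frame scaling. The superres denominator
 * (SUPERRES_SCALE_DENOMINATOR) appears in the removed firstpass.c helpers;
 * the resize denominator below is an assumed placeholder value. */
#define ASSUMED_RESIZE_SCALE_DENOMINATOR 16

static int scale_dimension(int dim, int numerator, int denominator) {
  return dim * numerator / denominator; /* e.g. 1920 * 12 / 16 = 1440 */
}

/* Regular resize first, then super-resolution on top of the resized size:
 *   int w = scale_dimension(oxcf->width, oxcf->resize_scale_numerator,
 *                           ASSUMED_RESIZE_SCALE_DENOMINATOR);
 *   w = scale_dimension(w, oxcf->superres_scale_numerator,
 *                       SUPERRES_SCALE_DENOMINATOR);
 */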
av1_diamond_search_fn_t diamond_search_sad; - aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES]; + aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL]; uint64_t time_receive_data; uint64_t time_compress_data; uint64_t time_pick_lpf; @@ -538,17 +548,24 @@ typedef struct AV1_COMP { #if CONFIG_EXT_INTER unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS] [INTER_COMPOUND_MODES]; +#if CONFIG_COMPOUND_SINGLEREF + unsigned int inter_singleref_comp_mode_cost[INTER_MODE_CONTEXTS] + [INTER_SINGLEREF_COMP_MODES]; +#endif // CONFIG_COMPOUND_SINGLEREF #if CONFIG_INTERINTRA unsigned int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES]; #endif // CONFIG_INTERINTRA #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - int motion_mode_cost[BLOCK_SIZES][MOTION_MODES]; + int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES]; #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION - int motion_mode_cost1[BLOCK_SIZES][2]; + int motion_mode_cost1[BLOCK_SIZES_ALL][2]; #endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION +#if CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT + int ncobmc_mode_cost[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES]; +#endif // CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES]; + int intra_uv_mode_cost[INTRA_MODES][UV_INTRA_MODES]; int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; #if CONFIG_EXT_PARTITION_TYPES @@ -601,18 +618,10 @@ typedef struct AV1_COMP { TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS]; int resize_state; - int resize_scale_num; - int resize_scale_den; - int resize_next_scale_num; - int resize_next_scale_den; int resize_avg_qp; int resize_buffer_underflow; int resize_count; -#if CONFIG_FRAME_SUPERRES - int superres_pending; -#endif // CONFIG_FRAME_SUPERRES - // VARIANCE_AQ segment map refresh int vaq_refresh; @@ -640,6 +649,15 @@ typedef struct AV1_COMP { #if CONFIG_LV_MAP tran_low_t *tcoeff_buf[MAX_MB_PLANE]; #endif + +#if CONFIG_SPEED_REFS + int sb_scanning_pass_idx; +#endif // CONFIG_SPEED_REFS + +#if CONFIG_FLEX_REFS + int extra_arf_allowed; + int bwd_ref_allowed; +#endif // CONFIG_FLEX_REFS } AV1_COMP; void av1_initialize_enc(void); @@ -729,14 +747,6 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( : NULL; } -static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref( - const AV1_COMP *cpi, const MV_REFERENCE_FRAME ref_frame) { - // Use up-sampled reference frames. - const int buf_idx = - cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)]; - return &cpi->upsampled_ref_bufs[buf_idx].buf; -} - #if CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING static INLINE int enc_is_ref_frame_buf(AV1_COMP *cpi, RefCntBuffer *frame_buf) { MV_REFERENCE_FRAME ref_frame; @@ -831,23 +841,22 @@ static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx, ubufs[new_uidx].ref_count++; } -// Returns 1 if a resize is pending and 0 otherwise. -static INLINE int av1_resize_pending(const struct AV1_COMP *cpi) { - return cpi->resize_scale_num != cpi->resize_next_scale_num || - cpi->resize_scale_den != cpi->resize_next_scale_den; -} - // Returns 1 if a frame is unscaled and 0 otherwise. 
-static INLINE int av1_resize_unscaled(const struct AV1_COMP *cpi) { - return cpi->resize_scale_num == cpi->resize_scale_den; +static INLINE int av1_resize_unscaled(const AV1_COMMON *cm) { +#if CONFIG_FRAME_SUPERRES + return cm->superres_upscaled_width == cm->render_width && + cm->superres_upscaled_height == cm->render_height; +#else + return cm->width == cm->render_width && cm->height == cm->render_height; +#endif // CONFIG_FRAME_SUPERRES } -// Moves resizing to the next state. This is just setting the numerator and -// denominator to the next numerator and denominator, causing -// av1_resize_pending to subsequently return false. -static INLINE void av1_resize_step(struct AV1_COMP *cpi) { - cpi->resize_scale_num = cpi->resize_next_scale_num; - cpi->resize_scale_den = cpi->resize_next_scale_den; +static INLINE int av1_frame_unscaled(const AV1_COMMON *cm) { +#if CONFIG_FRAME_SUPERRES + return av1_superres_unscaled(cm) && av1_resize_unscaled(cm); +#else + return av1_resize_unscaled(cm); +#endif // CONFIG_FRAME_SUPERRES } #ifdef __cplusplus diff --git a/third_party/aom/av1/encoder/encodetxb.c b/third_party/aom/av1/encoder/encodetxb.c index 731642064..3aa4c183e 100644 --- a/third_party/aom/av1/encoder/encodetxb.c +++ b/third_party/aom/av1/encoder/encodetxb.c @@ -70,38 +70,43 @@ static void write_golomb(aom_writer *w, int level) { } void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, - aom_writer *w, int block, int plane, - const tran_low_t *tcoeff, uint16_t eob, - TXB_CTX *txb_ctx) { + aom_writer *w, int blk_row, int blk_col, int block, + int plane, TX_SIZE tx_size, const tran_low_t *tcoeff, + uint16_t eob, TXB_CTX *txb_ctx) { aom_prob *nz_map; aom_prob *eob_flag; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_SIZE tx_size = get_tx_size(plane, xd); - const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); + const TX_SIZE txs_ctx = get_txsize_context(tx_size); + const TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); + const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi); const int16_t *scan = scan_order->scan; + const int16_t *iscan = scan_order->iscan; int c; int is_nz; const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2; + const int height = tx_size_high[tx_size]; const int seg_eob = tx_size_2d[tx_size]; - uint8_t txb_mask[32 * 32] = { 0 }; uint16_t update_eob = 0; - aom_write(w, eob == 0, cm->fc->txb_skip[tx_size][txb_ctx->txb_skip_ctx]); + (void)blk_row; + (void)blk_col; + + aom_write(w, eob == 0, cm->fc->txb_skip[txs_ctx][txb_ctx->txb_skip_ctx]); if (eob == 0) return; #if CONFIG_TXK_SEL - av1_write_tx_type(cm, xd, block, plane, w); + av1_write_tx_type(cm, xd, blk_row, blk_col, block, plane, + get_min_tx_size(tx_size), w); #endif - nz_map = cm->fc->nz_map[tx_size][plane_type]; - eob_flag = cm->fc->eob_flag[tx_size][plane_type]; + nz_map = cm->fc->nz_map[txs_ctx][plane_type]; + eob_flag = cm->fc->eob_flag[txs_ctx][plane_type]; for (c = 0; c < eob; ++c) { - int coeff_ctx = get_nz_map_ctx(tcoeff, txb_mask, scan[c], bwl); - int eob_ctx = get_eob_ctx(tcoeff, scan[c], bwl); + int coeff_ctx = get_nz_map_ctx(tcoeff, scan[c], bwl, height, iscan); + int eob_ctx = get_eob_ctx(tcoeff, scan[c], txs_ctx); tran_low_t v = tcoeff[scan[c]]; is_nz = (v != 0); @@ -113,12 +118,11 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, if 
(is_nz) { aom_write(w, c == (eob - 1), eob_flag[eob_ctx]); } - txb_mask[scan[c]] = 1; } int i; for (i = 0; i < NUM_BASE_LEVELS; ++i) { - aom_prob *coeff_base = cm->fc->coeff_base[tx_size][plane_type][i]; + aom_prob *coeff_base = cm->fc->coeff_base[txs_ctx][plane_type][i]; update_eob = 0; for (c = eob - 1; c >= 0; --c) { @@ -129,7 +133,7 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, if (level <= i) continue; - ctx = get_base_ctx(tcoeff, scan[c], bwl, i + 1); + ctx = get_base_ctx(tcoeff, scan[c], bwl, height, i + 1); if (level == i + 1) { aom_write(w, 1, coeff_base[ctx]); @@ -161,13 +165,13 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, } // level is above 1. - ctx = get_br_ctx(tcoeff, scan[c], bwl); + ctx = get_br_ctx(tcoeff, scan[c], bwl, height); for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) { if (level == (idx + 1 + NUM_BASE_LEVELS)) { - aom_write(w, 1, cm->fc->coeff_lps[tx_size][plane_type][ctx]); + aom_write(w, 1, cm->fc->coeff_lps[txs_ctx][plane_type][ctx]); break; } - aom_write(w, 0, cm->fc->coeff_lps[tx_size][plane_type][ctx]); + aom_write(w, 0, cm->fc->coeff_lps[txs_ctx][plane_type][ctx]); } if (idx < COEFF_BASE_RANGE) continue; @@ -183,7 +187,10 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE bsize = mbmi->sb_type; struct macroblockd_plane *pd = &xd->plane[plane]; -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_SUB8X8 + const BLOCK_SIZE plane_bsize = + AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); +#elif CONFIG_CB4X4 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); #else const BLOCK_SIZE plane_bsize = @@ -191,7 +198,7 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, #endif const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); const int max_blocks_high = max_block_high(xd, plane_bsize, plane); - TX_SIZE tx_size = get_tx_size(plane, xd); + const TX_SIZE tx_size = av1_get_tx_size(plane, xd); const int bkw = tx_size_wide_unit[tx_size]; const int bkh = tx_size_high_unit[tx_size]; const int step = tx_size_wide_unit[tx_size] * tx_size_high_unit[tx_size]; @@ -203,7 +210,8 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, uint16_t eob = x->mbmi_ext->eobs[plane][block]; TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block], x->mbmi_ext->dc_sign_ctx[plane][block] }; - av1_write_coeffs_txb(cm, xd, w, block, plane, tcoeff, eob, &txb_ctx); + av1_write_coeffs_txb(cm, xd, w, row, col, block, plane, tx_size, tcoeff, + eob, &txb_ctx); block += step; } } @@ -211,7 +219,7 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, static INLINE void get_base_ctx_set(const tran_low_t *tcoeffs, int c, // raster order - const int bwl, + const int bwl, const int height, int ctx_set[NUM_BASE_LEVELS]) { const int row = c >> bwl; const int col = c - (row << bwl); @@ -226,7 +234,7 @@ static INLINE void get_base_ctx_set(const tran_low_t *tcoeffs, int ref_col = col + base_ref_offset[idx][1]; int pos = (ref_row << bwl) + ref_col; - if (ref_row < 0 || ref_col < 0 || ref_row >= stride || ref_col >= stride) + if (ref_row < 0 || ref_col < 0 || ref_row >= height || ref_col >= stride) continue; abs_coeff = abs(tcoeffs[pos]); @@ -280,12 +288,14 @@ static INLINE int get_base_cost(tran_low_t abs_qc, int ctx, } int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, - int block, TXB_CTX *txb_ctx) { + int blk_row, int blk_col, int block, TX_SIZE tx_size, + TXB_CTX *txb_ctx) { const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD 
*const xd = &x->e_mbd; - const TX_SIZE tx_size = get_tx_size(plane, xd); + TX_SIZE txs_ctx = get_txsize_context(tx_size); const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct macroblock_plane *p = &x->plane[plane]; const int eob = p->eobs[block]; @@ -293,27 +303,26 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, int c, cost; const int seg_eob = AOMMIN(eob, tx_size_2d[tx_size] - 1); int txb_skip_ctx = txb_ctx->txb_skip_ctx; - aom_prob *nz_map = xd->fc->nz_map[tx_size][plane_type]; + aom_prob *nz_map = xd->fc->nz_map[txs_ctx][plane_type]; const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2; - // txb_mask is only initialized for once here. After that, it will be set when - // coding zero map and then reset when coding level 1 info. - uint8_t txb_mask[32 * 32] = { 0 }; + const int height = tx_size_high[tx_size]; + aom_prob(*coeff_base)[COEFF_BASE_CONTEXTS] = - xd->fc->coeff_base[tx_size][plane_type]; + xd->fc->coeff_base[txs_ctx][plane_type]; - const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); + const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi); const int16_t *scan = scan_order->scan; + const int16_t *iscan = scan_order->iscan; cost = 0; if (eob == 0) { - cost = av1_cost_bit(xd->fc->txb_skip[tx_size][txb_skip_ctx], 1); + cost = av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_skip_ctx], 1); return cost; } - cost = av1_cost_bit(xd->fc->txb_skip[tx_size][txb_skip_ctx], 0); + cost = av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_skip_ctx], 0); #if CONFIG_TXK_SEL cost += av1_tx_type_cost(cpi, xd, mbmi->sb_type, plane, tx_size, tx_type); @@ -325,7 +334,7 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, int level = abs(v); if (c < seg_eob) { - int coeff_ctx = get_nz_map_ctx(qcoeff, txb_mask, scan[c], bwl); + int coeff_ctx = get_nz_map_ctx(qcoeff, scan[c], bwl, height, iscan); cost += av1_cost_bit(nz_map[coeff_ctx], is_nz); } @@ -342,7 +351,7 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, cost += av1_cost_bit(128, sign); } - get_base_ctx_set(qcoeff, scan[c], bwl, ctx_ls); + get_base_ctx_set(qcoeff, scan[c], bwl, height, ctx_ls); int i; for (i = 0; i < NUM_BASE_LEVELS; ++i) { @@ -359,15 +368,15 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, int idx; int ctx; - ctx = get_br_ctx(qcoeff, scan[c], bwl); + ctx = get_br_ctx(qcoeff, scan[c], bwl, height); for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) { if (level == (idx + 1 + NUM_BASE_LEVELS)) { cost += - av1_cost_bit(xd->fc->coeff_lps[tx_size][plane_type][ctx], 1); + av1_cost_bit(xd->fc->coeff_lps[txs_ctx][plane_type][ctx], 1); break; } - cost += av1_cost_bit(xd->fc->coeff_lps[tx_size][plane_type][ctx], 0); + cost += av1_cost_bit(xd->fc->coeff_lps[txs_ctx][plane_type][ctx], 0); } if (idx >= COEFF_BASE_RANGE) { @@ -389,13 +398,11 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, } if (c < seg_eob) { - int eob_ctx = get_eob_ctx(qcoeff, scan[c], bwl); - cost += av1_cost_bit(xd->fc->eob_flag[tx_size][plane_type][eob_ctx], + int eob_ctx = get_eob_ctx(qcoeff, scan[c], txs_ctx); + cost += av1_cost_bit(xd->fc->eob_flag[txs_ctx][plane_type][eob_ctx], c == (eob - 1)); } } - - txb_mask[scan[c]] = 1; } return cost; @@ -409,26 
+416,26 @@ static INLINE int has_base(tran_low_t qc, int base_idx) { static void gen_base_count_mag_arr(int (*base_count_arr)[MAX_TX_SQUARE], int (*base_mag_arr)[2], const tran_low_t *qcoeff, int stride, - int eob, const int16_t *scan) { + int height, int eob, const int16_t *scan) { for (int c = 0; c < eob; ++c) { const int coeff_idx = scan[c]; // raster order if (!has_base(qcoeff[coeff_idx], 0)) continue; const int row = coeff_idx / stride; const int col = coeff_idx % stride; int *mag = base_mag_arr[coeff_idx]; - get_mag(mag, qcoeff, stride, row, col, base_ref_offset, + get_mag(mag, qcoeff, stride, height, row, col, base_ref_offset, BASE_CONTEXT_POSITION_NUM); for (int i = 0; i < NUM_BASE_LEVELS; ++i) { if (!has_base(qcoeff[coeff_idx], i)) continue; int *count = base_count_arr[i] + coeff_idx; - *count = get_level_count(qcoeff, stride, row, col, i, base_ref_offset, - BASE_CONTEXT_POSITION_NUM); + *count = get_level_count(qcoeff, stride, height, row, col, i, + base_ref_offset, BASE_CONTEXT_POSITION_NUM); } } } static void gen_nz_count_arr(int(*nz_count_arr), const tran_low_t *qcoeff, - int stride, int eob, + int stride, int height, int eob, const SCAN_ORDER *scan_order) { const int16_t *scan = scan_order->scan; const int16_t *iscan = scan_order->iscan; @@ -436,7 +443,8 @@ static void gen_nz_count_arr(int(*nz_count_arr), const tran_low_t *qcoeff, const int coeff_idx = scan[c]; // raster order const int row = coeff_idx / stride; const int col = coeff_idx % stride; - nz_count_arr[coeff_idx] = get_nz_count(qcoeff, stride, row, col, iscan); + nz_count_arr[coeff_idx] = + get_nz_count(qcoeff, stride, height, row, col, iscan); } } @@ -478,8 +486,8 @@ static INLINE int has_br(tran_low_t qc) { } static void gen_br_count_mag_arr(int *br_count_arr, int (*br_mag_arr)[2], - const tran_low_t *qcoeff, int stride, int eob, - const int16_t *scan) { + const tran_low_t *qcoeff, int stride, + int height, int eob, const int16_t *scan) { for (int c = 0; c < eob; ++c) { const int coeff_idx = scan[c]; // raster order if (!has_br(qcoeff[coeff_idx])) continue; @@ -487,9 +495,9 @@ static void gen_br_count_mag_arr(int *br_count_arr, int (*br_mag_arr)[2], const int col = coeff_idx % stride; int *count = br_count_arr + coeff_idx; int *mag = br_mag_arr[coeff_idx]; - *count = get_level_count(qcoeff, stride, row, col, NUM_BASE_LEVELS, + *count = get_level_count(qcoeff, stride, height, row, col, NUM_BASE_LEVELS, br_ref_offset, BR_CONTEXT_POSITION_NUM); - get_mag(mag, qcoeff, stride, row, col, br_ref_offset, + get_mag(mag, qcoeff, stride, height, row, col, br_ref_offset, BR_CONTEXT_POSITION_NUM); } } @@ -543,18 +551,19 @@ static INLINE int get_golomb_cost(int abs_qc) { void gen_txb_cache(TxbCache *txb_cache, TxbInfo *txb_info) { const int16_t *scan = txb_info->scan_order->scan; gen_nz_count_arr(txb_cache->nz_count_arr, txb_info->qcoeff, txb_info->stride, - txb_info->eob, txb_info->scan_order); + txb_info->height, txb_info->eob, txb_info->scan_order); gen_nz_ctx_arr(txb_cache->nz_ctx_arr, txb_cache->nz_count_arr, txb_info->qcoeff, txb_info->bwl, txb_info->eob, txb_info->scan_order); gen_base_count_mag_arr(txb_cache->base_count_arr, txb_cache->base_mag_arr, - txb_info->qcoeff, txb_info->stride, txb_info->eob, - scan); + txb_info->qcoeff, txb_info->stride, txb_info->height, + txb_info->eob, scan); gen_base_ctx_arr(txb_cache->base_ctx_arr, txb_cache->base_count_arr, txb_cache->base_mag_arr, txb_info->qcoeff, txb_info->stride, txb_info->eob, scan); gen_br_count_mag_arr(txb_cache->br_count_arr, txb_cache->br_mag_arr, - 
txb_info->qcoeff, txb_info->stride, txb_info->eob, scan); + txb_info->qcoeff, txb_info->stride, txb_info->height, + txb_info->eob, scan); gen_br_ctx_arr(txb_cache->br_ctx_arr, txb_cache->br_count_arr, txb_cache->br_mag_arr, txb_info->qcoeff, txb_info->stride, txb_info->eob, scan); @@ -781,7 +790,7 @@ static int try_self_level_down(tran_low_t *low_coeff, int coeff_idx, if (scan_idx < txb_info->seg_eob) { const int eob_ctx = - get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->bwl); + get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->txs_ctx); cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx], scan_idx == (txb_info->eob - 1)); } @@ -853,9 +862,13 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache, const int nb_row = row - sig_ref_offset[i][0]; const int nb_col = col - sig_ref_offset[i][1]; const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + + if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height && + nb_col < txb_info->stride)) + continue; + const int nb_scan_idx = iscan[nb_coeff_idx]; - if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && - nb_row < txb_info->stride && nb_col < txb_info->stride) { + if (nb_scan_idx < eob) { const int cost_diff = try_neighbor_level_down_nz( nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info); if (cost_map) @@ -871,9 +884,13 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache, const int nb_row = row - base_ref_offset[i][0]; const int nb_col = col - base_ref_offset[i][1]; const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + + if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height && + nb_col < txb_info->stride)) + continue; + const int nb_scan_idx = iscan[nb_coeff_idx]; - if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && - nb_row < txb_info->stride && nb_col < txb_info->stride) { + if (nb_scan_idx < eob) { const int cost_diff = try_neighbor_level_down_base( nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info); if (cost_map) @@ -889,9 +906,13 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache, const int nb_row = row - br_ref_offset[i][0]; const int nb_col = col - br_ref_offset[i][1]; const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + + if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height && + nb_col < txb_info->stride)) + continue; + const int nb_scan_idx = iscan[nb_coeff_idx]; - if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && - nb_row < txb_info->stride && nb_col < txb_info->stride) { + if (nb_scan_idx < eob) { const int cost_diff = try_neighbor_level_down_br( nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info); if (cost_map) @@ -925,7 +946,7 @@ static int get_low_coeff_cost(int coeff_idx, const TxbCache *txb_cache, cost += get_base_cost(abs_qc, ctx, txb_probs->coeff_base, base_idx); if (scan_idx < txb_info->seg_eob) { const int eob_ctx = - get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->bwl); + get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->txs_ctx); cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx], scan_idx == (txb_info->eob - 1)); } @@ -982,7 +1003,7 @@ int try_change_eob(int *new_eob, int coeff_idx, const TxbCache *txb_cache, // Note that get_eob_ctx does NOT actually account for qcoeff, so we don't // need to lower down the qcoeff here const int eob_ctx = - get_eob_ctx(txb_info->qcoeff, scan[*new_eob - 1], txb_info->bwl); + get_eob_ctx(txb_info->qcoeff, scan[*new_eob - 1], txb_info->txs_ctx); cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx], 0); cost_diff += av1_cost_bit(txb_probs->eob_flag[eob_ctx], 1); } else { @@ 
-1016,10 +1037,14 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) { for (int i = 0; i < SIG_REF_OFFSET_NUM; ++i) { const int nb_row = row - sig_ref_offset[i][0]; const int nb_col = col - sig_ref_offset[i][1]; + + if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height && + nb_col < txb_info->stride)) + continue; + const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; const int nb_scan_idx = iscan[nb_coeff_idx]; - if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && - nb_row < txb_info->stride && nb_col < txb_info->stride) { + if (nb_scan_idx < eob) { const int scan_idx = iscan[coeff_idx]; if (scan_idx < nb_scan_idx) { const int level = 1; @@ -1030,7 +1055,7 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) { const int count = txb_cache->nz_count_arr[nb_coeff_idx]; txb_cache->nz_ctx_arr[nb_coeff_idx][0] = get_nz_map_ctx_from_count( count, txb_info->qcoeff, nb_coeff_idx, txb_info->bwl, iscan); - // int ref_ctx = get_nz_map_ctx2(txb_info->qcoeff, nb_coeff_idx, + // int ref_ctx = get_nz_map_ctx(txb_info->qcoeff, nb_coeff_idx, // txb_info->bwl, iscan); // if (ref_ctx != txb_cache->nz_ctx_arr[nb_coeff_idx][0]) // printf("nz ctx %d ref_ctx %d\n", @@ -1043,11 +1068,15 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) { const int nb_row = row - base_ref_offset[i][0]; const int nb_col = col - base_ref_offset[i][1]; const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + + if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height && + nb_col < txb_info->stride)) + continue; + const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx]; if (!has_base(nb_coeff, 0)) continue; const int nb_scan_idx = iscan[nb_coeff_idx]; - if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && - nb_row < txb_info->stride && nb_col < txb_info->stride) { + if (nb_scan_idx < eob) { if (row >= nb_row && col >= nb_col) update_mag_arr(txb_cache->base_mag_arr[nb_coeff_idx], abs_qc); const int mag = @@ -1076,11 +1105,15 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) { const int nb_row = row - br_ref_offset[i][0]; const int nb_col = col - br_ref_offset[i][1]; const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + + if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height && + nb_col < txb_info->stride)) + continue; + const int nb_scan_idx = iscan[nb_coeff_idx]; const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx]; if (!has_br(nb_coeff)) continue; - if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && - nb_row < txb_info->stride && nb_col < txb_info->stride) { + if (nb_scan_idx < eob) { const int level = 1 + NUM_BASE_LEVELS; if (abs_qc == level) { txb_cache->br_count_arr[nb_coeff_idx] -= 1; @@ -1112,8 +1145,8 @@ static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info, const int16_t *iscan = txb_info->scan_order->iscan; if (scan_idx < txb_info->seg_eob) { - int coeff_ctx = - get_nz_map_ctx2(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, iscan); + int coeff_ctx = get_nz_map_ctx(txb_info->qcoeff, scan[scan_idx], + txb_info->bwl, txb_info->height, iscan); cost += av1_cost_bit(txb_probs->nz_map[coeff_ctx], is_nz); } @@ -1122,7 +1155,8 @@ static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info, txb_ctx->dc_sign_ctx); int ctx_ls[NUM_BASE_LEVELS] = { 0 }; - get_base_ctx_set(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, ctx_ls); + get_base_ctx_set(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, + txb_info->height, ctx_ls); int 
i; for (i = 0; i < NUM_BASE_LEVELS; ++i) { @@ -1130,14 +1164,15 @@ static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info, } if (abs_qc > NUM_BASE_LEVELS) { - int ctx = get_br_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl); + int ctx = get_br_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, + txb_info->height); cost += get_br_cost(abs_qc, ctx, txb_probs->coeff_lps); cost += get_golomb_cost(abs_qc); } if (scan_idx < txb_info->seg_eob) { int eob_ctx = - get_eob_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl); + get_eob_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->txs_ctx); cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx], scan_idx == (txb_info->eob - 1)); } @@ -1323,8 +1358,7 @@ void try_level_down_facade(LevelDownStats *stats, int scan_idx, test_level_down(coeff_idx, txb_cache, txb_probs, txb_info); #endif } - stats->rd_diff = RDCOST(txb_info->rdmult, txb_info->rddiv, stats->cost_diff, - stats->dist_diff); + stats->rd_diff = RDCOST(txb_info->rdmult, stats->cost_diff, stats->dist_diff); if (stats->rd_diff < 0) stats->update = 1; return; } @@ -1424,18 +1458,17 @@ static int optimize_txb(TxbInfo *txb_info, const TxbProbs *txb_probs, // These numbers are empirically obtained. static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { -#if CONFIG_EC_ADAPT { 17, 13 }, { 16, 10 }, -#else - { 20, 12 }, { 16, 12 }, -#endif }; -int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, - TX_SIZE tx_size, TXB_CTX *txb_ctx) { +int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, + int blk_row, int blk_col, int block, TX_SIZE tx_size, + TXB_CTX *txb_ctx) { MACROBLOCKD *const xd = &x->e_mbd; const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const TX_SIZE txs_ctx = get_txsize_context(tx_size); + const TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; @@ -1445,34 +1478,34 @@ int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block); const int16_t *dequant = pd->dequant; const int seg_eob = AOMMIN(eob, tx_size_2d[tx_size] - 1); - const aom_prob *nz_map = xd->fc->nz_map[tx_size][plane_type]; + const aom_prob *nz_map = xd->fc->nz_map[txs_ctx][plane_type]; const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2; const int stride = 1 << bwl; + const int height = tx_size_high[tx_size]; aom_prob(*coeff_base)[COEFF_BASE_CONTEXTS] = - xd->fc->coeff_base[tx_size][plane_type]; + xd->fc->coeff_base[txs_ctx][plane_type]; - const aom_prob *coeff_lps = xd->fc->coeff_lps[tx_size][plane_type]; + const aom_prob *coeff_lps = xd->fc->coeff_lps[txs_ctx][plane_type]; const int is_inter = is_inter_block(mbmi); - const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); + const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi); const TxbProbs txb_probs = { xd->fc->dc_sign[plane_type], nz_map, coeff_base, coeff_lps, - xd->fc->eob_flag[tx_size][plane_type], - xd->fc->txb_skip[tx_size] }; + xd->fc->eob_flag[txs_ctx][plane_type], + xd->fc->txb_skip[txs_ctx] }; const int shift = av1_get_tx_scale(tx_size); const int64_t rdmult = (x->rdmult * plane_rd_mult[is_inter][plane_type] + 2) >> 2; - const int64_t rddiv = x->rddiv; - TxbInfo txb_info = { qcoeff, dqcoeff, tcoeff, 
dequant, shift, - tx_size, bwl, stride, eob, seg_eob, - scan_order, txb_ctx, rdmult, rddiv }; + TxbInfo txb_info = { qcoeff, dqcoeff, tcoeff, dequant, shift, + tx_size, txs_ctx, bwl, stride, height, + eob, seg_eob, scan_order, txb_ctx, rdmult }; + TxbCache txb_cache; gen_txb_cache(&txb_cache, &txb_info); @@ -1510,9 +1543,9 @@ void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col, const uint16_t eob = p->eobs[block]; const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); const PLANE_TYPE plane_type = pd->plane_type; - const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); + const TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); + const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi); (void)plane_bsize; int cul_level = av1_get_txb_entropy_context(qcoeff, scan_order, eob); @@ -1536,25 +1569,28 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row, const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block); const int segment_id = mbmi->segment_id; - const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); + const TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); + const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi); const int16_t *scan = scan_order->scan; + const int16_t *iscan = scan_order->iscan; const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size); int c, i; TXB_CTX txb_ctx; get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col, pd->left_context + blk_row, &txb_ctx); const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2; + const int height = tx_size_high[tx_size]; int cul_level = 0; unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2]; - uint8_t txb_mask[32 * 32] = { 0 }; - nz_map_count = &td->counts->nz_map[tx_size][plane_type]; + TX_SIZE txsize_ctx = get_txsize_context(tx_size); + + nz_map_count = &td->counts->nz_map[txsize_ctx][plane_type]; memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob); - ++td->counts->txb_skip[tx_size][txb_ctx.txb_skip_ctx][eob == 0]; + ++td->counts->txb_skip[txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0]; x->mbmi_ext->txb_skip_ctx[plane][block] = txb_ctx.txb_skip_ctx; x->mbmi_ext->eobs[plane][block] = eob; @@ -1565,24 +1601,23 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row, } #if CONFIG_TXK_SEL - av1_update_tx_type_count(cm, xd, block, plane, mbmi->sb_type, tx_size, - td->counts); + av1_update_tx_type_count(cm, xd, blk_row, blk_col, block, plane, + mbmi->sb_type, get_min_tx_size(tx_size), td->counts); #endif for (c = 0; c < eob; ++c) { tran_low_t v = qcoeff[scan[c]]; int is_nz = (v != 0); - int coeff_ctx = get_nz_map_ctx(tcoeff, txb_mask, scan[c], bwl); - int eob_ctx = get_eob_ctx(tcoeff, scan[c], bwl); + int coeff_ctx = get_nz_map_ctx(tcoeff, scan[c], bwl, height, iscan); + int eob_ctx = get_eob_ctx(tcoeff, scan[c], txsize_ctx); if (c == seg_eob - 1) break; ++(*nz_map_count)[coeff_ctx][is_nz]; if (is_nz) { - ++td->counts->eob_flag[tx_size][plane_type][eob_ctx][c == (eob - 1)]; + ++td->counts->eob_flag[txsize_ctx][plane_type][eob_ctx][c == (eob - 1)]; } - txb_mask[scan[c]] = 1; } // Reverse process order to handle coefficient level and sign. 
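Most of the encodetxb.c hunks above add a transform height argument next to the stride because transform blocks are no longer assumed square: a neighbouring coefficient is only consulted when its row lies in [0, height) and its column in [0, stride). A minimal sketch of that bounds test, mirroring the checks added in try_level_down() and update_level_down(); the helper name is illustrative, not from the tree.

/* Neighbour validity for a rectangular transform block: rows are bounded
 * by the transform height, columns by the stride (block width). */
static int nb_is_inside(int nb_row, int nb_col, int height, int stride) {
  return nb_row >= 0 && nb_col >= 0 && nb_row < height && nb_col < stride;
}

/* Typical use, as in the updated neighbour loops:
 *   const int nb_row = row - ref_offset[i][0];
 *   const int nb_col = col - ref_offset[i][1];
 *   if (!nb_is_inside(nb_row, nb_col, txb_info->height, txb_info->stride))
 *     continue;
 */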
@@ -1595,10 +1630,10 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row, if (level <= i) continue; - ctx = get_base_ctx(tcoeff, scan[c], bwl, i + 1); + ctx = get_base_ctx(tcoeff, scan[c], bwl, height, i + 1); if (level == i + 1) { - ++td->counts->coeff_base[tx_size][plane_type][i][ctx][1]; + ++td->counts->coeff_base[txsize_ctx][plane_type][i][ctx][1]; if (c == 0) { int dc_sign_ctx = txb_ctx.dc_sign_ctx; @@ -1608,7 +1643,7 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row, cul_level += level; continue; } - ++td->counts->coeff_base[tx_size][plane_type][i][ctx][0]; + ++td->counts->coeff_base[txsize_ctx][plane_type][i][ctx][0]; update_eob = AOMMAX(update_eob, c); } } @@ -1630,13 +1665,13 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row, } // level is above 1. - ctx = get_br_ctx(tcoeff, scan[c], bwl); + ctx = get_br_ctx(tcoeff, scan[c], bwl, height); for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) { if (level == (idx + 1 + NUM_BASE_LEVELS)) { - ++td->counts->coeff_lps[tx_size][plane_type][ctx][1]; + ++td->counts->coeff_lps[txsize_ctx][plane_type][ctx][1]; break; } - ++td->counts->coeff_lps[tx_size][plane_type][ctx][0]; + ++td->counts->coeff_lps[txsize_ctx][plane_type][ctx][0]; } if (idx < COEFF_BASE_RANGE) continue; @@ -1835,46 +1870,74 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane, TX_TYPE txk_end = TX_TYPES - 1; TX_TYPE best_tx_type = txk_start; int64_t best_rd = INT64_MAX; + uint8_t best_eob = 0; const int coeff_ctx = combine_entropy_contexts(*a, *l); + RD_STATS best_rd_stats; TX_TYPE tx_type; + + av1_invalid_rd_stats(&best_rd_stats); + for (tx_type = txk_start; tx_type <= txk_end; ++tx_type) { - if (plane == 0) mbmi->txk_type[block] = tx_type; - TX_TYPE ref_tx_type = - get_tx_type(get_plane_type(plane), xd, block, tx_size); + if (plane == 0) mbmi->txk_type[(blk_row << 4) + blk_col] = tx_type; + TX_TYPE ref_tx_type = av1_get_tx_type(get_plane_type(plane), xd, blk_row, + blk_col, block, tx_size); if (tx_type != ref_tx_type) { - // use get_tx_type() to check if the tx_type is valid for the current mode - // if it's not, we skip it here. + // use av1_get_tx_type() to check if the tx_type is valid for the current + // mode if it's not, we skip it here. 
continue; } + +#if CONFIG_EXT_TX + int is_inter = is_inter_block(mbmi); + int ext_tx_set = get_ext_tx_set(get_min_tx_size(tx_size), mbmi->sb_type, + is_inter, cm->reduced_tx_set_used); + if (!(is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) && + !(!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) + continue; +#endif // CONFIG_EXT_TX + RD_STATS this_rd_stats; av1_invalid_rd_stats(&this_rd_stats); av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); + av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, + a, l); av1_dist_block(cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size, &this_rd_stats.dist, &this_rd_stats.sse, OUTPUT_HAS_PREDICTED_PIXELS); - const SCAN_ORDER *scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); - this_rd_stats.rate = av1_cost_coeffs( - cpi, x, plane, block, tx_size, scan_order, a, l, use_fast_coef_costing); - int rd = - RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist); + const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi); + this_rd_stats.rate = + av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size, + scan_order, a, l, use_fast_coef_costing); + int rd = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist); + if (rd < best_rd) { best_rd = rd; - *rd_stats = this_rd_stats; + best_rd_stats = this_rd_stats; best_tx_type = tx_type; + best_eob = x->plane[plane].txb_entropy_ctx[block]; } } - if (plane == 0) mbmi->txk_type[block] = best_tx_type; - // TODO(angiebird): Instead of re-call av1_xform_quant and av1_optimize_b, - // copy the best result in the above tx_type search for loop - av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, - coeff_ctx, AV1_XFORM_QUANT_FP); - av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); + + av1_merge_rd_stats(rd_stats, &best_rd_stats); + + // if (x->plane[plane].eobs[block] == 0) + // if (best_tx_type != DCT_DCT) + // exit(0); + + if (best_eob == 0 && is_inter_block(mbmi)) best_tx_type = DCT_DCT; + + if (plane == 0) mbmi->txk_type[(blk_row << 4) + blk_col] = best_tx_type; + x->plane[plane].txb_entropy_ctx[block] = best_eob; + if (!is_inter_block(mbmi)) { // intra mode needs decoded result such that the next transform block // can use it for prediction. 
+ av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + coeff_ctx, AV1_XFORM_QUANT_FP); + av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, + a, l); + av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col, x->plane[plane].eobs[block]); } diff --git a/third_party/aom/av1/encoder/encodetxb.h b/third_party/aom/av1/encoder/encodetxb.h index 836033a54..cbafe59c9 100644 --- a/third_party/aom/av1/encoder/encodetxb.h +++ b/third_party/aom/av1/encoder/encodetxb.h @@ -30,14 +30,15 @@ typedef struct TxbInfo { const int16_t *dequant; int shift; TX_SIZE tx_size; + TX_SIZE txs_ctx; int bwl; int stride; + int height; int eob; int seg_eob; const SCAN_ORDER *scan_order; TXB_CTX *txb_ctx; int64_t rdmult; - int64_t rddiv; } TxbInfo; typedef struct TxbCache { @@ -66,11 +67,12 @@ typedef struct TxbProbs { void av1_alloc_txb_buf(AV1_COMP *cpi); void av1_free_txb_buf(AV1_COMP *cpi); int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, - int block, TXB_CTX *txb_ctx); + int blk_row, int blk_col, int block, TX_SIZE tx_size, + TXB_CTX *txb_ctx); void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, - aom_writer *w, int block, int plane, - const tran_low_t *tcoeff, uint16_t eob, - TXB_CTX *txb_ctx); + aom_writer *w, int blk_row, int blk_col, int block, + int plane, TX_SIZE tx_size, const tran_low_t *tcoeff, + uint16_t eob, TXB_CTX *txb_ctx); void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, aom_writer *w, int plane); int av1_get_txb_entropy_context(const tran_low_t *qcoeff, @@ -95,8 +97,9 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane, const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l, int use_fast_coef_costing, RD_STATS *rd_stats); #endif -int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, - TX_SIZE tx_size, TXB_CTX *txb_ctx); +int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, + int blk_row, int blk_col, int block, TX_SIZE tx_size, + TXB_CTX *txb_ctx); #ifdef __cplusplus } #endif diff --git a/third_party/aom/av1/encoder/ethread.c b/third_party/aom/av1/encoder/ethread.c index 7af5f78b6..1aa1d52a2 100644 --- a/third_party/aom/av1/encoder/ethread.c +++ b/third_party/aom/av1/encoder/ethread.c @@ -26,6 +26,10 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { td_t->rd_counts.global_motion_used[i]; #endif // CONFIG_GLOBAL_MOTION + td->rd_counts.compound_ref_used_flag |= + td_t->rd_counts.compound_ref_used_flag; + td->rd_counts.single_ref_used_flag |= td_t->rd_counts.single_ref_used_flag; + for (i = 0; i < TX_SIZES; i++) for (j = 0; j < PLANE_TYPES; j++) for (k = 0; k < REF_TYPES; k++) @@ -122,11 +126,9 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) { #if CONFIG_PALETTE // Allocate buffers used by palette coding mode. 
- if (cpi->common.allow_screen_content_tools) { - CHECK_MEM_ERROR( - cm, thread_data->td->palette_buffer, - aom_memalign(16, sizeof(*thread_data->td->palette_buffer))); - } + CHECK_MEM_ERROR( + cm, thread_data->td->palette_buffer, + aom_memalign(16, sizeof(*thread_data->td->palette_buffer))); #endif // CONFIG_PALETTE // Create threads @@ -168,7 +170,7 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) { } #if CONFIG_PALETTE - if (cpi->common.allow_screen_content_tools && i < num_workers - 1) + if (i < num_workers - 1) thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer; #endif // CONFIG_PALETTE } diff --git a/third_party/aom/av1/encoder/firstpass.c b/third_party/aom/av1/encoder/firstpass.c index 7a0abba2d..e7d78d83e 100644 --- a/third_party/aom/av1/encoder/firstpass.c +++ b/third_party/aom/av1/encoder/firstpass.c @@ -456,6 +456,31 @@ static void set_first_pass_params(AV1_COMP *cpi) { cpi->rc.frames_to_key = INT_MAX; } +#if CONFIG_FLEX_REFS +static double raw_motion_error_stdev(int *raw_motion_err_list, + int raw_motion_err_counts) { + int64_t sum_raw_err = 0; + double raw_err_avg = 0; + double raw_err_stdev = 0; + if (raw_motion_err_counts == 0) return 0; + + int i; + for (i = 0; i < raw_motion_err_counts; i++) { + sum_raw_err += raw_motion_err_list[i]; + } + raw_err_avg = sum_raw_err / raw_motion_err_counts; + for (i = 0; i < raw_motion_err_counts; i++) { + raw_err_stdev += (raw_motion_err_list[i] - raw_err_avg) * + (raw_motion_err_list[i] - raw_err_avg); + } + // Calculate the standard deviation for the motion error of all the inter + // blocks of the 0,0 motion using the last source + // frame as the reference. + raw_err_stdev = sqrt(raw_err_stdev / raw_motion_err_counts); + return raw_err_stdev; +} +#endif // CONFIG_FLEX_REFS + #define UL_INTRA_THRESH 50 #define INVALID_ROW -1 void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { @@ -506,6 +531,13 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { od_adapt_ctx pvq_context; #endif +#if CONFIG_FLEX_REFS + int *raw_motion_err_list; + int raw_motion_err_counts = 0; + CHECK_MEM_ERROR( + cm, raw_motion_err_list, + aom_calloc(cm->mb_rows * cm->mb_cols, sizeof(*raw_motion_err_list))); +#endif // CONFIG_FLEX_REFS // First pass code requires valid last and new frame buffers. assert(new_yv12 != NULL); assert(frame_is_intra_only(cm) || (lst_yv12 != NULL)); @@ -968,6 +1000,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { } } } +#if CONFIG_FLEX_REFS + raw_motion_err_list[raw_motion_err_counts++] = raw_motion_error; +#endif // CONFIG_FLEX_REFS } else { sr_coded_error += (int64_t)this_error; } @@ -981,7 +1016,6 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { recon_yoffset += 16; recon_uvoffset += uv_mb_height; } - // Adjust to the next row of MBs. 
x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols; x->plane[1].src.buf += @@ -991,7 +1025,10 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { aom_clear_system_state(); } - +#if CONFIG_FLEX_REFS + const double raw_err_stdev = + raw_motion_error_stdev(raw_motion_err_list, raw_motion_err_counts); +#endif // CONFIG_FLEX_REFS #if CONFIG_PVQ #if !CONFIG_ANS od_ec_enc_clear(&x->daala_enc.w.ec); @@ -1045,6 +1082,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { fps.intra_skip_pct = (double)intra_skip_count / num_mbs; fps.inactive_zone_rows = (double)image_data_start_row; fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix +#if CONFIG_FLEX_REFS + fps.raw_error_stdev = raw_err_stdev; +#endif // CONFIG_FLEX_REFS if (mvcount > 0) { fps.MVr = (double)sum_mvr / mvcount; @@ -1231,27 +1271,6 @@ static void setup_rf_level_maxq(AV1_COMP *cpi) { } } -void av1_calculate_next_scaled_size(const AV1_COMP *cpi, - int *scaled_frame_width, - int *scaled_frame_height) { - *scaled_frame_width = - cpi->oxcf.width * cpi->resize_next_scale_num / cpi->resize_next_scale_den; - *scaled_frame_height = cpi->oxcf.height * cpi->resize_next_scale_num / - cpi->resize_next_scale_den; -} - -#if CONFIG_FRAME_SUPERRES -void av1_calculate_superres_size(const AV1_COMP *cpi, int *encoded_width, - int *encoded_height) { - *encoded_width = cpi->oxcf.scaled_frame_width * - cpi->common.superres_scale_numerator / - SUPERRES_SCALE_DENOMINATOR; - *encoded_height = cpi->oxcf.scaled_frame_height * - cpi->common.superres_scale_numerator / - SUPERRES_SCALE_DENOMINATOR; -} -#endif // CONFIG_FRAME_SUPERRES - void av1_init_second_pass(AV1_COMP *cpi) { const AV1EncoderConfig *const oxcf = &cpi->oxcf; TWO_PASS *const twopass = &cpi->twopass; @@ -1673,6 +1692,9 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits, // (3) The bi-predictive group interval is strictly smaller than the // golden group interval. const int is_bipred_enabled = +#if CONFIG_FLEX_REFS + cpi->bwd_ref_allowed && +#endif rc->source_alt_ref_pending && rc->bipred_group_interval && rc->bipred_group_interval <= (rc->baseline_gf_interval - rc->source_alt_ref_pending); @@ -2046,6 +2068,11 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { const int is_key_frame = frame_is_intra_only(cm); const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active; +#if CONFIG_FLEX_REFS + cpi->extra_arf_allowed = 1; + cpi->bwd_ref_allowed = 1; +#endif + // Reset the GF group data structures unless this is a key // frame in which case it will already have been done. 
if (is_key_frame == 0) { @@ -2106,6 +2133,12 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { } } +#if CONFIG_FLEX_REFS + double avg_sr_coded_error = 0; + double avg_raw_err_stdev = 0; + int non_zero_stdev_count = 0; +#endif // CONFIG_FLEX_REFS + i = 0; while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) { ++i; @@ -2129,6 +2162,14 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { accumulate_frame_motion_stats( &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); +#if CONFIG_FLEX_REFS + // sum up the metric values of current gf group + avg_sr_coded_error += next_frame.sr_coded_error; + if (next_frame.raw_error_stdev) { + non_zero_stdev_count++; + avg_raw_err_stdev += next_frame.raw_error_stdev; + } +#endif // CONFIG_FLEX_REFS // Accumulate the effect of prediction quality decay. if (!flash_detected) { @@ -2175,7 +2216,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { *this_frame = next_frame; old_boost_score = boost_score; } - twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0); // Was the group length constrained by the requirement for a new KF? @@ -2202,11 +2242,35 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Set the interval until the next gf. rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending); - #if CONFIG_EXT_REFS - // Compute how many extra alt_refs we can have - cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval, - rc->source_alt_ref_pending); +#if CONFIG_FLEX_REFS + const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs + : cpi->common.MBs; + if (i) avg_sr_coded_error /= i; + if (non_zero_stdev_count) avg_raw_err_stdev /= non_zero_stdev_count; + + // Disable extra alter refs and backward ref for "still" gf group + // zero_motion_accumulator indicates the minimum percentage of (0, 0) motion + // in gf group + // avg_sr_coded_error indicates the average of the sse per pixel of each frame + // in gf group + // avg_raw_err_stdev indicates the average of the standard deviation of (0, 0) + // motion error per block of each frame in gf group + assert(num_mbs > 0); + const int disable_bwd_extarf = + (zero_motion_accumulator > MIN_ZERO_MOTION && + avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR && + avg_raw_err_stdev < MAX_RAW_ERR_VAR); + + if (disable_bwd_extarf) cpi->extra_arf_allowed = cpi->bwd_ref_allowed = 0; + + if (!cpi->extra_arf_allowed) + cpi->num_extra_arfs = 0; + else +#endif // CONFIG_FLEX_REFS + // Compute how many extra alt_refs we can have + cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval, + rc->source_alt_ref_pending); // Currently at maximum two extra ARFs' are allowed assert(cpi->num_extra_arfs <= MAX_EXT_ARFS); #endif // CONFIG_EXT_REFS @@ -2291,12 +2355,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->section_intra_rating = calculate_section_intra_ratio( start_pos, twopass->stats_in_end, rc->baseline_gf_interval); } - - if (oxcf->resize_mode == RESIZE_DYNAMIC) { - // Default to starting GF groups at normal frame size. - // TODO(afergs): Make a function for this - cpi->resize_next_scale_num = cpi->resize_next_scale_den; - } } // Threshold for use of the lagging second reference frame. 
High second ref @@ -2638,12 +2696,6 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { // The count of bits left is adjusted elsewhere based on real coded frame // sizes. twopass->modified_error_left -= kf_group_err; - - if (oxcf->resize_mode == RESIZE_DYNAMIC) { - // Default to normal-sized frame on keyframes. - // TODO(afergs): Make a function for this - cpi->resize_next_scale_num = cpi->resize_next_scale_den; - } } // Define the reference buffers that will be updated post encode. @@ -2741,7 +2793,7 @@ static void configure_buffer_updates(AV1_COMP *cpi) { break; case LAST_BIPRED_UPDATE: - cpi->refresh_last_frame = 0; + cpi->refresh_last_frame = 1; cpi->refresh_golden_frame = 0; cpi->refresh_bwd_ref_frame = 0; cpi->refresh_alt_ref_frame = 0; diff --git a/third_party/aom/av1/encoder/firstpass.h b/third_party/aom/av1/encoder/firstpass.h index 43104454c..266766d99 100644 --- a/third_party/aom/av1/encoder/firstpass.h +++ b/third_party/aom/av1/encoder/firstpass.h @@ -52,6 +52,13 @@ typedef struct { #define MIN_EXT_ARF_INTERVAL 4 #endif // CONFIG_EXT_REFS +#if CONFIG_FLEX_REFS +#define MIN_ZERO_MOTION 0.95 +#define MAX_SR_CODED_ERROR 40 +#define MAX_RAW_ERR_VAR 2000 +#define MIN_MV_IN_OUT 0.4 +#endif // CONFIG_FLEX_REFS + #define VLOW_MOTION_THRESHOLD 950 typedef struct { @@ -77,6 +84,10 @@ typedef struct { double new_mv_count; double duration; double count; +#if CONFIG_FLEX_REFS + // standard deviation for (0, 0) motion prediction error + double raw_error_stdev; +#endif // CONFIG_FLEX_REFS } FIRSTPASS_STATS; typedef enum { @@ -177,18 +188,6 @@ void av1_twopass_postencode_update(struct AV1_COMP *cpi); // Post encode update of the rate control parameters for 2-pass void av1_twopass_postencode_update(struct AV1_COMP *cpi); -void av1_calculate_next_scaled_size(const struct AV1_COMP *cpi, - int *scaled_frame_width, - int *scaled_frame_height); - -#if CONFIG_FRAME_SUPERRES -// This is the size after superress scaling, which could be 1:1. -// Superres scaling happens after regular downscaling. 
-// TODO(afergs): Limit overall reduction to 1/2 of the original size -void av1_calculate_superres_size(const struct AV1_COMP *cpi, int *encoded_width, - int *encoded_height); -#endif // CONFIG_FRAME_SUPERRES - #if CONFIG_EXT_REFS static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) { if (arf_pending && MAX_EXT_ARFS > 0) diff --git a/third_party/aom/av1/encoder/global_motion.c b/third_party/aom/av1/encoder/global_motion.c index 74cbc8ae7..661a1feb4 100644 --- a/third_party/aom/av1/encoder/global_motion.c +++ b/third_party/aom/av1/encoder/global_motion.c @@ -131,8 +131,8 @@ int64_t refine_integerized_param(WarpedMotionParams *wm, #endif // CONFIG_HIGHBITDEPTH uint8_t *ref, int r_width, int r_height, int r_stride, uint8_t *dst, int d_width, - int d_height, int d_stride, - int n_refinements) { + int d_height, int d_stride, int n_refinements, + int64_t best_frame_error) { static const int max_trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6, 8, 8, 8 }; @@ -147,15 +147,16 @@ int64_t refine_integerized_param(WarpedMotionParams *wm, int32_t best_param; force_wmtype(wm, wmtype); - best_error = av1_warp_error(wm, + best_error = av1_warp_error( + wm, #if CONFIG_HIGHBITDEPTH - use_hbd, bd, + use_hbd, bd, #endif // CONFIG_HIGHBITDEPTH - ref, r_width, r_height, r_stride, - dst + border * d_stride + border, border, border, - d_width - 2 * border, d_height - 2 * border, - d_stride, 0, 0, 16, 16); - step = 1 << (n_refinements + 1); + ref, r_width, r_height, r_stride, dst + border * d_stride + border, + border, border, d_width - 2 * border, d_height - 2 * border, d_stride, 0, + 0, SCALE_SUBPEL_SHIFTS, SCALE_SUBPEL_SHIFTS, best_frame_error); + best_error = AOMMIN(best_error, best_frame_error); + step = 1 << (n_refinements - 1); for (i = 0; i < n_refinements; i++, step >>= 1) { for (p = 0; p < n_params; ++p) { int step_dir = 0; @@ -174,7 +175,7 @@ int64_t refine_integerized_param(WarpedMotionParams *wm, #endif // CONFIG_HIGHBITDEPTH ref, r_width, r_height, r_stride, dst + border * d_stride + border, border, border, d_width - 2 * border, d_height - 2 * border, d_stride, - 0, 0, 16, 16); + 0, 0, SCALE_SUBPEL_SHIFTS, SCALE_SUBPEL_SHIFTS, best_error); if (step_error < best_error) { best_error = step_error; best_param = *param; @@ -190,7 +191,7 @@ int64_t refine_integerized_param(WarpedMotionParams *wm, #endif // CONFIG_HIGHBITDEPTH ref, r_width, r_height, r_stride, dst + border * d_stride + border, border, border, d_width - 2 * border, d_height - 2 * border, d_stride, - 0, 0, 16, 16); + 0, 0, SCALE_SUBPEL_SHIFTS, SCALE_SUBPEL_SHIFTS, best_error); if (step_error < best_error) { best_error = step_error; best_param = *param; @@ -209,7 +210,8 @@ int64_t refine_integerized_param(WarpedMotionParams *wm, #endif // CONFIG_HIGHBITDEPTH ref, r_width, r_height, r_stride, dst + border * d_stride + border, border, border, d_width - 2 * border, d_height - 2 * border, - d_stride, 0, 0, 16, 16); + d_stride, 0, 0, SCALE_SUBPEL_SHIFTS, SCALE_SUBPEL_SHIFTS, + best_error); if (step_error < best_error) { best_error = step_error; best_param = *param; diff --git a/third_party/aom/av1/encoder/global_motion.h b/third_party/aom/av1/encoder/global_motion.h index 38509df6a..7fca5327f 100644 --- a/third_party/aom/av1/encoder/global_motion.h +++ b/third_party/aom/av1/encoder/global_motion.h @@ -36,7 +36,8 @@ int64_t refine_integerized_param(WarpedMotionParams *wm, #endif // CONFIG_HIGHBITDEPTH uint8_t *ref, int r_width, int r_height, int r_stride, uint8_t *dst, int d_width, - int d_height, int d_stride, int n_refinements); 
+ int d_height, int d_stride, int n_refinements, + int64_t best_frame_error); /* Computes "num_motions" candidate global motion parameters between two frames. diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c index c57deed84..85f4b7d9b 100644 --- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c +++ b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c @@ -18,7 +18,7 @@ #if CONFIG_CHROMA_2X2 static void fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, int lossless) { + int diff_stride, TxfmParam *txfm_param) { tran_high_t a1 = src_diff[0]; tran_high_t b1 = src_diff[1]; tran_high_t c1 = src_diff[diff_stride]; @@ -39,134 +39,151 @@ static void fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, coeff[2] = (tran_low_t)(4 * c1); coeff[3] = (tran_low_t)(4 * d1); - (void)tx_type; - (void)lossless; + (void)txfm_param; } #endif static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, int lossless) { - if (lossless) { - assert(tx_type == DCT_DCT); + int diff_stride, TxfmParam *txfm_param) { + if (txfm_param->lossless) { + assert(txfm_param->tx_type == DCT_DCT); av1_fwht4x4(src_diff, coeff, diff_stride); return; } - av1_fht4x4(src_diff, coeff, diff_stride, tx_type); +#if CONFIG_LGT + // only C version has LGTs + av1_fht4x4_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht4x4(src_diff, coeff, diff_stride, txfm_param); +#endif } static void fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht4x8(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_LGT + av1_fht4x8_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht4x8(src_diff, coeff, diff_stride, txfm_param); +#endif } static void fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht8x4(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_LGT + av1_fht8x4_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht8x4(src_diff, coeff, diff_stride, txfm_param); +#endif } static void fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht8x16(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_LGT + av1_fht8x16_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht8x16(src_diff, coeff, diff_stride, txfm_param); +#endif } static void fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht16x8(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_LGT + av1_fht16x8_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht16x8(src_diff, coeff, diff_stride, txfm_param); +#endif } static void fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht16x32(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { + av1_fht16x32(src_diff, coeff, diff_stride, txfm_param); } static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT 
fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht32x16(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { + av1_fht32x16(src_diff, coeff, diff_stride, txfm_param); } static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht8x8(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_LGT + av1_fht8x8_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht8x8(src_diff, coeff, diff_stride, txfm_param); +#endif } static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht16x16(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { + av1_fht16x16(src_diff, coeff, diff_stride, txfm_param); } static void fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht32x32(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_MRC_TX + // MRC_DCT currently only has a C implementation + if (txfm_param->tx_type == MRC_DCT) { + av1_fht32x32_c(src_diff, coeff, diff_stride, txfm_param); + return; + } +#endif // CONFIG_MRC_TX + av1_fht32x32(src_diff, coeff, diff_stride, txfm_param); } #if CONFIG_TX64X64 static void fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; + int diff_stride, TxfmParam *txfm_param) { #if CONFIG_EXT_TX - if (tx_type == IDTX) - av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); + if (txfm_param->tx_type == IDTX) + av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, txfm_param->tx_type); else #endif - av1_fht64x64(src_diff, coeff, diff_stride, tx_type); + av1_fht64x64(src_diff, coeff, diff_stride, txfm_param); } #endif // CONFIG_TX64X64 -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) static void fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht16x4(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_LGT + av1_fht16x4_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht16x4(src_diff, coeff, diff_stride, txfm_param); +#endif } static void fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht4x16(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_LGT + av1_fht4x16_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht4x16(src_diff, coeff, diff_stride, txfm_param); +#endif } static void fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht32x8(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_LGT + av1_fht32x8_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht32x8(src_diff, coeff, diff_stride, txfm_param); +#endif } static void fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt) { - (void)fwd_txfm_opt; - av1_fht8x32(src_diff, 
coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { +#if CONFIG_LGT + av1_fht8x32_c(src_diff, coeff, diff_stride, txfm_param); +#else + av1_fht8x32(src_diff, coeff, diff_stride, txfm_param); +#endif } -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif -#if CONFIG_HIGHBITDEPTH #if CONFIG_CHROMA_2X2 static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, int lossless, - const int bd) { + int diff_stride, TxfmParam *txfm_param) { tran_high_t a1 = src_diff[0]; tran_high_t b1 = src_diff[1]; tran_high_t c1 = src_diff[diff_stride]; @@ -187,27 +204,27 @@ static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, coeff[2] = (tran_low_t)(4 * c1); coeff[3] = (tran_low_t)(4 * d1); - (void)tx_type; - (void)lossless; - (void)bd; + (void)txfm_param; } #endif static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, int lossless, - const int bd) { - if (lossless) { + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + const int tx_type = txfm_param->tx_type; + const int bd = txfm_param->bd; + if (txfm_param->lossless) { assert(tx_type == DCT_DCT); av1_highbd_fwht4x4(src_diff, coeff, diff_stride); return; } - switch (tx_type) { case DCT_DCT: case ADST_DCT: case DCT_ADST: case ADST_ADST: - av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd); + // fallthrough intended + av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #if CONFIG_EXT_TX case FLIPADST_DCT: @@ -215,80 +232,79 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd); + // fallthrough intended + av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd); break; + // use the c version for anything including identity for now case V_DCT: case H_DCT: case V_ADST: case H_ADST: case V_FLIPADST: case H_FLIPADST: - av1_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type); + case IDTX: + // fallthrough intended + av1_fwd_txfm2d_4x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd); break; - case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type); break; #endif // CONFIG_EXT_TX default: assert(0); } } static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; - (void)bd; - av1_highbd_fht4x8(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_4x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, + txfm_param->bd); } static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; - (void)bd; - av1_highbd_fht8x4(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_8x4_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, + txfm_param->bd); } static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; - (void)bd; - av1_highbd_fht8x16(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { + int32_t 
*dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_8x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, + txfm_param->bd); } static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; - (void)bd; - av1_highbd_fht16x8(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_16x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, + txfm_param->bd); } static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; - (void)bd; - av1_highbd_fht16x32(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_16x32_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, + txfm_param->bd); } static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; - (void)bd; - av1_highbd_fht32x16(src_diff, coeff, diff_stride, tx_type); + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_32x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, + txfm_param->bd); } static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + const int tx_type = txfm_param->tx_type; + const int bd = txfm_param->bd; switch (tx_type) { case DCT_DCT: case ADST_DCT: case DCT_ADST: case ADST_ADST: - av1_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd); + // fallthrough intended + av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #if CONFIG_EXT_TX case FLIPADST_DCT: @@ -296,33 +312,37 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - av1_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd); + // fallthrough intended + av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd); break; + // use the c version for anything including identity for now case V_DCT: case H_DCT: case V_ADST: case H_ADST: case V_FLIPADST: case H_FLIPADST: - // Use C version since DST exists only in C - av1_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type); + case IDTX: + // fallthrough intended + av1_fwd_txfm2d_8x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd); break; - case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type); break; #endif // CONFIG_EXT_TX default: assert(0); } } static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + const int tx_type = txfm_param->tx_type; + const int bd = txfm_param->bd; switch (tx_type) { case DCT_DCT: case ADST_DCT: case DCT_ADST: case ADST_ADST: - av1_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd); + // fallthrough intended + av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #if CONFIG_EXT_TX case FLIPADST_DCT: @@ -330,63 +350,72 @@ 
static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - av1_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd); + // fallthrough intended + av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd); break; + // use the c version for anything including identity for now case V_DCT: case H_DCT: case V_ADST: case H_ADST: case V_FLIPADST: case H_FLIPADST: - // Use C version since DST exists only in C - av1_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type); + case IDTX: + // fallthrough intended + av1_fwd_txfm2d_16x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd); break; - case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type); break; #endif // CONFIG_EXT_TX default: assert(0); } } static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + const int tx_type = txfm_param->tx_type; + const int bd = txfm_param->bd; switch (tx_type) { case DCT_DCT: - av1_fwd_txfm2d_32x32(src_diff, coeff, diff_stride, tx_type, bd); - break; -#if CONFIG_EXT_TX case ADST_DCT: case DCT_ADST: case ADST_ADST: + // fallthrough intended + av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd); + break; +#if CONFIG_EXT_TX case FLIPADST_DCT: case DCT_FLIPADST: case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: + // fallthrough intended + av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd); + break; + // use the c version for anything including identity for now case V_DCT: case H_DCT: case V_ADST: case H_ADST: case V_FLIPADST: case H_FLIPADST: - av1_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type); + case IDTX: + // fallthrough intended + av1_fwd_txfm2d_32x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd); break; - case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type); break; #endif // CONFIG_EXT_TX - default: assert(0); break; + default: assert(0); } } #if CONFIG_TX64X64 static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - FWD_TXFM_OPT fwd_txfm_opt, const int bd) { - (void)fwd_txfm_opt; - (void)bd; + int diff_stride, TxfmParam *txfm_param) { + int32_t *dst_coeff = (int32_t *)coeff; + const int tx_type = txfm_param->tx_type; + const int bd = txfm_param->bd; switch (tx_type) { case DCT_DCT: - av1_highbd_fht64x64(src_diff, coeff, diff_stride, tx_type); + av1_fwd_txfm2d_64x64(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #if CONFIG_EXT_TX case ADST_DCT: @@ -403,141 +432,119 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, case H_ADST: case V_FLIPADST: case H_FLIPADST: - av1_highbd_fht64x64(src_diff, coeff, diff_stride, tx_type); + // TODO(sarahparker) + // I've deleted the 64x64 implementations that existed in lieu + // of adst, flipadst and identity for simplicity but will bring back + // in a later change. This shouldn't impact performance since + // DCT_DCT is the only extended type currently allowed for 64x64, + // as dictated by get_ext_tx_set_type in blockd.h. 
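The rewritten high-bitdepth transform helpers above read their parameters from the new TxfmParam struct rather than separate tx_type/bd/lossless arguments. A hedged caller-side sketch follows; only the fields actually dereferenced in this patch (tx_type, tx_size, lossless, bd) are set, and the concrete values are illustrative.

// Illustrative caller setup for the new TxfmParam-based interface.
TxfmParam txfm_param;
txfm_param.tx_type = DCT_DCT;   // transform pair chosen by the RD search
txfm_param.tx_size = TX_8X8;    // block transform size
txfm_param.lossless = 0;        // lossless path uses the WHT instead
txfm_param.bd = 10;             // bit depth for the high-bitdepth kernels
av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &txfm_param);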
+ av1_fwd_txfm2d_64x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); + break; + case IDTX: + av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 64, tx_type); break; - case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break; #endif // CONFIG_EXT_TX default: assert(0); break; } } #endif // CONFIG_TX64X64 -#endif // CONFIG_HIGHBITDEPTH void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, - FWD_TXFM_PARAM *fwd_txfm_param) { - const int fwd_txfm_opt = FWD_TXFM_OPT_NORMAL; - const TX_TYPE tx_type = fwd_txfm_param->tx_type; - const TX_SIZE tx_size = fwd_txfm_param->tx_size; - const int lossless = fwd_txfm_param->lossless; + TxfmParam *txfm_param) { + const TX_SIZE tx_size = txfm_param->tx_size; switch (tx_size) { #if CONFIG_TX64X64 case TX_64X64: - fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param); break; #endif // CONFIG_TX64X64 case TX_32X32: - fwd_txfm_32x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param); break; case TX_16X16: - fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); - break; - case TX_8X8: - fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); - break; - case TX_4X8: - fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); - break; - case TX_8X4: - fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_16x16(src_diff, coeff, diff_stride, txfm_param); break; + case TX_8X8: fwd_txfm_8x8(src_diff, coeff, diff_stride, txfm_param); break; + case TX_4X8: fwd_txfm_4x8(src_diff, coeff, diff_stride, txfm_param); break; + case TX_8X4: fwd_txfm_8x4(src_diff, coeff, diff_stride, txfm_param); break; case TX_8X16: - fwd_txfm_8x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_8x16(src_diff, coeff, diff_stride, txfm_param); break; case TX_16X8: - fwd_txfm_16x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_16x8(src_diff, coeff, diff_stride, txfm_param); break; case TX_16X32: - fwd_txfm_16x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_16x32(src_diff, coeff, diff_stride, txfm_param); break; case TX_32X16: - fwd_txfm_32x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); - break; - case TX_4X4: - fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless); + fwd_txfm_32x16(src_diff, coeff, diff_stride, txfm_param); break; + case TX_4X4: fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param); break; #if CONFIG_CHROMA_2X2 - case TX_2X2: - fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless); - break; + case TX_2X2: fwd_txfm_2x2(src_diff, coeff, diff_stride, txfm_param); break; #endif -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) case TX_4X16: - fwd_txfm_4x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_4x16(src_diff, coeff, diff_stride, txfm_param); break; case TX_16X4: - fwd_txfm_16x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_16x4(src_diff, coeff, diff_stride, txfm_param); break; case TX_8X32: - fwd_txfm_8x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_8x32(src_diff, coeff, diff_stride, txfm_param); break; case TX_32X8: - fwd_txfm_32x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + fwd_txfm_32x8(src_diff, coeff, diff_stride, txfm_param); break; -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif default: 
assert(0); break; } } -#if CONFIG_HIGHBITDEPTH void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param) { - const int fwd_txfm_opt = FWD_TXFM_OPT_NORMAL; - const TX_TYPE tx_type = fwd_txfm_param->tx_type; - const TX_SIZE tx_size = fwd_txfm_param->tx_size; - const int lossless = fwd_txfm_param->lossless; - const int bd = fwd_txfm_param->bd; + int diff_stride, TxfmParam *txfm_param) { + const TX_SIZE tx_size = txfm_param->tx_size; switch (tx_size) { #if CONFIG_TX64X64 case TX_64X64: - highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param); break; #endif // CONFIG_TX64X64 case TX_32X32: - highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param); break; case TX_16X16: - highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, txfm_param); break; case TX_8X8: - highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, txfm_param); break; case TX_4X8: - highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, txfm_param); break; case TX_8X4: - highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, txfm_param); break; case TX_8X16: - highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, txfm_param); break; case TX_16X8: - highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, txfm_param); break; case TX_16X32: - highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, txfm_param); break; case TX_32X16: - highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt, - bd); + highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, txfm_param); break; case TX_4X4: - highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless, bd); + highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param); break; #if CONFIG_CHROMA_2X2 case TX_2X2: - highbd_fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless, bd); + highbd_fwd_txfm_2x2(src_diff, coeff, diff_stride, txfm_param); break; #endif default: assert(0); break; } } -#endif // CONFIG_HIGHBITDEPTH diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h b/third_party/aom/av1/encoder/hybrid_fwd_txfm.h index e6fd17275..b25ffb8d8 100644 --- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h +++ b/third_party/aom/av1/encoder/hybrid_fwd_txfm.h @@ -14,28 +14,15 @@ #include "./aom_config.h" -typedef enum FWD_TXFM_OPT { FWD_TXFM_OPT_NORMAL } FWD_TXFM_OPT; - -typedef struct FWD_TXFM_PARAM { - TX_TYPE tx_type; - TX_SIZE tx_size; - int lossless; -#if CONFIG_HIGHBITDEPTH - int bd; -#endif // CONFIG_HIGHBITDEPTH -} FWD_TXFM_PARAM; - #ifdef __cplusplus extern "C" { #endif void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, - FWD_TXFM_PARAM *fwd_txfm_param); + TxfmParam *txfm_param); -#if CONFIG_HIGHBITDEPTH void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param); -#endif // 
CONFIG_HIGHBITDEPTH + int diff_stride, TxfmParam *txfm_param); #ifdef __cplusplus } // extern "C" diff --git a/third_party/aom/av1/encoder/mcomp.c b/third_party/aom/av1/encoder/mcomp.c index 52080ca0d..4efadff1b 100644 --- a/third_party/aom/av1/encoder/mcomp.c +++ b/third_party/aom/av1/encoder/mcomp.c @@ -228,49 +228,45 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { #define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) -static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r, - int c) { - return &buf[(r)*stride + (c)]; -} - /* checks if (r, c) has better score than previous best */ #if CONFIG_EXT_INTER -#define CHECK_BETTER1(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - MV this_mv = { r, c }; \ - thismse = upsampled_pref_error( \ - xd, vfp, src_address, src_stride, upre(y, y_stride, r, c), y_stride, \ - second_pred, mask, mask_stride, invert_mask, w, h, &sse); \ - v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \ - v += thismse; \ - if (v < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ +#define CHECK_BETTER1(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + MV this_mv = { r, c }; \ + thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \ + pre(y, y_stride, r, c), y_stride, sp(c), \ + sp(r), second_pred, mask, mask_stride, \ + invert_mask, w, h, &sse); \ + v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \ + v += thismse; \ + if (v < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ } #else -#define CHECK_BETTER1(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - MV this_mv = { r, c }; \ - thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \ - upre(y, y_stride, r, c), y_stride, \ - second_pred, w, h, &sse); \ - v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \ - v += thismse; \ - if (v < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ +#define CHECK_BETTER1(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + MV this_mv = { r, c }; \ + thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \ + pre(y, y_stride, r, c), y_stride, sp(c), \ + sp(r), second_pred, w, h, &sse); \ + v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \ + v += thismse; \ + if (v < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ } #endif // CONFIG_EXT_INTER @@ -700,16 +696,14 @@ static const MV search_step_table[12] = { }; /* clang-format on */ -static int upsampled_pref_error(const MACROBLOCKD *xd, - const aom_variance_fn_ptr_t *vfp, - const uint8_t *const src, const int src_stride, - const uint8_t *const y, int y_stride, - const uint8_t *second_pred, +static int upsampled_pref_error( + const MACROBLOCKD *xd, const aom_variance_fn_ptr_t *vfp, + const uint8_t *const src, const int src_stride, const uint8_t *const y, + int y_stride, int subpel_x_q3, int subpel_y_q3, const uint8_t *second_pred, #if CONFIG_EXT_INTER - const uint8_t *mask, int mask_stride, - int invert_mask, + const uint8_t *mask, int mask_stride, int invert_mask, #endif - int w, int h, unsigned int *sse) { + int w, int h, unsigned int *sse) { unsigned int besterr; #if 
CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -717,15 +711,17 @@ static int upsampled_pref_error(const MACROBLOCKD *xd, if (second_pred != NULL) { #if CONFIG_EXT_INTER if (mask) - aom_highbd_comp_mask_upsampled_pred(pred16, second_pred, w, h, y, - y_stride, mask, mask_stride, - invert_mask); + aom_highbd_comp_mask_upsampled_pred( + pred16, second_pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride, + mask, mask_stride, invert_mask, xd->bd); else #endif - aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y, - y_stride); + aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, + subpel_x_q3, subpel_y_q3, y, + y_stride, xd->bd); } else { - aom_highbd_upsampled_pred(pred16, w, h, y, y_stride); + aom_highbd_upsampled_pred(pred16, w, h, subpel_x_q3, subpel_y_q3, y, + y_stride, xd->bd); } besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse); @@ -738,13 +734,15 @@ static int upsampled_pref_error(const MACROBLOCKD *xd, if (second_pred != NULL) { #if CONFIG_EXT_INTER if (mask) - aom_comp_mask_upsampled_pred(pred, second_pred, w, h, y, y_stride, mask, + aom_comp_mask_upsampled_pred(pred, second_pred, w, h, subpel_x_q3, + subpel_y_q3, y, y_stride, mask, mask_stride, invert_mask); else #endif - aom_comp_avg_upsampled_pred(pred, second_pred, w, h, y, y_stride); + aom_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3, + subpel_y_q3, y, y_stride); } else { - aom_upsampled_pred(pred, w, h, y, y_stride); + aom_upsampled_pred(pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride); } besterr = vfp->vf(pred, w, src, src_stride, sse); @@ -764,12 +762,12 @@ static unsigned int upsampled_setup_center_error( #endif int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) { - unsigned int besterr = upsampled_pref_error(xd, vfp, src, src_stride, - y + offset, y_stride, second_pred, + unsigned int besterr = upsampled_pref_error( + xd, vfp, src, src_stride, y + offset, y_stride, 0, 0, second_pred, #if CONFIG_EXT_INTER - mask, mask_stride, invert_mask, + mask, mask_stride, invert_mask, #endif - w, h, sse1); + w, h, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); return besterr; @@ -824,7 +822,7 @@ int av1_find_best_sub_pixel_tree( #if CONFIG_EXT_INTER mask, mask_stride, invert_mask, #endif - w, h, (offset * 8), mvjcost, mvcost, sse1, distortion); + w, h, offset, mvjcost, mvcost, sse1, distortion); else besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address, @@ -845,17 +843,15 @@ int av1_find_best_sub_pixel_tree( MV this_mv = { tr, tc }; if (use_upsampled_ref) { - const uint8_t *const pre_address = y + tr * y_stride + tc; - thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, - pre_address, y_stride, second_pred, + pre(y, y_stride, tr, tc), y_stride, + sp(tc), sp(tr), second_pred, #if CONFIG_EXT_INTER mask, mask_stride, invert_mask, #endif w, h, &sse); } else { - const uint8_t *const pre_address = - y + (tr >> 3) * y_stride + (tc >> 3); + const uint8_t *const pre_address = pre(y, y_stride, tr, tc); if (second_pred == NULL) thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse); @@ -894,16 +890,15 @@ int av1_find_best_sub_pixel_tree( MV this_mv = { tr, tc }; if (use_upsampled_ref) { - const uint8_t *const pre_address = y + tr * y_stride + tc; - thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, - pre_address, y_stride, second_pred, + pre(y, y_stride, tr, tc), y_stride, + sp(tc), 
sp(tr), second_pred, #if CONFIG_EXT_INTER mask, mask_stride, invert_mask, #endif w, h, &sse); } else { - const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); + const uint8_t *const pre_address = pre(y, y_stride, tr, tc); if (second_pred == NULL) thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, @@ -992,9 +987,16 @@ unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x, } // Refine MV in a small range +#if WARPED_MOTION_SORT_SAMPLES +unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int *pts0, int *pts_inref0, int *pts_mv0, + int total_samples) { +#else unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int mi_row, int mi_col, int *pts, int *pts_inref) { +#endif // WARPED_MOTION_SORT_SAMPLES const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; @@ -1007,6 +1009,9 @@ unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, int16_t *tr = &mbmi->mv[0].as_mv.row; int16_t *tc = &mbmi->mv[0].as_mv.col; WarpedMotionParams best_wm_params = mbmi->wm_params[0]; +#if WARPED_MOTION_SORT_SAMPLES + int best_num_proj_ref = mbmi->num_proj_ref[0]; +#endif // WARPED_MOTION_SORT_SAMPLES unsigned int bestmse; int minc, maxc, minr, maxr; const int start = cm->allow_high_precision_mv ? 0 : 4; @@ -1033,6 +1038,16 @@ unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) { MV this_mv = { *tr, *tc }; +#if WARPED_MOTION_SORT_SAMPLES + int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; + + memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); + memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); + if (total_samples > 1) + mbmi->num_proj_ref[0] = + sortSamples(pts_mv0, &this_mv, pts, pts_inref, total_samples); +#endif // WARPED_MOTION_SORT_SAMPLES + if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, *tr, *tc, &mbmi->wm_params[0], mi_row, mi_col)) { thismse = @@ -1041,6 +1056,9 @@ unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, if (thismse < bestmse) { best_idx = idx; best_wm_params = mbmi->wm_params[0]; +#if WARPED_MOTION_SORT_SAMPLES + best_num_proj_ref = mbmi->num_proj_ref[0]; +#endif // WARPED_MOTION_SORT_SAMPLES bestmse = thismse; } } @@ -1058,7 +1076,9 @@ unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, *tr = br; *tc = bc; mbmi->wm_params[0] = best_wm_params; - +#if WARPED_MOTION_SORT_SAMPLES + mbmi->num_proj_ref[0] = best_num_proj_ref; +#endif // WARPED_MOTION_SORT_SAMPLES return bestmse; } #endif // CONFIG_WARPED_MOTION @@ -2653,19 +2673,20 @@ int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, #define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) #undef CHECK_BETTER1 -#define CHECK_BETTER1(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - thismse = upsampled_obmc_pref_error( \ - xd, mask, vfp, z, upre(y, y_stride, r, c), y_stride, w, h, &sse); \ - if ((v = MVC(r, c) + thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ +#define CHECK_BETTER1(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + thismse = \ + upsampled_obmc_pref_error(xd, mask, vfp, z, pre(y, y_stride, r, c), \ + y_stride, sp(c), sp(r), w, h, &sse); \ + if ((v = 
MVC(r, c) + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ } static unsigned int setup_obmc_center_error( @@ -2684,12 +2705,14 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, const int32_t *mask, const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc, const uint8_t *const y, int y_stride, - int w, int h, unsigned int *sse) { + int subpel_x_q3, int subpel_y_q3, int w, + int h, unsigned int *sse) { unsigned int besterr; #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); - aom_highbd_upsampled_pred(pred16, w, h, y, y_stride); + aom_highbd_upsampled_pred(pred16, w, h, subpel_x_q3, subpel_y_q3, y, + y_stride, xd->bd); besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse); } else { @@ -2698,7 +2721,7 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, const int32_t *mask, DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); (void)xd; #endif // CONFIG_HIGHBITDEPTH - aom_upsampled_pred(pred, w, h, y, y_stride); + aom_upsampled_pred(pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride); besterr = vfp->ovf(pred, w, wsrc, mask, sse); #if CONFIG_HIGHBITDEPTH @@ -2714,18 +2737,17 @@ static unsigned int upsampled_setup_obmc_center_error( int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) { unsigned int besterr = upsampled_obmc_pref_error( - xd, mask, vfp, wsrc, y + offset, y_stride, w, h, sse1); + xd, mask, vfp, wsrc, y + offset, y_stride, 0, 0, w, h, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); return besterr; } int av1_find_best_obmc_sub_pixel_tree_up( - const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv, - const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, - int is_second, int use_upsampled_ref) { + MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, + int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop, + int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion, + unsigned int *sse1, int is_second, int use_upsampled_ref) { const int32_t *wsrc = x->wsrc_buf; const int32_t *mask = x->mask_buf; const int *const z = wsrc; @@ -2756,21 +2778,11 @@ int av1_find_best_obmc_sub_pixel_tree_up( int y_stride; const uint8_t *y; - const struct buf_2d backup_pred = pd->pre[is_second]; int minc, maxc, minr, maxr; av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv); - if (use_upsampled_ref) { - int ref = xd->mi[0]->mbmi.ref_frame[is_second]; - const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); - setup_pred_plane(&pd->pre[is_second], mbmi->sb_type, - upsampled_ref->y_buffer, upsampled_ref->y_crop_width, - upsampled_ref->y_crop_height, upsampled_ref->y_stride, - (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x, - pd->subsampling_y); - } y = pd->pre[is_second].buf; y_stride = pd->pre[is_second].stride; offset = bestmv->row * y_stride + bestmv->col; @@ -2784,7 +2796,7 @@ int av1_find_best_obmc_sub_pixel_tree_up( if (use_upsampled_ref) besterr = upsampled_setup_obmc_center_error( xd, mask, bestmv, ref_mv, error_per_bit, vfp, z, y, y_stride, w, h, - (offset * 8), mvjcost, mvcost, sse1, distortion); + offset, mvjcost, mvcost, sse1, distortion); else besterr = 
setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp, z, y, y_stride, offset, mvjcost, mvcost, @@ -2797,15 +2809,13 @@ int av1_find_best_obmc_sub_pixel_tree_up( tc = bc + search_step[idx].col; if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { MV this_mv = { tr, tc }; + const uint8_t *const pre_address = pre(y, y_stride, tr, tc); if (use_upsampled_ref) { - const uint8_t *const pre_address = y + tr * y_stride + tc; - - thismse = upsampled_obmc_pref_error( - xd, mask, vfp, src_address, pre_address, y_stride, w, h, &sse); + thismse = + upsampled_obmc_pref_error(xd, mask, vfp, src_address, pre_address, + y_stride, sp(tc), sp(tr), w, h, &sse); } else { - const uint8_t *const pre_address = - y + (tr >> 3) * y_stride + (tc >> 3); thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr), src_address, mask, &sse); } @@ -2833,15 +2843,12 @@ int av1_find_best_obmc_sub_pixel_tree_up( MV this_mv = { tr, tc }; if (use_upsampled_ref) { - const uint8_t *const pre_address = y + tr * y_stride + tc; - thismse = upsampled_obmc_pref_error(xd, mask, vfp, src_address, - pre_address, y_stride, w, h, &sse); + pre(y, y_stride, tr, tc), y_stride, + sp(tc), sp(tr), w, h, &sse); } else { - const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); - - thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr), src_address, - mask, &sse); + thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), + src_address, mask, &sse); } cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, @@ -2889,10 +2896,6 @@ int av1_find_best_obmc_sub_pixel_tree_up( bestmv->row = br; bestmv->col = bc; - if (use_upsampled_ref) { - pd->pre[is_second] = backup_pred; - } - return besterr; } diff --git a/third_party/aom/av1/encoder/mcomp.h b/third_party/aom/av1/encoder/mcomp.h index 7e8b4b29d..733e415ce 100644 --- a/third_party/aom/av1/encoder/mcomp.h +++ b/third_party/aom/av1/encoder/mcomp.h @@ -143,11 +143,10 @@ int av1_obmc_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x, const aom_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv, int is_second); int av1_find_best_obmc_sub_pixel_tree_up( - const struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, - MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, - int is_second, int use_upsampled_ref); + MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, + int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop, + int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion, + unsigned int *sse1, int is_second, int use_upsampled_ref); #endif // CONFIG_MOTION_VAR #ifdef __cplusplus } // extern "C" @@ -157,10 +156,18 @@ int av1_find_best_obmc_sub_pixel_tree_up( unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int mi_row, int mi_col, const MV *this_mv); +#if WARPED_MOTION_SORT_SAMPLES +unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi, + MACROBLOCK *const x, BLOCK_SIZE bsize, + int mi_row, int mi_col, int *pts0, + int *pts_inref0, int *pts_mv0, + int total_samples); +#else unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int mi_row, int mi_col, int *pts, int *pts_inref); +#endif // WARPED_MOTION_SORT_SAMPLES #endif // CONFIG_WARPED_MOTION #endif // AV1_ENCODER_MCOMP_H_ diff --git a/third_party/aom/av1/encoder/palette.c 
b/third_party/aom/av1/encoder/palette.c index 235964dde..bac06cd17 100644 --- a/third_party/aom/av1/encoder/palette.c +++ b/third_party/aom/av1/encoder/palette.c @@ -145,27 +145,6 @@ int av1_remove_duplicates(float *centroids, int num_centroids) { return num_unique; } -int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) { - int n = 0, r, c, i, val_count[256]; - uint8_t val; - memset(val_count, 0, sizeof(val_count)); - - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; ++c) { - val = src[r * stride + c]; - ++val_count[val]; - } - } - - for (i = 0; i < 256; ++i) { - if (val_count[i]) { - ++n; - } - } - - return n; -} - #if CONFIG_PALETTE_DELTA_ENCODING static int delta_encode_cost(const int *colors, int num, int bit_depth, int min_val) { @@ -291,30 +270,3 @@ int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi, return 2 * bit_depth * n * av1_cost_bit(128, 0); #endif // CONFIG_PALETTE_DELTA_ENCODING } - -#if CONFIG_HIGHBITDEPTH -int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, - int bit_depth) { - int n = 0, r, c, i; - uint16_t val; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - int val_count[1 << 12]; - - assert(bit_depth <= 12); - memset(val_count, 0, (1 << 12) * sizeof(val_count[0])); - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; ++c) { - val = src[r * stride + c]; - ++val_count[val]; - } - } - - for (i = 0; i < (1 << bit_depth); ++i) { - if (val_count[i]) { - ++n; - } - } - - return n; -} -#endif // CONFIG_HIGHBITDEPTH diff --git a/third_party/aom/av1/encoder/palette.h b/third_party/aom/av1/encoder/palette.h index f5a3c1bdd..8afe5a782 100644 --- a/third_party/aom/av1/encoder/palette.h +++ b/third_party/aom/av1/encoder/palette.h @@ -36,14 +36,6 @@ void av1_k_means(const float *data, float *centroids, uint8_t *indices, int n, // method. int av1_remove_duplicates(float *centroids, int num_centroids); -// Returns the number of colors in 'src'. -int av1_count_colors(const uint8_t *src, int stride, int rows, int cols); -#if CONFIG_HIGHBITDEPTH -// Same as av1_count_colors(), but for high-bitdepth mode. -int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, - int bit_depth); -#endif // CONFIG_HIGHBITDEPTH - #if CONFIG_PALETTE_DELTA_ENCODING // Given a color cache and a set of base colors, find if each cache color is // present in the base colors, record the binary results in "cache_color_found". diff --git a/third_party/aom/av1/encoder/pickcdef.c b/third_party/aom/av1/encoder/pickcdef.c index da64fb48d..e4ec38826 100644 --- a/third_party/aom/av1/encoder/pickcdef.c +++ b/third_party/aom/av1/encoder/pickcdef.c @@ -19,13 +19,19 @@ #include "av1/common/reconinter.h" #include "av1/encoder/encoder.h" +#define REDUCED_STRENGTHS 8 +#define REDUCED_TOTAL_STRENGTHS (REDUCED_STRENGTHS * CLPF_STRENGTHS) #define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS) +static int priconv[REDUCED_STRENGTHS] = { 0, 1, 2, 3, 4, 7, 12, 25 }; + /* Search for the best strength to add as an option, knowing we already selected nb_strengths options. */ static uint64_t search_one(int *lev, int nb_strengths, - uint64_t mse[][TOTAL_STRENGTHS], int sb_count) { + uint64_t mse[][TOTAL_STRENGTHS], int sb_count, + int fast) { uint64_t tot_mse[TOTAL_STRENGTHS]; + const int total_strengths = fast ? 
REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS; int i, j; uint64_t best_tot_mse = (uint64_t)1 << 63; int best_id = 0; @@ -40,13 +46,13 @@ static uint64_t search_one(int *lev, int nb_strengths, } } /* Find best mse when adding each possible new option. */ - for (j = 0; j < TOTAL_STRENGTHS; j++) { + for (j = 0; j < total_strengths; j++) { uint64_t best = best_mse; if (mse[i][j] < best) best = mse[i][j]; tot_mse[j] += best; } } - for (j = 0; j < TOTAL_STRENGTHS; j++) { + for (j = 0; j < total_strengths; j++) { if (tot_mse[j] < best_tot_mse) { best_tot_mse = tot_mse[j]; best_id = j; @@ -59,9 +65,10 @@ static uint64_t search_one(int *lev, int nb_strengths, /* Search for the best luma+chroma strength to add as an option, knowing we already selected nb_strengths options. */ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths, - uint64_t (**mse)[TOTAL_STRENGTHS], - int sb_count) { + uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count, + int fast) { uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS]; + const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS; int i, j; uint64_t best_tot_mse = (uint64_t)1 << 63; int best_id0 = 0; @@ -79,9 +86,9 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths, } } /* Find best mse when adding each possible new option. */ - for (j = 0; j < TOTAL_STRENGTHS; j++) { + for (j = 0; j < total_strengths; j++) { int k; - for (k = 0; k < TOTAL_STRENGTHS; k++) { + for (k = 0; k < total_strengths; k++) { uint64_t best = best_mse; uint64_t curr = mse[0][i][j]; curr += mse[1][i][k]; @@ -90,9 +97,9 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths, } } } - for (j = 0; j < TOTAL_STRENGTHS; j++) { + for (j = 0; j < total_strengths; j++) { int k; - for (k = 0; k < TOTAL_STRENGTHS; k++) { + for (k = 0; k < total_strengths; k++) { if (tot_mse[j][k] < best_tot_mse) { best_tot_mse = tot_mse[j][k]; best_id0 = j; @@ -108,20 +115,23 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths, /* Search for the set of strengths that minimizes mse. */ static uint64_t joint_strength_search(int *best_lev, int nb_strengths, uint64_t mse[][TOTAL_STRENGTHS], - int sb_count) { + int sb_count, int fast) { uint64_t best_tot_mse; int i; best_tot_mse = (uint64_t)1 << 63; /* Greedy search: add one strength options at a time. */ for (i = 0; i < nb_strengths; i++) { - best_tot_mse = search_one(best_lev, i, mse, sb_count); + best_tot_mse = search_one(best_lev, i, mse, sb_count, fast); } /* Trying to refine the greedy search by reconsidering each already-selected option. */ - for (i = 0; i < 4 * nb_strengths; i++) { - int j; - for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1]; - best_tot_mse = search_one(best_lev, nb_strengths - 1, mse, sb_count); + if (!fast) { + for (i = 0; i < 4 * nb_strengths; i++) { + int j; + for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1]; + best_tot_mse = + search_one(best_lev, nb_strengths - 1, mse, sb_count, fast); + } } return best_tot_mse; } @@ -130,13 +140,14 @@ static uint64_t joint_strength_search(int *best_lev, int nb_strengths, static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1, int nb_strengths, uint64_t (**mse)[TOTAL_STRENGTHS], - int sb_count) { + int sb_count, int fast) { uint64_t best_tot_mse; int i; best_tot_mse = (uint64_t)1 << 63; /* Greedy search: add one strength options at a time. 
*/ for (i = 0; i < nb_strengths; i++) { - best_tot_mse = search_one_dual(best_lev0, best_lev1, i, mse, sb_count); + best_tot_mse = + search_one_dual(best_lev0, best_lev1, i, mse, sb_count, fast); } /* Trying to refine the greedy search by reconsidering each already-selected option. */ @@ -146,8 +157,8 @@ static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1, best_lev0[j] = best_lev0[j + 1]; best_lev1[j] = best_lev1[j + 1]; } - best_tot_mse = - search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, sb_count); + best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, + sb_count, fast); } return best_tot_mse; } @@ -269,12 +280,12 @@ uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src, } void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, - AV1_COMMON *cm, MACROBLOCKD *xd) { + AV1_COMMON *cm, MACROBLOCKD *xd, int fast) { int r, c; int sbr, sbc; uint16_t *src[3]; uint16_t *ref_coeff[3]; - dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE]; + dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64]; int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } }; int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } }; int stride[3]; @@ -289,8 +300,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, uint64_t best_tot_mse = (uint64_t)1 << 63; uint64_t tot_mse; int sb_count; - int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; - int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; + int nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; + int nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index)); int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*sb_index)); uint64_t(*mse[2])[TOTAL_STRENGTHS]; @@ -302,6 +313,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, int quantizer; double lambda; int nplanes = 3; + const int total_strengths = fast ? 
REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS; DECLARE_ALIGNED(32, uint16_t, inbuf[OD_DERING_INBUF_SIZE]); uint16_t *in; DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_SB_SQUARE]); @@ -375,22 +387,23 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, int nvb, nhb; int gi; int dirinit = 0; - nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc); - nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr); - cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride + - MAX_MIB_SIZE * sbc] + nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc); + nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr); + cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride + + MI_SIZE_64X64 * sbc] ->mbmi.cdef_strength = -1; - if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue; - dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE, - sbc * MAX_MIB_SIZE, dlist, 1); + if (sb_all_skip(cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64)) continue; + dering_count = sb_compute_dering_list(cm, sbr * MI_SIZE_64X64, + sbc * MI_SIZE_64X64, dlist, 1); for (pli = 0; pli < nplanes; pli++) { for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE; - for (gi = 0; gi < TOTAL_STRENGTHS; gi++) { + for (gi = 0; gi < total_strengths; gi++) { int threshold; uint64_t curr_mse; int clpf_strength; threshold = gi / CLPF_STRENGTHS; + if (fast) threshold = priconv[threshold]; if (pli > 0 && !chroma_dering) threshold = 0; /* We avoid filtering the pixels for which some of the pixels to average @@ -406,8 +419,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, if (clpf_strength == 0) copy_sb16_16(&in[(-yoff * OD_FILT_BSTRIDE - xoff)], OD_FILT_BSTRIDE, src[pli], - (sbr * MAX_MIB_SIZE << mi_high_l2[pli]) - yoff, - (sbc * MAX_MIB_SIZE << mi_wide_l2[pli]) - xoff, + (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff, + (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff, stride[pli], ysize, xsize); od_dering(clpf_strength ? NULL : (uint8_t *)in, OD_FILT_BSTRIDE, tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var, pli, @@ -416,8 +429,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, dering_damping, coeff_shift, clpf_strength != 0, 1); curr_mse = compute_dering_dist( ref_coeff[pli] + - (sbr * MAX_MIB_SIZE << mi_high_l2[pli]) * stride[pli] + - (sbc * MAX_MIB_SIZE << mi_wide_l2[pli]), + (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] + + (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]), stride[pli], tmp_dst, dlist, dering_count, bsize[pli], coeff_shift, pli); if (pli < 2) @@ -425,7 +438,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, else mse[1][sb_count][gi] += curr_mse; sb_index[sb_count] = - MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc; + MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc; } } sb_count++; @@ -440,10 +453,10 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, nb_strengths = 1 << i; if (nplanes >= 3) tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths, - mse, sb_count); + mse, sb_count, fast); else - tot_mse = - joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count); + tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count, + fast); /* Count superblock signalling cost. */ tot_mse += (uint64_t)(sb_count * lambda * i); /* Count header signalling cost. 
*/ @@ -477,6 +490,17 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, selected_strength[i] = best_gi; cm->mi_grid_visible[sb_index[i]]->mbmi.cdef_strength = best_gi; } + + if (fast) { + for (int j = 0; j < nb_strengths; j++) { + cm->cdef_strengths[j] = + priconv[cm->cdef_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS + + (cm->cdef_strengths[j] % CLPF_STRENGTHS); + cm->cdef_uv_strengths[j] = + priconv[cm->cdef_uv_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS + + (cm->cdef_uv_strengths[j] % CLPF_STRENGTHS); + } + } cm->cdef_dering_damping = dering_damping; cm->cdef_clpf_damping = clpf_damping; aom_free(mse[0]); diff --git a/third_party/aom/av1/encoder/picklpf.c b/third_party/aom/av1/encoder/picklpf.c index fc0ea485d..26fd55ef0 100644 --- a/third_party/aom/av1/encoder/picklpf.c +++ b/third_party/aom/av1/encoder/picklpf.c @@ -38,13 +38,23 @@ int av1_get_max_filter_level(const AV1_COMP *cpi) { static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, AV1_COMP *const cpi, int filt_level, - int partial_frame) { + int partial_frame +#if CONFIG_UV_LVL + , + int plane +#endif + ) { AV1_COMMON *const cm = &cpi->common; int64_t filt_err; #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4 +#if CONFIG_UV_LVL + av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, + plane, partial_frame); +#else av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1, partial_frame); +#endif // CONFIG_UV_LVL #else if (cpi->num_workers > 1) av1_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane, @@ -55,6 +65,40 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, 1, partial_frame); #endif +#if CONFIG_UV_LVL +#if CONFIG_HIGHBITDEPTH + if (cm->use_highbitdepth) { + if (plane == 0) + filt_err = aom_highbd_get_y_sse(sd, cm->frame_to_show); + else if (plane == 1) + filt_err = aom_highbd_get_u_sse(sd, cm->frame_to_show); + else + filt_err = aom_highbd_get_v_sse(sd, cm->frame_to_show); + } else { + if (plane == 0) + filt_err = aom_get_y_sse(sd, cm->frame_to_show); + else if (plane == 1) + filt_err = aom_get_u_sse(sd, cm->frame_to_show); + else + filt_err = aom_get_v_sse(sd, cm->frame_to_show); + } +#else + if (plane == 0) + filt_err = aom_get_y_sse(sd, cm->frame_to_show); + else if (plane == 1) + filt_err = aom_get_u_sse(sd, cm->frame_to_show); + else + filt_err = aom_get_v_sse(sd, cm->frame_to_show); +#endif // CONFIG_HIGHBITDEPTH + + // Re-instate the unfiltered frame + if (plane == 0) + aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); + else if (plane == 1) + aom_yv12_copy_u(&cpi->last_frame_uf, cm->frame_to_show); + else + aom_yv12_copy_v(&cpi->last_frame_uf, cm->frame_to_show); +#else #if CONFIG_HIGHBITDEPTH if (cm->use_highbitdepth) { filt_err = aom_highbd_get_y_sse(sd, cm->frame_to_show); @@ -67,12 +111,18 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, // Re-instate the unfiltered frame aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); +#endif // CONFIG_UV_LVL return filt_err; } int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, - int partial_frame, double *best_cost_ret) { + int partial_frame, double *best_cost_ret +#if CONFIG_UV_LVL + , + int plane +#endif + ) { const AV1_COMMON *const cm = &cpi->common; const struct loopfilter *const lf = &cm->lf; const int min_filter_level = 0; @@ -82,9 +132,20 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, int filt_best; MACROBLOCK *x = &cpi->td.mb; - // Start the search at the 
previous frame filter level unless it is now out of - // range. +// Start the search at the previous frame filter level unless it is now out of +// range. +#if CONFIG_UV_LVL + int lvl; + switch (plane) { + case 0: lvl = lf->filter_level; break; + case 1: lvl = lf->filter_level_u; break; + case 2: lvl = lf->filter_level_v; break; + default: lvl = lf->filter_level; break; + } + int filt_mid = clamp(lvl, min_filter_level, max_filter_level); +#else int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); +#endif // CONFIG_UV_LVL int filter_step = filt_mid < 16 ? 4 : filt_mid / 4; // Sum squared error at each filter level int64_t ss_err[MAX_LOOP_FILTER + 1]; @@ -92,10 +153,23 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, // Set each entry to -1 memset(ss_err, 0xFF, sizeof(ss_err)); +#if CONFIG_UV_LVL + if (plane == 0) + aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); + else if (plane == 1) + aom_yv12_copy_u(cm->frame_to_show, &cpi->last_frame_uf); + else if (plane == 2) + aom_yv12_copy_v(cm->frame_to_show, &cpi->last_frame_uf); +#else // Make a copy of the unfiltered / processed recon buffer aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); +#endif // CONFIG_UV_LVL +#if CONFIG_UV_LVL + best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane); +#else best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame); +#endif // CONFIG_UV_LVL filt_best = filt_mid; ss_err[filt_mid] = best_err; @@ -115,7 +189,12 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, if (filt_direction <= 0 && filt_low != filt_mid) { // Get Low filter error score if (ss_err[filt_low] < 0) { +#if CONFIG_UV_LVL + ss_err[filt_low] = + try_filter_frame(sd, cpi, filt_low, partial_frame, plane); +#else ss_err[filt_low] = try_filter_frame(sd, cpi, filt_low, partial_frame); +#endif // CONFIG_UV_LVL } // If value is close to the best so far then bias towards a lower loop // filter value. 
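The loop-filter level search touched in the picklpf.c hunks above is a coarse-to-fine probe around the previous frame's level (now taken per plane when CONFIG_UV_LVL is enabled). Below is a minimal standalone sketch of that search pattern, not the encoder's code: try_level_fn is a hypothetical stand-in for try_filter_frame(), the rate term and the bias toward lower filter values are omitted, and MAX_LEVEL is assumed to play the role of MAX_LOOP_FILTER.

```c
#include <stdint.h>
#include <string.h>

#define MAX_LEVEL 63 /* stands in for MAX_LOOP_FILTER; assumption for this sketch */

/* Hypothetical stand-in for try_filter_frame(): filter at `level`, return the SSE. */
typedef int64_t (*try_level_fn)(int level, void *ctx);

static int search_level(try_level_fn try_level, void *ctx, int start_level) {
  int64_t ss_err[MAX_LEVEL + 1];
  memset(ss_err, 0xFF, sizeof(ss_err)); /* -1 marks "not evaluated yet" */

  int mid = start_level;
  int best = mid;
  ss_err[mid] = try_level(mid, ctx);
  int64_t best_err = ss_err[mid];

  int step = mid < 16 ? 4 : mid / 4;
  while (step > 0) {
    const int lo = mid - step < 0 ? 0 : mid - step;
    const int hi = mid + step > MAX_LEVEL ? MAX_LEVEL : mid + step;
    if (lo != mid) {
      if (ss_err[lo] < 0) ss_err[lo] = try_level(lo, ctx);
      if (ss_err[lo] < best_err) { best = lo; best_err = ss_err[lo]; }
    }
    if (hi != mid) {
      if (ss_err[hi] < 0) ss_err[hi] = try_level(hi, ctx);
      if (ss_err[hi] < best_err) { best = hi; best_err = ss_err[hi]; }
    }
    if (best == mid) step /= 2; /* no improvement: refine around the current centre */
    else mid = best;            /* improvement: re-centre and keep the step size */
  }
  return best;
}
```

Caching the result for each level in ss_err avoids re-filtering the frame when the search revisits a level, which is what the memset to -1 in the real code is for.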
@@ -131,7 +210,12 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, // Now look at filt_high if (filt_direction >= 0 && filt_high != filt_mid) { if (ss_err[filt_high] < 0) { +#if CONFIG_UV_LVL + ss_err[filt_high] = + try_filter_frame(sd, cpi, filt_high, partial_frame, plane); +#else ss_err[filt_high] = try_filter_frame(sd, cpi, filt_high, partial_frame); +#endif // CONFIG_UV_LVL } // If value is significantly better than previous best, bias added against // raising filter value @@ -154,8 +238,7 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, // Update best error best_err = ss_err[filt_best]; - if (best_cost_ret) - *best_cost_ret = RDCOST_DBL(x->rdmult, x->rddiv, 0, best_err); + if (best_cost_ret) *best_cost_ret = RDCOST_DBL(x->rdmult, 0, best_err); return filt_best; } @@ -198,14 +281,16 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, if (cm->frame_type == KEY_FRAME) filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); } else { +#if CONFIG_UV_LVL + lf->filter_level = av1_search_filter_level( + sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0); + lf->filter_level_u = av1_search_filter_level( + sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 1); + lf->filter_level_v = av1_search_filter_level( + sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 2); +#else lf->filter_level = av1_search_filter_level( sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL); +#endif // CONFIG_UV_LVL } - -#if CONFIG_EXT_TILE - // TODO(any): 0 loopfilter level is only necessary if individual tile - // decoding is required. We need to communicate this requirement to this - // code and force loop filter level 0 only if required. - if (cm->tile_encoding_mode) lf->filter_level = 0; -#endif // CONFIG_EXT_TILE } diff --git a/third_party/aom/av1/encoder/picklpf.h b/third_party/aom/av1/encoder/picklpf.h index 3c0a83462..bd248d114 100644 --- a/third_party/aom/av1/encoder/picklpf.h +++ b/third_party/aom/av1/encoder/picklpf.h @@ -21,8 +21,13 @@ extern "C" { struct yv12_buffer_config; struct AV1_COMP; int av1_get_max_filter_level(const AV1_COMP *cpi); +#if CONFIG_UV_LVL +int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, + int partial_frame, double *err, int plane); +#else int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, int partial_frame, double *err); +#endif void av1_pick_filter_level(const struct yv12_buffer_config *sd, struct AV1_COMP *cpi, LPF_PICK_METHOD method); #ifdef __cplusplus diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c index 4a446d24e..fec68377a 100644 --- a/third_party/aom/av1/encoder/pickrst.c +++ b/third_party/aom/av1/encoder/pickrst.c @@ -437,8 +437,8 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int width, height, src_stride, dgd_stride; uint8_t *dgd_buffer, *src_buffer; if (plane == AOM_PLANE_Y) { - width = cm->width; - height = cm->height; + width = src->y_crop_width; + height = src->y_crop_height; src_buffer = src->y_buffer; src_stride = src->y_stride; dgd_buffer = dgd->y_buffer; @@ -478,7 +478,7 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, (1 << plane)); // #bits when a tile is not restored bits = av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 0); - cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); + cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err); best_tile_cost[tile_idx] = DBL_MAX; search_selfguided_restoration( 
dgd_buffer + v_start * dgd_stride + h_start, h_end - h_start, @@ -498,7 +498,7 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, &ref_sgrproj_info) << AV1_PROB_COST_SHIFT; bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 1); - cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); + cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err); if (cost_sgrproj >= cost_norestore) { type[tile_idx] = RESTORE_NONE; } else { @@ -531,7 +531,7 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, } err = try_restoration_frame(src, cpi, rsi, (1 << plane), partial_frame, dst_frame); - cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); + cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err); return cost_sgrproj; } @@ -985,8 +985,8 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int width, height, src_stride, dgd_stride; uint8_t *dgd_buffer, *src_buffer; if (plane == AOM_PLANE_Y) { - width = cm->width; - height = cm->height; + width = src->y_crop_width; + height = src->y_crop_height; src_buffer = src->y_buffer; src_stride = src->y_stride; dgd_buffer = dgd->y_buffer; @@ -1039,7 +1039,7 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, (1 << plane)); // #bits when a tile is not restored bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0); - cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); + cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err); best_tile_cost[tile_idx] = DBL_MAX; av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, @@ -1081,7 +1081,7 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) << AV1_PROB_COST_SHIFT; bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1); - cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); + cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err); if (cost_wiener >= cost_norestore) { type[tile_idx] = RESTORE_NONE; } else { @@ -1114,7 +1114,7 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, } err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame, dst_frame); - cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); + cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err); return cost_wiener; } @@ -1133,8 +1133,8 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int h_start, h_end, v_start, v_end; int width, height; if (plane == AOM_PLANE_Y) { - width = cm->width; - height = cm->height; + width = src->y_crop_width; + height = src->y_crop_height; } else { width = src->uv_crop_width; height = src->uv_crop_height; @@ -1160,13 +1160,14 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, // RD cost associated with no restoration err = sse_restoration_frame(cm, src, cm->frame_to_show, (1 << plane)); bits = frame_level_restore_bits[RESTORE_NONE] << AV1_PROB_COST_SHIFT; - cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); + cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err); return cost_norestore; } static double search_switchable_restoration( - AV1_COMP *cpi, int partial_frame, int plane, RestorationInfo *rsi, - double *tile_cost[RESTORE_SWITCHABLE_TYPES]) { + const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, int plane, + RestorationType *const restore_types[RESTORE_SWITCHABLE_TYPES], + double *const tile_cost[RESTORE_SWITCHABLE_TYPES], RestorationInfo *rsi) { 
AV1_COMMON *const cm = &cpi->common; MACROBLOCK *x = &cpi->td.mb; double cost_switchable = 0; @@ -1174,11 +1175,11 @@ static double search_switchable_restoration( RestorationType r; int width, height; if (plane == AOM_PLANE_Y) { - width = cm->width; - height = cm->height; + width = src->y_crop_width; + height = src->y_crop_height; } else { - width = ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x); - height = ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y); + width = src->uv_crop_width; + height = src->uv_crop_height; } const int ntiles = av1_get_rest_ntiles( width, height, cm->rst_info[plane].restoration_tilesize, NULL, NULL, NULL, @@ -1192,16 +1193,17 @@ static double search_switchable_restoration( rsi->frame_restoration_type = RESTORE_SWITCHABLE; bits = frame_level_restore_bits[rsi->frame_restoration_type] << AV1_PROB_COST_SHIFT; - cost_switchable = RDCOST_DBL(x->rdmult, x->rddiv, bits >> 4, 0); + cost_switchable = RDCOST_DBL(x->rdmult, bits >> 4, 0); for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - double best_cost = RDCOST_DBL( - x->rdmult, x->rddiv, (cpi->switchable_restore_cost[RESTORE_NONE] >> 4), - tile_cost[RESTORE_NONE][tile_idx]); + double best_cost = + RDCOST_DBL(x->rdmult, (cpi->switchable_restore_cost[RESTORE_NONE] >> 4), + tile_cost[RESTORE_NONE][tile_idx]); rsi->restoration_type[tile_idx] = RESTORE_NONE; for (r = 1; r < RESTORE_SWITCHABLE_TYPES; r++) { if (force_restore_type != 0) if (r != force_restore_type) continue; int tilebits = 0; + if (restore_types[r][tile_idx] != r) continue; if (r == RESTORE_WIENER) tilebits += count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info); @@ -1210,8 +1212,8 @@ static double search_switchable_restoration( count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info); tilebits <<= AV1_PROB_COST_SHIFT; tilebits += cpi->switchable_restore_cost[r]; - double cost = RDCOST_DBL(x->rdmult, x->rddiv, tilebits >> 4, - tile_cost[r][tile_idx]); + double cost = + RDCOST_DBL(x->rdmult, tilebits >> 4, tile_cost[r][tile_idx]); if (cost < best_cost) { rsi->restoration_type[tile_idx] = r; @@ -1243,14 +1245,17 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, RestorationType *restore_types[RESTORE_SWITCHABLE_TYPES]; double best_cost_restore; RestorationType r, best_restore; - - const int ntiles_y = av1_get_rest_ntiles(cm->width, cm->height, - cm->rst_info[0].restoration_tilesize, - NULL, NULL, NULL, NULL); + const int ywidth = src->y_crop_width; + const int yheight = src->y_crop_height; + const int uvwidth = src->uv_crop_width; + const int uvheight = src->uv_crop_height; + + const int ntiles_y = + av1_get_rest_ntiles(ywidth, yheight, cm->rst_info[0].restoration_tilesize, + NULL, NULL, NULL, NULL); const int ntiles_uv = av1_get_rest_ntiles( - ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x), - ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y), - cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, NULL); + uvwidth, uvheight, cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, + NULL); // Assume ntiles_uv is never larger that ntiles_y and so the same arrays work. 
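The reworked search_switchable_restoration() above now also receives the per-type selections (restore_types) alongside the per-tile costs, and for each tile it keeps the cheapest option under the RDCOST_DBL metric. A hedged sketch of that per-tile decision, with simplified names and rates (AV1_PROB_COST_SHIFT is assumed to be 9; RDDIV_BITS is 7 as defined later in the rd.h hunk):

```c
/* Simplified stand-ins for RestorationType and RDCOST_DBL(). */
enum { NONE = 0, WIENER, SGRPROJ, NUM_TYPES };

static double rd_cost_dbl(int rdmult, int rate_bits, double dist) {
  return ((double)rate_bits * rdmult) / (double)(1 << 9) + dist * (1 << 7);
}

/* For one tile, keep the restoration type with the lowest RD cost, skipping
 * types whose own search did not actually select them for this tile (the
 * new `restore_types[r][tile_idx] != r` test in the hunk above). */
static int pick_tile_type(int rdmult, const int type_rate[NUM_TYPES],
                          const double tile_dist[NUM_TYPES],
                          const int chosen_by_type_search[NUM_TYPES]) {
  int best_type = NONE;
  double best_cost = rd_cost_dbl(rdmult, type_rate[NONE], tile_dist[NONE]);
  for (int r = 1; r < NUM_TYPES; ++r) {
    if (chosen_by_type_search[r] != r) continue;
    const double cost = rd_cost_dbl(rdmult, type_rate[r], tile_dist[r]);
    if (cost < best_cost) { best_cost = cost; best_type = r; }
  }
  return best_type;
}
```

Skipping types that were rejected by their own per-type search keeps RESTORE_SWITCHABLE from signalling filter parameters that were not retained for that tile.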
for (r = 0; r < RESTORE_SWITCHABLE_TYPES; r++) { @@ -1270,9 +1275,9 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, tile_cost[r], &cpi->trial_frame_rst); } if (plane == AOM_PLANE_Y) - cost_restore[RESTORE_SWITCHABLE] = - search_switchable_restoration(cpi, method == LPF_PICK_FROM_SUBIMAGE, - plane, &cm->rst_info[plane], tile_cost); + cost_restore[RESTORE_SWITCHABLE] = search_switchable_restoration( + src, cpi, method == LPF_PICK_FROM_SUBIMAGE, plane, restore_types, + tile_cost, &cm->rst_info[plane]); else cost_restore[RESTORE_SWITCHABLE] = DBL_MAX; best_cost_restore = DBL_MAX; diff --git a/third_party/aom/av1/encoder/ransac.c b/third_party/aom/av1/encoder/ransac.c index bbd2d179c..c6e3675be 100644 --- a/third_party/aom/av1/encoder/ransac.c +++ b/third_party/aom/av1/encoder/ransac.c @@ -139,6 +139,8 @@ static void normalize_homography(double *pts, int n, double *T) { double msqe = 0; double scale; int i; + + assert(n > 0); for (i = 0; i < n; ++i, p += 2) { mean[0] += p[0]; mean[1] += p[1]; @@ -821,13 +823,15 @@ static int ransac(const int *matched_points, int npoints, // Recompute the motions using only the inliers. for (i = 0; i < num_desired_motions; ++i) { - copy_points_at_indices(points1, corners1, motions[i].inlier_indices, - motions[i].num_inliers); - copy_points_at_indices(points2, corners2, motions[i].inlier_indices, - motions[i].num_inliers); - - find_transformation(motions[i].num_inliers, points1, points2, - params_by_motion + (MAX_PARAMDIM - 1) * i); + if (motions[i].num_inliers >= minpts) { + copy_points_at_indices(points1, corners1, motions[i].inlier_indices, + motions[i].num_inliers); + copy_points_at_indices(points2, corners2, motions[i].inlier_indices, + motions[i].num_inliers); + + find_transformation(motions[i].num_inliers, points1, points2, + params_by_motion + (MAX_PARAMDIM - 1) * i); + } num_inliers_by_motion[i] = motions[i].num_inliers; } diff --git a/third_party/aom/av1/encoder/ratectrl.c b/third_party/aom/av1/encoder/ratectrl.c index 4552c674e..b546fdffa 100644 --- a/third_party/aom/av1/encoder/ratectrl.c +++ b/third_party/aom/av1/encoder/ratectrl.c @@ -94,8 +94,8 @@ static int kf_high = 5000; static int kf_low = 400; double av1_resize_rate_factor(const AV1_COMP *cpi) { - return (double)(cpi->resize_scale_den * cpi->resize_scale_den) / - (cpi->resize_scale_num * cpi->resize_scale_num); + return (double)(cpi->oxcf.width * cpi->oxcf.height) / + (cpi->common.width * cpi->common.height); } // Functions to compute the active minq lookup table entries based on a @@ -1081,7 +1081,7 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index, } // Modify active_best_quality for downscaled normal frames. - if (!av1_resize_unscaled(cpi) && !frame_is_kf_gf_arf(cpi)) { + if (!av1_frame_unscaled(cm) && !frame_is_kf_gf_arf(cpi)) { int qdelta = av1_compute_qdelta_by_rate( rc, cm->frame_type, active_best_quality, 2.0, cm->bit_depth); active_best_quality = @@ -1164,7 +1164,7 @@ void av1_rc_set_frame_target(AV1_COMP *cpi, int target) { rc->this_frame_target = target; // Modify frame size target when down-scaled. 
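The redefined av1_resize_rate_factor() above is simply the ratio of the configured source frame area to the coded (possibly downscaled) frame area, and av1_rc_set_frame_target() multiplies the per-frame bit target by it whenever the frame is scaled. A worked example with illustrative dimensions:

```c
#include <stdio.h>

int main(void) {
  /* Illustrative values only: configured (oxcf) size vs. coded size after
   * a 2x downscale in each dimension. */
  const int oxcf_w = 1280, oxcf_h = 720;
  const int coded_w = 640, coded_h = 360;
  const double factor = (double)(oxcf_w * oxcf_h) / (coded_w * coded_h);
  printf("resize rate factor = %.1f\n", factor); /* prints 4.0 */
  return 0;
}
```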
- if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && !av1_resize_unscaled(cpi)) + if (!av1_frame_unscaled(cm)) rc->this_frame_target = (int)(rc->this_frame_target * av1_resize_rate_factor(cpi)); @@ -1663,3 +1663,64 @@ void av1_set_target_rate(AV1_COMP *cpi) { vbr_rate_correction(cpi, &target_rate); av1_rc_set_frame_target(cpi, target_rate); } + +static unsigned int lcg_rand16(unsigned int *state) { + *state = (unsigned int)(*state * 1103515245ULL + 12345); + return *state / 65536 % 32768; +} + +uint8_t av1_calculate_next_resize_scale(const AV1_COMP *cpi) { + static unsigned int seed = 56789; + const AV1EncoderConfig *oxcf = &cpi->oxcf; + if (oxcf->pass == 1) return SCALE_DENOMINATOR; + uint8_t new_num = SCALE_DENOMINATOR; + + switch (oxcf->resize_mode) { + case RESIZE_NONE: new_num = SCALE_DENOMINATOR; break; + case RESIZE_FIXED: + if (cpi->common.frame_type == KEY_FRAME) + new_num = oxcf->resize_kf_scale_numerator; + else + new_num = oxcf->resize_scale_numerator; + break; + case RESIZE_DYNAMIC: + // RESIZE_DYNAMIC: Just random for now. + new_num = lcg_rand16(&seed) % 4 + 13; + break; + default: assert(0); + } + return new_num; +} + +#if CONFIG_FRAME_SUPERRES +// TODO(afergs): Rename av1_rc_update_superres_scale(...)? +uint8_t av1_calculate_next_superres_scale(const AV1_COMP *cpi, int width, + int height) { + static unsigned int seed = 34567; + const AV1EncoderConfig *oxcf = &cpi->oxcf; + if (oxcf->pass == 1) return SCALE_DENOMINATOR; + uint8_t new_num = SCALE_DENOMINATOR; + + switch (oxcf->superres_mode) { + case SUPERRES_NONE: new_num = SCALE_DENOMINATOR; break; + case SUPERRES_FIXED: + if (cpi->common.frame_type == KEY_FRAME) + new_num = oxcf->superres_kf_scale_numerator; + else + new_num = oxcf->superres_scale_numerator; + break; + case SUPERRES_DYNAMIC: + // SUPERRES_DYNAMIC: Just random for now. + new_num = lcg_rand16(&seed) % 9 + 8; + break; + default: assert(0); + } + + // Make sure overall reduction is no more than 1/2 of the source size. + av1_calculate_scaled_size(&width, &height, new_num); + if (width * 2 < oxcf->width || height * 2 < oxcf->height) + new_num = SCALE_DENOMINATOR; + + return new_num; +} +#endif // CONFIG_FRAME_SUPERRES diff --git a/third_party/aom/av1/encoder/ratectrl.h b/third_party/aom/av1/encoder/ratectrl.h index 61bb0c224..4ebdfadd6 100644 --- a/third_party/aom/av1/encoder/ratectrl.h +++ b/third_party/aom/av1/encoder/ratectrl.h @@ -256,6 +256,11 @@ void av1_set_target_rate(struct AV1_COMP *cpi); int av1_resize_one_pass_cbr(struct AV1_COMP *cpi); +uint8_t av1_calculate_next_resize_scale(const struct AV1_COMP *cpi); +#if CONFIG_FRAME_SUPERRES +uint8_t av1_calculate_next_superres_scale(const struct AV1_COMP *cpi, int width, + int height); +#endif // CONFIG_FRAME_SUPERRES #ifdef __cplusplus } // extern "C" #endif diff --git a/third_party/aom/av1/encoder/rd.c b/third_party/aom/av1/encoder/rd.c index 94c3bb96d..da3b6e209 100644 --- a/third_party/aom/av1/encoder/rd.c +++ b/third_party/aom/av1/encoder/rd.c @@ -50,14 +50,15 @@ // certain modes are assumed to be based on 8x8 blocks. // This table is used to correct for block size. // The factors here are << 2 (2 = x0.5, 32 = x8 etc). 
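The rd_thresh_block_size_factor comment above describes a Q2 fixed-point convention: each table entry v stands for a multiplier of v/4, so 2 means x0.5 and 32 means x8, as the comment says. A tiny reading aid for the values quoted in the table that follows:

```c
#include <stdio.h>

int main(void) {
  /* Entries quoted from the table below; the /4.0 reflects the "<< 2"
   * fixed-point convention described in the comment. */
  const int entries[] = { 2, 3, 4, 6, 8, 32, 64 };
  for (int i = 0; i < (int)(sizeof(entries) / sizeof(entries[0])); ++i)
    printf("entry %2d -> x%.2f\n", entries[i], entries[i] / 4.0);
  return 0;
}
```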
-static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 2, 2, 2, #endif - 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, + 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, #if CONFIG_EXT_PARTITION - 48, 48, 64 + 48, 48, 64, #endif // CONFIG_EXT_PARTITION + 4, 4, 8, 8 }; static void fill_mode_costs(AV1_COMP *cpi) { @@ -66,16 +67,16 @@ static void fill_mode_costs(AV1_COMP *cpi) { for (i = 0; i < INTRA_MODES; ++i) for (j = 0; j < INTRA_MODES; ++j) - av1_cost_tokens(cpi->y_mode_costs[i][j], av1_kf_y_mode_prob[i][j], - av1_intra_mode_tree); + av1_cost_tokens_from_cdf(cpi->y_mode_costs[i][j], av1_kf_y_mode_cdf[i][j], + av1_intra_mode_inv); for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) - av1_cost_tokens(cpi->mbmode_cost[i], fc->y_mode_prob[i], - av1_intra_mode_tree); + av1_cost_tokens_from_cdf(cpi->mbmode_cost[i], fc->y_mode_cdf[i], + av1_intra_mode_inv); for (i = 0; i < INTRA_MODES; ++i) - av1_cost_tokens(cpi->intra_uv_mode_cost[i], fc->uv_mode_prob[i], - av1_intra_mode_tree); + av1_cost_tokens_from_cdf(cpi->intra_uv_mode_cost[i], fc->uv_mode_cdf[i], + av1_intra_mode_inv); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) av1_cost_tokens(cpi->switchable_interp_costs[i], @@ -83,20 +84,18 @@ static void fill_mode_costs(AV1_COMP *cpi) { #if CONFIG_PALETTE for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) { - av1_cost_tokens(cpi->palette_y_size_cost[i], - av1_default_palette_y_size_prob[i], av1_palette_size_tree); - av1_cost_tokens(cpi->palette_uv_size_cost[i], - av1_default_palette_uv_size_prob[i], av1_palette_size_tree); + av1_cost_tokens_from_cdf(cpi->palette_y_size_cost[i], + fc->palette_y_size_cdf[i], NULL); + av1_cost_tokens_from_cdf(cpi->palette_uv_size_cost[i], + fc->palette_uv_size_cdf[i], NULL); } for (i = 0; i < PALETTE_SIZES; ++i) { for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) { - av1_cost_tokens(cpi->palette_y_color_cost[i][j], - av1_default_palette_y_color_index_prob[i][j], - av1_palette_color_index_tree[i]); - av1_cost_tokens(cpi->palette_uv_color_cost[i][j], - av1_default_palette_uv_color_index_prob[i][j], - av1_palette_color_index_tree[i]); + av1_cost_tokens_from_cdf(cpi->palette_y_color_cost[i][j], + fc->palette_y_color_index_cdf[i][j], NULL); + av1_cost_tokens_from_cdf(cpi->palette_uv_color_cost[i][j], + fc->palette_uv_color_index_cdf[i][j], NULL); } } #endif // CONFIG_PALETTE @@ -147,8 +146,9 @@ static void fill_mode_costs(AV1_COMP *cpi) { av1_switchable_restore_tree); #endif // CONFIG_LOOP_RESTORATION #if CONFIG_GLOBAL_MOTION - av1_cost_tokens(cpi->gmtype_cost, fc->global_motion_types_prob, - av1_global_motion_types_tree); + for (i = 0; i < TRANS_TYPES; ++i) + cpi->gmtype_cost[i] = (1 + (i > 0 ? GLOBAL_TYPE_BITS : 0)) + << AV1_PROB_COST_SHIFT; #endif // CONFIG_GLOBAL_MOTION } @@ -301,7 +301,7 @@ static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) { 0, MAXQ); const int q = compute_rd_thresh_factor(qindex, cm->bit_depth); - for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { + for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { // Threshold here seems unnecessarily harsh but fine given actual // range of values used for cpi->sf.thresh_mult[]. const int t = q * rd_thresh_block_size_factor[bsize]; @@ -350,7 +350,6 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) { aom_clear_system_state(); - rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128). 
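The rd->RDDIV field removed above ties in with the rd.h change further down in this diff: RDCOST() and RDCOST_DBL() lose their DM argument and use the compile-time constant RDDIV_BITS (7, i.e. distortion is weighted by 128) directly. A self-contained sketch of the new integer macro, with AV1_PROB_COST_SHIFT assumed to be 9 and illustrative operands:

```c
#include <stdint.h>
#include <stdio.h>

#define RDDIV_BITS 7
#define AV1_PROB_COST_SHIFT 9 /* assumption for this sketch */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
#define RDCOST(RM, R, D)                                            \
  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), AV1_PROB_COST_SHIFT) + \
   ((int64_t)(D) << RDDIV_BITS))

int main(void) {
  const int rdmult = 512, rate = 100, dist = 10; /* illustrative */
  /* rate term rounds to 100, distortion term is 10 * 128 = 1280, total 1380 */
  printf("%lld\n", (long long)RDCOST(rdmult, rate, dist));
  return 0;
}
```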
rd->RDMULT = av1_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); set_error_per_bit(x, rd->RDMULT); @@ -367,6 +366,16 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) { x->mvcost = x->mv_cost_stack[0]; x->nmvjointcost = x->nmv_vec_cost[0]; +#if CONFIG_INTRABC + if (frame_is_intra_only(cm) && cm->allow_screen_content_tools && + cpi->oxcf.pass != 1) { + av1_build_nmv_cost_table( + x->nmv_vec_cost[0], + cm->allow_high_precision_mv ? x->nmvcost_hp[0] : x->nmvcost[0], + &cm->fc->ndvc, MV_SUBPEL_NONE); + } +#endif + if (cpi->oxcf.pass != 1) { av1_fill_token_costs(x->token_costs, cm->fc->coef_probs); @@ -434,6 +443,12 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) { av1_cost_tokens((int *)cpi->inter_compound_mode_cost[i], cm->fc->inter_compound_mode_probs[i], av1_inter_compound_mode_tree); +#if CONFIG_COMPOUND_SINGLEREF + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + av1_cost_tokens((int *)cpi->inter_singleref_comp_mode_cost[i], + cm->fc->inter_singleref_comp_mode_probs[i], + av1_inter_singleref_comp_mode_tree); +#endif // CONFIG_COMPOUND_SINGLEREF #if CONFIG_INTERINTRA for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) av1_cost_tokens((int *)cpi->interintra_mode_cost[i], @@ -442,16 +457,22 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) { #endif // CONFIG_INTERINTRA #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) { + for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) { av1_cost_tokens((int *)cpi->motion_mode_cost[i], cm->fc->motion_mode_prob[i], av1_motion_mode_tree); } #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION - for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) { + for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) { cpi->motion_mode_cost1[i][0] = av1_cost_bit(cm->fc->obmc_prob[i], 0); cpi->motion_mode_cost1[i][1] = av1_cost_bit(cm->fc->obmc_prob[i], 1); } #endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION +#if CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT + for (i = ADAPT_OVERLAP_BLOCK_8X8; i < ADAPT_OVERLAP_BLOCKS; ++i) { + av1_cost_tokens((int *)cpi->ncobmc_mode_cost[i], + cm->fc->ncobmc_mode_prob[i], av1_ncobmc_mode_tree); + } +#endif #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION } } @@ -648,7 +669,7 @@ static void get_entropy_contexts_plane( for (i = 0; i < num_4x4_h; i += 8) t_left[i] = !!*(const uint64_t *)&left[i]; break; -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) case TX_4X16: for (i = 0; i < num_4x4_w; i += 2) t_above[i] = !!*(const uint16_t *)&above[i]; @@ -675,7 +696,7 @@ static void get_entropy_contexts_plane( for (i = 0; i < num_4x4_h; i += 4) t_left[i] = !!*(const uint32_t *)&left[i]; break; -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif default: assert(0 && "Invalid transform size."); break; } @@ -749,7 +770,7 @@ static void get_entropy_contexts_plane( for (i = 0; i < num_4x4_h; i += 4) t_left[i] = !!*(const uint32_t *)&left[i]; break; -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) case TX_4X16: memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); for (i = 0; i < num_4x4_h; i += 4) @@ -772,7 +793,7 @@ static void get_entropy_contexts_plane( for (i = 0; i < num_4x4_h; i += 2) t_left[i] = !!*(const uint16_t *)&left[i]; break; -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif default: assert(0 && "Invalid transform size."); break; } } @@ -781,7 +802,7 @@ void av1_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, 
const struct macroblockd_plane *pd, ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE], ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]) { -#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 +#if CONFIG_CHROMA_SUB8X8 const BLOCK_SIZE plane_bsize = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); #else @@ -983,6 +1004,54 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + rd->thresh_mult[THR_SR_NEAREST_NEARMV] += 1200; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_NEAREST_NEARL2] += 1200; + rd->thresh_mult[THR_SR_NEAREST_NEARL3] += 1200; + rd->thresh_mult[THR_SR_NEAREST_NEARB] += 1200; +#endif // CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_NEAREST_NEARA] += 1200; + rd->thresh_mult[THR_SR_NEAREST_NEARG] += 1200; + + /* + rd->thresh_mult[THR_SR_NEAREST_NEWMV] += 1200; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_NEAREST_NEWL2] += 1200; + rd->thresh_mult[THR_SR_NEAREST_NEWL3] += 1200; + rd->thresh_mult[THR_SR_NEAREST_NEWB] += 1200; +#endif // CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_NEAREST_NEWA] += 1200; + rd->thresh_mult[THR_SR_NEAREST_NEWG] += 1200;*/ + + rd->thresh_mult[THR_SR_NEAR_NEWMV] += 1500; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_NEAR_NEWL2] += 1500; + rd->thresh_mult[THR_SR_NEAR_NEWL3] += 1500; + rd->thresh_mult[THR_SR_NEAR_NEWB] += 1500; +#endif // CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_NEAR_NEWA] += 1500; + rd->thresh_mult[THR_SR_NEAR_NEWG] += 1500; + + rd->thresh_mult[THR_SR_ZERO_NEWMV] += 2000; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_ZERO_NEWL2] += 2000; + rd->thresh_mult[THR_SR_ZERO_NEWL3] += 2000; + rd->thresh_mult[THR_SR_ZERO_NEWB] += 2000; +#endif // CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_ZERO_NEWA] += 2000; + rd->thresh_mult[THR_SR_ZERO_NEWG] += 2000; + + rd->thresh_mult[THR_SR_NEW_NEWMV] += 1700; +#if CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_NEW_NEWL2] += 1700; + rd->thresh_mult[THR_SR_NEW_NEWL3] += 1700; + rd->thresh_mult[THR_SR_NEW_NEWB] += 1700; +#endif // CONFIG_EXT_REFS + rd->thresh_mult[THR_SR_NEW_NEWA] += 1700; + rd->thresh_mult[THR_SR_NEW_NEWG] += 1700; +#endif // CONFIG_COMPOUND_SINGLEREF + rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1000; #if CONFIG_EXT_REFS rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] += 1000; @@ -994,6 +1063,13 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] += 1000; rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] += 1000; rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] += 1000; + +#if CONFIG_EXT_COMP_REFS + rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] += 1000; + rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] += 1000; + rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] += 1000; + rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] += 1000; +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #else // CONFIG_EXT_INTER @@ -1009,6 +1085,12 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_NEARESTL2B] += 1000; rd->thresh_mult[THR_COMP_NEARESTL3B] += 1000; rd->thresh_mult[THR_COMP_NEARESTGB] += 1000; +#if CONFIG_EXT_COMP_REFS + rd->thresh_mult[THR_COMP_NEARESTLL2] += 1000; + rd->thresh_mult[THR_COMP_NEARESTLL3] += 1000; + rd->thresh_mult[THR_COMP_NEARESTLG] += 1000; + rd->thresh_mult[THR_COMP_NEARESTBA] += 1000; +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #endif // CONFIG_EXT_INTER @@ -1081,6 +1163,40 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_NEW_NEARGB] += 1700; rd->thresh_mult[THR_COMP_NEW_NEWGB] += 2000; rd->thresh_mult[THR_COMP_ZERO_ZEROGB] += 2500; + +#if CONFIG_EXT_COMP_REFS + 
rd->thresh_mult[THR_COMP_NEAR_NEARLL2] += 1200; + rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] += 1500; + rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] += 1500; + rd->thresh_mult[THR_COMP_NEAR_NEWLL2] += 1700; + rd->thresh_mult[THR_COMP_NEW_NEARLL2] += 1700; + rd->thresh_mult[THR_COMP_NEW_NEWLL2] += 2000; + rd->thresh_mult[THR_COMP_ZERO_ZEROLL2] += 2500; + + rd->thresh_mult[THR_COMP_NEAR_NEARLL3] += 1200; + rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] += 1500; + rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 1500; + rd->thresh_mult[THR_COMP_NEAR_NEWLL3] += 1700; + rd->thresh_mult[THR_COMP_NEW_NEARLL3] += 1700; + rd->thresh_mult[THR_COMP_NEW_NEWLL3] += 2000; + rd->thresh_mult[THR_COMP_ZERO_ZEROLL3] += 2500; + + rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1200; + rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 1500; + rd->thresh_mult[THR_COMP_NEW_NEARESTLG] += 1500; + rd->thresh_mult[THR_COMP_NEAR_NEWLG] += 1700; + rd->thresh_mult[THR_COMP_NEW_NEARLG] += 1700; + rd->thresh_mult[THR_COMP_NEW_NEWLG] += 2000; + rd->thresh_mult[THR_COMP_ZERO_ZEROLG] += 2500; + + rd->thresh_mult[THR_COMP_NEAR_NEARBA] += 1200; + rd->thresh_mult[THR_COMP_NEAREST_NEWBA] += 1500; + rd->thresh_mult[THR_COMP_NEW_NEARESTBA] += 1500; + rd->thresh_mult[THR_COMP_NEAR_NEWBA] += 1700; + rd->thresh_mult[THR_COMP_NEW_NEARBA] += 1700; + rd->thresh_mult[THR_COMP_NEW_NEWBA] += 2000; + rd->thresh_mult[THR_COMP_ZERO_ZEROBA] += 2500; +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #else // CONFIG_EXT_INTER @@ -1105,6 +1221,17 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_NEWL3B] += 2000; rd->thresh_mult[THR_COMP_NEARGB] += 1500; rd->thresh_mult[THR_COMP_NEWGB] += 2000; + +#if CONFIG_EXT_COMP_REFS + rd->thresh_mult[THR_COMP_NEARLL2] += 1500; + rd->thresh_mult[THR_COMP_NEWLL2] += 2000; + rd->thresh_mult[THR_COMP_NEARLL3] += 1500; + rd->thresh_mult[THR_COMP_NEWLL3] += 2000; + rd->thresh_mult[THR_COMP_NEARLG] += 1500; + rd->thresh_mult[THR_COMP_NEWLG] += 2000; + rd->thresh_mult[THR_COMP_NEARBA] += 1500; + rd->thresh_mult[THR_COMP_NEWBA] += 2000; +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS rd->thresh_mult[THR_COMP_ZEROLA] += 2500; @@ -1119,6 +1246,13 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_ZEROL2B] += 2500; rd->thresh_mult[THR_COMP_ZEROL3B] += 2500; rd->thresh_mult[THR_COMP_ZEROGB] += 2500; + +#if CONFIG_EXT_COMP_REFS + rd->thresh_mult[THR_COMP_ZEROLL2] += 2500; + rd->thresh_mult[THR_COMP_ZEROLL3] += 2500; + rd->thresh_mult[THR_COMP_ZEROLG] += 2500; + rd->thresh_mult[THR_COMP_ZEROBA] += 2500; +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #endif // CONFIG_EXT_INTER diff --git a/third_party/aom/av1/encoder/rd.h b/third_party/aom/av1/encoder/rd.h index 5c3eee493..ea5115b41 100644 --- a/third_party/aom/av1/encoder/rd.h +++ b/third_party/aom/av1/encoder/rd.h @@ -30,12 +30,13 @@ extern "C" { #define RDDIV_BITS 7 #define RD_EPB_SHIFT 6 -#define RDCOST(RM, DM, R, D) \ - (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), AV1_PROB_COST_SHIFT) + (D << DM)) +#define RDCOST(RM, R, D) \ + (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), AV1_PROB_COST_SHIFT) + \ + (D << RDDIV_BITS)) -#define RDCOST_DBL(RM, DM, R, D) \ +#define RDCOST_DBL(RM, R, D) \ (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \ - ((double)(D) * (1 << (DM)))) + ((double)(D) * (1 << RDDIV_BITS))) #define QIDX_SKIP_THRESH 115 @@ -96,6 +97,54 @@ typedef enum { #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + THR_SR_NEAREST_NEARMV, +#if CONFIG_EXT_REFS + THR_SR_NEAREST_NEARL2, + THR_SR_NEAREST_NEARL3, 
+ THR_SR_NEAREST_NEARB, +#endif // CONFIG_EXT_REFS + THR_SR_NEAREST_NEARG, + THR_SR_NEAREST_NEARA, + + /* + THR_SR_NEAREST_NEWMV, +#if CONFIG_EXT_REFS + THR_SR_NEAREST_NEWL2, + THR_SR_NEAREST_NEWL3, + THR_SR_NEAREST_NEWB, +#endif // CONFIG_EXT_REFS + THR_SR_NEAREST_NEWG, + THR_SR_NEAREST_NEWA,*/ + + THR_SR_NEAR_NEWMV, +#if CONFIG_EXT_REFS + THR_SR_NEAR_NEWL2, + THR_SR_NEAR_NEWL3, + THR_SR_NEAR_NEWB, +#endif // CONFIG_EXT_REFS + THR_SR_NEAR_NEWG, + THR_SR_NEAR_NEWA, + + THR_SR_ZERO_NEWMV, +#if CONFIG_EXT_REFS + THR_SR_ZERO_NEWL2, + THR_SR_ZERO_NEWL3, + THR_SR_ZERO_NEWB, +#endif // CONFIG_EXT_REFS + THR_SR_ZERO_NEWG, + THR_SR_ZERO_NEWA, + + THR_SR_NEW_NEWMV, +#if CONFIG_EXT_REFS + THR_SR_NEW_NEWL2, + THR_SR_NEW_NEWL3, + THR_SR_NEW_NEWB, +#endif // CONFIG_EXT_REFS + THR_SR_NEW_NEWG, + THR_SR_NEW_NEWA, +#endif // CONFIG_COMPOUND_SINGLEREF + THR_COMP_NEAREST_NEARESTLA, #if CONFIG_EXT_REFS THR_COMP_NEAREST_NEARESTL2A, @@ -107,6 +156,12 @@ typedef enum { THR_COMP_NEAREST_NEARESTL2B, THR_COMP_NEAREST_NEARESTL3B, THR_COMP_NEAREST_NEARESTGB, +#if CONFIG_EXT_COMP_REFS + THR_COMP_NEAREST_NEARESTLL2, + THR_COMP_NEAREST_NEARESTLL3, + THR_COMP_NEAREST_NEARESTLG, + THR_COMP_NEAREST_NEARESTBA, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #else // CONFIG_EXT_INTER @@ -122,6 +177,12 @@ typedef enum { THR_COMP_NEARESTL2B, THR_COMP_NEARESTL3B, THR_COMP_NEARESTGB, +#if CONFIG_EXT_COMP_REFS + THR_COMP_NEARESTLL2, + THR_COMP_NEARESTLL3, + THR_COMP_NEARESTLG, + THR_COMP_NEARESTBA, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #endif // CONFIG_EXT_INTER @@ -138,8 +199,6 @@ typedef enum { #if CONFIG_EXT_INTER - THR_COMP_NEAR_NEARESTLA, - THR_COMP_NEAREST_NEARLA, THR_COMP_NEAR_NEARLA, THR_COMP_NEW_NEARESTLA, THR_COMP_NEAREST_NEWLA, @@ -149,8 +208,6 @@ typedef enum { THR_COMP_ZERO_ZEROLA, #if CONFIG_EXT_REFS - THR_COMP_NEAR_NEARESTL2A, - THR_COMP_NEAREST_NEARL2A, THR_COMP_NEAR_NEARL2A, THR_COMP_NEW_NEARESTL2A, THR_COMP_NEAREST_NEWL2A, @@ -159,8 +216,6 @@ typedef enum { THR_COMP_NEW_NEWL2A, THR_COMP_ZERO_ZEROL2A, - THR_COMP_NEAR_NEARESTL3A, - THR_COMP_NEAREST_NEARL3A, THR_COMP_NEAR_NEARL3A, THR_COMP_NEW_NEARESTL3A, THR_COMP_NEAREST_NEWL3A, @@ -170,8 +225,6 @@ typedef enum { THR_COMP_ZERO_ZEROL3A, #endif // CONFIG_EXT_REFS - THR_COMP_NEAR_NEARESTGA, - THR_COMP_NEAREST_NEARGA, THR_COMP_NEAR_NEARGA, THR_COMP_NEW_NEARESTGA, THR_COMP_NEAREST_NEWGA, @@ -181,8 +234,6 @@ typedef enum { THR_COMP_ZERO_ZEROGA, #if CONFIG_EXT_REFS - THR_COMP_NEAR_NEARESTLB, - THR_COMP_NEAREST_NEARLB, THR_COMP_NEAR_NEARLB, THR_COMP_NEW_NEARESTLB, THR_COMP_NEAREST_NEWLB, @@ -191,8 +242,6 @@ typedef enum { THR_COMP_NEW_NEWLB, THR_COMP_ZERO_ZEROLB, - THR_COMP_NEAR_NEARESTL2B, - THR_COMP_NEAREST_NEARL2B, THR_COMP_NEAR_NEARL2B, THR_COMP_NEW_NEARESTL2B, THR_COMP_NEAREST_NEWL2B, @@ -201,8 +250,6 @@ typedef enum { THR_COMP_NEW_NEWL2B, THR_COMP_ZERO_ZEROL2B, - THR_COMP_NEAR_NEARESTL3B, - THR_COMP_NEAREST_NEARL3B, THR_COMP_NEAR_NEARL3B, THR_COMP_NEW_NEARESTL3B, THR_COMP_NEAREST_NEWL3B, @@ -211,8 +258,6 @@ typedef enum { THR_COMP_NEW_NEWL3B, THR_COMP_ZERO_ZEROL3B, - THR_COMP_NEAR_NEARESTGB, - THR_COMP_NEAREST_NEARGB, THR_COMP_NEAR_NEARGB, THR_COMP_NEW_NEARESTGB, THR_COMP_NEAREST_NEWGB, @@ -220,6 +265,40 @@ typedef enum { THR_COMP_NEAR_NEWGB, THR_COMP_NEW_NEWGB, THR_COMP_ZERO_ZEROGB, + +#if CONFIG_EXT_COMP_REFS + THR_COMP_NEAR_NEARLL2, + THR_COMP_NEW_NEARESTLL2, + THR_COMP_NEAREST_NEWLL2, + THR_COMP_NEW_NEARLL2, + THR_COMP_NEAR_NEWLL2, + THR_COMP_NEW_NEWLL2, + THR_COMP_ZERO_ZEROLL2, + + THR_COMP_NEAR_NEARLL3, + THR_COMP_NEW_NEARESTLL3, 
+ THR_COMP_NEAREST_NEWLL3, + THR_COMP_NEW_NEARLL3, + THR_COMP_NEAR_NEWLL3, + THR_COMP_NEW_NEWLL3, + THR_COMP_ZERO_ZEROLL3, + + THR_COMP_NEAR_NEARLG, + THR_COMP_NEW_NEARESTLG, + THR_COMP_NEAREST_NEWLG, + THR_COMP_NEW_NEARLG, + THR_COMP_NEAR_NEWLG, + THR_COMP_NEW_NEWLG, + THR_COMP_ZERO_ZEROLG, + + THR_COMP_NEAR_NEARBA, + THR_COMP_NEW_NEARESTBA, + THR_COMP_NEAREST_NEWBA, + THR_COMP_NEW_NEARBA, + THR_COMP_NEAR_NEWBA, + THR_COMP_NEW_NEWBA, + THR_COMP_ZERO_ZEROBA, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #else // CONFIG_EXT_INTER @@ -244,6 +323,17 @@ typedef enum { THR_COMP_NEWL3B, THR_COMP_NEARGB, THR_COMP_NEWGB, + +#if CONFIG_EXT_COMP_REFS + THR_COMP_NEARLL2, + THR_COMP_NEWLL2, + THR_COMP_NEARLL3, + THR_COMP_NEWLL3, + THR_COMP_NEARLG, + THR_COMP_NEWLG, + THR_COMP_NEARBA, + THR_COMP_NEWBA, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS THR_COMP_ZEROLA, @@ -258,6 +348,13 @@ typedef enum { THR_COMP_ZEROL2B, THR_COMP_ZEROL3B, THR_COMP_ZEROGB, + +#if CONFIG_EXT_COMP_REFS + THR_COMP_ZEROLL2, + THR_COMP_ZEROLL3, + THR_COMP_ZEROLG, + THR_COMP_ZEROBA, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #endif // CONFIG_EXT_INTER @@ -344,12 +441,11 @@ typedef struct RD_OPT { int thresh_mult[MAX_MODES]; int thresh_mult_sub8x8[MAX_REFS]; - int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; + int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES]; int64_t prediction_type_threshes[TOTAL_REFS_PER_FRAME][REFERENCE_MODES]; int RDMULT; - int RDDIV; } RD_OPT; static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) { @@ -361,7 +457,9 @@ static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) { rd_stats->rdcost = 0; rd_stats->sse = 0; rd_stats->skip = 1; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + rd_stats->zero_rate = 0; + rd_stats->ref_rdcost = INT64_MAX; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 rd_stats->dist_y = 0; #endif #if CONFIG_RD_DEBUG @@ -388,7 +486,9 @@ static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) { rd_stats->rdcost = INT64_MAX; rd_stats->sse = INT64_MAX; rd_stats->skip = 0; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + rd_stats->zero_rate = 0; + rd_stats->ref_rdcost = INT64_MAX; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 rd_stats->dist_y = INT64_MAX; #endif #if CONFIG_RD_DEBUG @@ -415,7 +515,7 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst, rd_stats_dst->dist += rd_stats_src->dist; rd_stats_dst->sse += rd_stats_src->sse; rd_stats_dst->skip &= rd_stats_src->skip; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 rd_stats_dst->dist_y += rd_stats_src->dist_y; #endif #if CONFIG_RD_DEBUG diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c index 2a537a06a..43b00b83b 100644 --- a/third_party/aom/av1/encoder/rdopt.c +++ b/third_party/aom/av1/encoder/rdopt.c @@ -63,7 +63,7 @@ #endif // CONFIG_PVQ #if CONFIG_PVQ || CONFIG_DAALA_DIST #include "av1/common/pvq.h" -#endif // CONFIG_PVQ || CONFIG_DAALA_DIST +#endif // CONFIG_PVQ || CONFIG_DIST_8X8 #if CONFIG_DUAL_FILTER #define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS) #if USE_EXTRA_FILTER @@ -113,8 +113,14 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = { #endif // CONFIG_EXT_REFS #if CONFIG_EXT_REFS +#if CONFIG_EXT_COMP_REFS +#define SECOND_REF_FRAME_MASK \ + ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | (1 << GOLDEN_FRAME) | \ + (1 << LAST2_FRAME) | 0x01) // NOLINT +#else // !CONFIG_EXT_COMP_REFS #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01) -#else +#endif // CONFIG_EXT_COMP_REFS +#else 
// !CONFIG_EXT_REFS #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01) #endif // CONFIG_EXT_REFS @@ -126,6 +132,11 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = { #define FILTER_FAST_SEARCH 1 #endif // CONFIG_EXT_INTRA +// Setting this to 1 will disable trellis optimization within the +// transform search. Trellis optimization will still be applied +// in the final encode. +#define DISABLE_TRELLISQ_SEARCH 0 + const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671, // vert -7.7051, -3.2234, -3.6193, 3.4533 }; // horz @@ -191,6 +202,56 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { // TODO(zoeliu): May need to reconsider the order on the modes to check #if CONFIG_EXT_INTER + +#if CONFIG_COMPOUND_SINGLEREF + // Single ref comp mode + { SR_NEAREST_NEARMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_NEAREST_NEARMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEARMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEARMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_NEAREST_NEARMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEARMV, { ALTREF_FRAME, NONE_FRAME } }, + + /* + { SR_NEAREST_NEWMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_NEAREST_NEWMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEWMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_NEAREST_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEWMV, { ALTREF_FRAME, NONE_FRAME } },*/ + + { SR_NEAR_NEWMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_NEAR_NEWMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_NEAR_NEWMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_NEAR_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_NEAR_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_NEAR_NEWMV, { ALTREF_FRAME, NONE_FRAME } }, + + { SR_ZERO_NEWMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_ZERO_NEWMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_ZERO_NEWMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_ZERO_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_ZERO_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_ZERO_NEWMV, { ALTREF_FRAME, NONE_FRAME } }, + + { SR_NEW_NEWMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_NEW_NEWMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_NEW_NEWMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_NEW_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_NEW_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_NEW_NEWMV, { ALTREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_COMPOUND_SINGLEREF + { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } }, #if CONFIG_EXT_REFS { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } }, @@ -202,6 +263,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + +#if CONFIG_EXT_COMP_REFS + { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, + { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, + { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #else // CONFIG_EXT_INTER @@ -217,6 +285,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + 
+#if CONFIG_EXT_COMP_REFS + { NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, + { NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, + { NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #endif // CONFIG_EXT_INTER @@ -297,9 +372,43 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + +#if CONFIG_EXT_COMP_REFS + { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } }, + { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, + { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } }, + { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } }, + { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } }, + { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } }, + { ZERO_ZEROMV, { LAST_FRAME, LAST2_FRAME } }, + + { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } }, + { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, + { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } }, + { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } }, + { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } }, + { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } }, + { ZERO_ZEROMV, { LAST_FRAME, LAST3_FRAME } }, + + { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, + { ZERO_ZEROMV, { LAST_FRAME, GOLDEN_FRAME } }, + + { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { ZERO_ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS -#else // CONFIG_EXT_INTER +#else // !CONFIG_EXT_INTER { NEARMV, { LAST_FRAME, ALTREF_FRAME } }, { NEWMV, { LAST_FRAME, ALTREF_FRAME } }, @@ -321,6 +430,17 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } }, { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + +#if CONFIG_EXT_COMP_REFS + { NEARMV, { LAST_FRAME, LAST2_FRAME } }, + { NEWMV, { LAST_FRAME, LAST2_FRAME } }, + { NEARMV, { LAST_FRAME, LAST3_FRAME } }, + { NEWMV, { LAST_FRAME, LAST3_FRAME } }, + { NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS { ZEROMV, { LAST_FRAME, ALTREF_FRAME } }, @@ -335,6 +455,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } }, { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } }, { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + +#if CONFIG_EXT_COMP_REFS + { ZEROMV, { LAST_FRAME, LAST2_FRAME } }, + { ZEROMV, { LAST_FRAME, LAST3_FRAME } }, + { ZEROMV, { LAST_FRAME, GOLDEN_FRAME } }, + { ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #endif // CONFIG_EXT_INTER @@ -385,6 +512,35 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { #endif // CONFIG_EXT_INTER }; +static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = { + DC_PRED, H_PRED, V_PRED, +#if CONFIG_ALT_INTRA + 
SMOOTH_PRED, +#endif // CONFIG_ALT_INTRA + TM_PRED, +#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV + SMOOTH_V_PRED, SMOOTH_H_PRED, +#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV + D135_PRED, D207_PRED, D153_PRED, D63_PRED, D117_PRED, D45_PRED, +}; + +#if CONFIG_CFL +static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = { + UV_DC_PRED, UV_H_PRED, UV_V_PRED, +#if CONFIG_ALT_INTRA + UV_SMOOTH_PRED, +#endif // CONFIG_ALT_INTRA + UV_TM_PRED, +#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV + UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED, +#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV + UV_D135_PRED, UV_D207_PRED, UV_D153_PRED, + UV_D63_PRED, UV_D117_PRED, UV_D45_PRED, +}; +#else +#define uv_rd_search_mode_order intra_rd_search_mode_order +#endif // CONFIG_CFL + #if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE static INLINE int write_uniform_cost(int n, int v) { const int l = get_unsigned_bits(n); @@ -404,7 +560,7 @@ static INLINE int write_uniform_cost(int n, int v) { #define FAST_EXT_TX_EDST_MARGIN 0.3 #if CONFIG_DAALA_DIST -static int od_compute_var_4x4(od_coeff *x, int stride) { +static int od_compute_var_4x4(uint16_t *x, int stride) { int sum; int s2; int i; @@ -420,7 +576,7 @@ static int od_compute_var_4x4(od_coeff *x, int stride) { s2 += t * t; } } - // TODO(yushin) : Check wheter any changes are required for high bit depth. + return (s2 - (sum * sum >> 4)) >> 4; } @@ -431,8 +587,8 @@ static int od_compute_var_4x4(od_coeff *x, int stride) { #define OD_DIST_LP_MID (5) #define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2) -static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x, - od_coeff *y, od_coeff *e_lp, int stride) { +static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x, + uint16_t *y, od_coeff *e_lp, int stride) { double sum; int min_var; double mean_var; @@ -444,8 +600,7 @@ static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x, double vardist; vardist = 0; - OD_ASSERT(qm != OD_FLAT_QM); - (void)qm; + #if 1 min_var = INT_MAX; mean_var = 0; @@ -490,22 +645,61 @@ static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x, } // Note : Inputs x and y are in a pixel domain -static double od_compute_dist(int qm, int activity_masking, od_coeff *x, - od_coeff *y, int bsize_w, int bsize_h, - int qindex) { +static double od_compute_dist_common(int activity_masking, uint16_t *x, + uint16_t *y, int bsize_w, int bsize_h, + int qindex, od_coeff *tmp, + od_coeff *e_lp) { + int i, j; + double sum = 0; + const int mid = OD_DIST_LP_MID; + + for (j = 0; j < bsize_w; j++) { + e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j]; + e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] + + 2 * tmp[(bsize_h - 2) * bsize_w + j]; + } + for (i = 1; i < bsize_h - 1; i++) { + for (j = 0; j < bsize_w; j++) { + e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] + + tmp[(i - 1) * bsize_w + j] + + tmp[(i + 1) * bsize_w + j]; + } + } + for (i = 0; i < bsize_h; i += 8) { + for (j = 0; j < bsize_w; j += 8) { + sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j], + &y[i * bsize_w + j], &e_lp[i * bsize_w + j], + bsize_w); + } + } + /* Scale according to linear regression against SSE, for 8x8 blocks. */ + if (activity_masking) { + sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) + + (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0); + } else { + sum *= qindex >= 128 + ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128) + : qindex <= 43 ? 
1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43) + : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43); + } + + return sum; +} + +static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w, + int bsize_h, int qindex) { int i; double sum; sum = 0; assert(bsize_w >= 8 && bsize_h >= 8); - if (qm == OD_FLAT_QM) { - for (i = 0; i < bsize_w * bsize_h; i++) { - double tmp; - tmp = x[i] - y[i]; - sum += tmp * tmp; - } - } else { +#if CONFIG_PVQ + int activity_masking = 1; +#else + int activity_masking = 0; +#endif + { int j; DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]); DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]); @@ -525,63 +719,242 @@ static double od_compute_dist(int qm, int activity_masking, od_coeff *x, e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1]; } } - for (j = 0; j < bsize_w; j++) { - e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j]; - e_lp[(bsize_h - 1) * bsize_w + j] = - mid * tmp[(bsize_h - 1) * bsize_w + j] + - 2 * tmp[(bsize_h - 2) * bsize_w + j]; - } - for (i = 1; i < bsize_h - 1; i++) { + sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h, + qindex, tmp, e_lp); + } + return sum; +} + +static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w, + int bsize_h, int qindex) { + int i; + double sum; + sum = 0; + + assert(bsize_w >= 8 && bsize_h >= 8); + +#if CONFIG_PVQ + int activity_masking = 1; +#else + int activity_masking = 0; +#endif + { + int j; + DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]); + int mid = OD_DIST_LP_MID; + for (i = 0; i < bsize_h; i++) { for (j = 0; j < bsize_w; j++) { - e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] + - tmp[(i - 1) * bsize_w + j] + - tmp[(i + 1) * bsize_w + j]; + y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j]; } } - for (i = 0; i < bsize_h; i += 8) { - for (j = 0; j < bsize_w; j += 8) { - sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j], - &y[i * bsize_w + j], &e_lp[i * bsize_w + j], - bsize_w); + for (i = 0; i < bsize_h; i++) { + tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1]; + tmp[i * bsize_w + bsize_w - 1] = + mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2]; + for (j = 1; j < bsize_w - 1; j++) { + tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + + e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1]; } } - /* Scale according to linear regression against SSE, for 8x8 blocks. */ - if (activity_masking) { - sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) + - (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0); - } else { - sum *= qindex >= 128 - ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128) - : qindex <= 43 - ? 
1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43) - : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43); - } + sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h, + qindex, tmp, e_lp); } return sum; } +#endif // CONFIG_DAALA_DIST + +#if CONFIG_DIST_8X8 +#define NEW_FUTURE_DIST 0 +int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd, + const uint8_t *src, int src_stride, const uint8_t *dst, + int dst_stride, const BLOCK_SIZE tx_bsize, int bsw, + int bsh, int visible_w, int visible_h, int qindex) { + int64_t d = 0; -int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst, - int dst_stride, int bsw, int bsh, int qm, - int use_activity_masking, int qindex) { +#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST int i, j; - int64_t d; - DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]); - DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]); - assert(qm == OD_HVS_QM); + DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]); + (void)cpi; + (void)tx_bsize; +#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST + +#if !CONFIG_HIGHBITDEPTH + (void)xd; +#endif + +#if !CONFIG_DAALA_DIST + (void)qindex; +#endif + +#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST + (void)xd; + (void)bsw, (void)bsh; + (void)visible_w, (void)visible_h; +#endif + +#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) + orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; + + if ((bsw == visible_w) && (bsh == visible_h)) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) + rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; + } else { + for (j = 0; j < visible_h; j++) + for (i = 0; i < visible_w; i++) + rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; + + if (visible_w < bsw) { + for (j = 0; j < bsh; j++) + for (i = visible_w; i < bsw; i++) + rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; + } + + if (visible_h < bsh) { + for (j = visible_h; j < bsh; j++) + for (i = 0; i < bsw; i++) + rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; + } + } + } else { +#endif + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; + + if ((bsw == visible_w) && (bsh == visible_h)) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i]; + } else { + for (j = 0; j < visible_h; j++) + for (i = 0; i < visible_w; i++) + rec[j * bsw + i] = dst[j * dst_stride + i]; - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; + if (visible_w < bsw) { + for (j = 0; j < bsh; j++) + for (i = visible_w; i < bsw; i++) + rec[j * bsw + i] = src[j * src_stride + i]; + } - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i]; + if (visible_h < bsh) { + for (j = visible_h; j < bsh; j++) + for (i = 0; i < bsw; i++) rec[j * bsw + i] = src[j * src_stride + i]; + } + } +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH +#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST + +#if CONFIG_DAALA_DIST + d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex); +#elif NEW_FUTURE_DIST + // Call new 8x8-wise distortion function here, for example + for (i = 0; i < bsh; i += 8) { + for (j = 0; j < bsw; j += 8) { + d += + av1_compute_dist_8x8(&orig[i * bsw + j], &rec[i * bsw + j], bsw, bsh); + } + } +#else + // Otherwise, MSE 
by default + unsigned sse; + // TODO(Any): Use even faster function which does not calculate variance + cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse); + d = sse; +#endif // CONFIG_DAALA_DIST - d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh, - qindex); return d; } + +static int64_t av1_dist_8x8_diff(const MACROBLOCKD *xd, const uint8_t *src, + int src_stride, const int16_t *diff, + int diff_stride, int bsw, int bsh, + int visible_w, int visible_h, int qindex) { + int64_t d = 0; + +#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST + int i, j; + + DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]); +#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST + +#if !CONFIG_HIGHBITDEPTH + (void)xd; +#endif + +#if !CONFIG_DAALA_DIST + (void)qindex; +#endif + +#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST + (void)xd; + (void)src, (void)src_stride; + (void)bsw, (void)bsh; + (void)visible_w, (void)visible_h; +#endif + +#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) + orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; + } else { +#endif + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + + if ((bsw == visible_w) && (bsh == visible_h)) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) diff16[j * bsw + i] = diff[j * diff_stride + i]; + } else { + for (j = 0; j < visible_h; j++) + for (i = 0; i < visible_w; i++) + diff16[j * bsw + i] = diff[j * diff_stride + i]; + + if (visible_w < bsw) { + for (j = 0; j < bsh; j++) + for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0; + } + + if (visible_h < bsh) { + for (j = visible_h; j < bsh; j++) + for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0; + } + } +#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST + +#if CONFIG_DAALA_DIST + d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex); +#elif NEW_FUTURE_DIST + // Call new 8x8-wise distortion function (with diff inpu) here, for example + for (i = 0; i < bsh; i += 8) { + for (j = 0; j < bsw; j += 8) { + d += av1_compute_dist_8x8_diff(&orig[i * bsw + j], &diff16[i * bsw + j], + bsw, bsh); + } + } +#else + // Otherwise, MSE by default + d = aom_sum_squares_2d_i16(diff, diff_stride, bsw, bsh); #endif // CONFIG_DAALA_DIST + return d; +} +#endif // CONFIG_DIST_8X8 + static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize, const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, @@ -892,11 +1265,11 @@ static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize, for (plane = plane_from; plane <= plane_to; ++plane) { struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 +#if CONFIG_CHROMA_SUB8X8 const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); #else const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); -#endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 +#endif // CONFIG_CHROMA_SUB8X8 unsigned int sse; int rate; @@ -1068,7 +1441,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, #if !CONFIG_VAR_TX && !CONFIG_SUPERTX // Check for consistency of tx_size with mode info - assert(tx_size == get_tx_size(plane, xd)); + assert(tx_size == av1_get_tx_size(plane, xd)); #endif // 
!CONFIG_VAR_TX && !CONFIG_SUPERTX (void)cm; @@ -1144,10 +1517,12 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, #endif // !CONFIG_LV_MAP int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, - int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order, - const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l, - int use_fast_coef_costing) { + int blk_row, int blk_col, int block, TX_SIZE tx_size, + const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a, + const ENTROPY_CONTEXT *l, int use_fast_coef_costing) { #if !CONFIG_LV_MAP + (void)blk_row; + (void)blk_col; const AV1_COMMON *const cm = &cpi->common; return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l, use_fast_coef_costing); @@ -1158,13 +1533,11 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct macroblockd_plane *pd = &xd->plane[plane]; const BLOCK_SIZE bsize = mbmi->sb_type; -#if CONFIG_CB4X4 -#if CONFIG_CHROMA_2X2 - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); -#else +#if CONFIG_CHROMA_SUB8X8 const BLOCK_SIZE plane_bsize = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); -#endif // CONFIG_CHROMA_2X2 +#elif CONFIG_CB4X4 + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); #else // CONFIG_CB4X4 const BLOCK_SIZE plane_bsize = get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd); @@ -1172,7 +1545,8 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, TXB_CTX txb_ctx; get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx); - return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx); + return av1_cost_coeffs_txb(cpi, x, plane, blk_row, blk_col, block, tx_size, + &txb_ctx); #endif // !CONFIG_LV_MAP } #endif // !CONFIG_PVQ || CONFIG_VAR_TX @@ -1182,9 +1556,9 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane, BLOCK_SIZE plane_bsize, int blk_row, int blk_col, BLOCK_SIZE tx_bsize, int *width, int *height, int *visible_width, int *visible_height) { -#if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT) +#if !(CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)) assert(tx_bsize <= plane_bsize); -#endif // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT) +#endif int txb_height = block_size_high[tx_bsize]; int txb_width = block_size_wide[tx_bsize]; const int block_height = block_size_high[plane_bsize]; @@ -1208,19 +1582,31 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane, clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height); } -// Compute the pixel domain sum square error on all visible 4x4s in the +// Compute the pixel domain distortion from src and dst on all visible 4x4s in +// the // transform block. 
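Note (illustrative, not part of the patch): the pixel_dist/pixel_diff_dist changes below restrict the distortion sum to the visible part of a transform block, using the width/height that get_txb_dimensions clamps against the block edges. A minimal self-contained sketch of that idea, with hypothetical names, is:

/* Illustrative only: sum squared error over just the visible region of a
 * transform block. visible_w/visible_h are assumed to already be clamped to
 * the frame boundary, as get_txb_dimensions() does in the diff above. */
static long long visible_sse(const unsigned char *src, int src_stride,
                             const unsigned char *dst, int dst_stride,
                             int visible_w, int visible_h) {
  long long sse = 0;
  for (int r = 0; r < visible_h; ++r) {
    for (int c = 0; c < visible_w; ++c) {
      const int d = src[r * src_stride + c] - dst[r * dst_stride + c];
      sse += (long long)d * d;  /* accumulate squared difference */
    }
  }
  return sse;
}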
-static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd, - int plane, const uint8_t *src, const int src_stride, - const uint8_t *dst, const int dst_stride, int blk_row, - int blk_col, const BLOCK_SIZE plane_bsize, - const BLOCK_SIZE tx_bsize) { +static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x, + int plane, const uint8_t *src, const int src_stride, + const uint8_t *dst, const int dst_stride, + int blk_row, int blk_col, + const BLOCK_SIZE plane_bsize, + const BLOCK_SIZE tx_bsize) { int txb_rows, txb_cols, visible_rows, visible_cols; + const MACROBLOCKD *xd = &x->e_mbd; + get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, &txb_cols, &txb_rows, &visible_cols, &visible_rows); assert(visible_rows > 0); assert(visible_cols > 0); -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + +#if CONFIG_DIST_8X8 + if (plane == 0 && txb_cols >= 8 && txb_rows >= 8) + return av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, tx_bsize, + txb_cols, txb_rows, visible_cols, visible_rows, + x->qindex); +#endif // CONFIG_DIST_8X8 + +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) if ((txb_rows == visible_rows && txb_cols == visible_cols) && tx_bsize < BLOCK_SIZES) { #else @@ -1242,36 +1628,86 @@ static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd, return sse; } -// Compute the squares sum squares on all visible 4x4s in the transform block. -static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane, - const int16_t *diff, const int diff_stride, - int blk_row, int blk_col, - const BLOCK_SIZE plane_bsize, - const BLOCK_SIZE tx_bsize) { +// Compute the pixel domain distortion from diff on all visible 4x4s in the +// transform block. +static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane, + const int16_t *diff, const int diff_stride, + int blk_row, int blk_col, + const BLOCK_SIZE plane_bsize, + const BLOCK_SIZE tx_bsize) { int visible_rows, visible_cols; + const MACROBLOCKD *xd = &x->e_mbd; +#if CONFIG_DIST_8X8 + int txb_height = block_size_high[tx_bsize]; + int txb_width = block_size_wide[tx_bsize]; + const int src_stride = x->plane[plane].src.stride; + const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0]; + const uint8_t *src = &x->plane[plane].src.buf[src_idx]; +#endif + get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL, NULL, &visible_cols, &visible_rows); - return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows); + +#if CONFIG_DIST_8X8 + if (plane == 0 && txb_width >= 8 && txb_height >= 8) + return av1_dist_8x8_diff(xd, src, src_stride, diff, diff_stride, txb_width, + txb_height, visible_cols, visible_rows, x->qindex); + else +#endif + return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, + visible_rows); +} + +#if CONFIG_PALETTE || CONFIG_INTRABC +int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) { + int val_count[256]; + memset(val_count, 0, sizeof(val_count)); + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + ++val_count[src[r * stride + c]]; + } + } + int n = 0; + for (int i = 0; i < 256; ++i) { + if (val_count[i]) ++n; + } + return n; } +#if CONFIG_HIGHBITDEPTH +int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, + int bit_depth) { + assert(bit_depth <= 12); + const uint16_t *src = CONVERT_TO_SHORTPTR(src8); + int val_count[1 << 12]; + memset(val_count, 0, (1 << 12) * sizeof(val_count[0])); + for (int r = 0; r < rows; ++r) { + for 
(int c = 0; c < cols; ++c) { + ++val_count[src[r * stride + c]]; + } + } + int n = 0; + for (int i = 0; i < (1 << bit_depth); ++i) { + if (val_count[i]) ++n; + } + return n; +} +#endif // CONFIG_HIGHBITDEPTH +#endif // CONFIG_PALETTE || CONFIG_INTRABC + void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col, TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse, OUTPUT_STATUS output_status) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; -#if CONFIG_DAALA_DIST - int qm = OD_HVS_QM; - int use_activity_masking = 0; -#if CONFIG_PVQ - use_activity_masking = x->daala_enc.use_activity_masking; -#endif // CONFIG_PVQ +#if CONFIG_DIST_8X8 struct macroblockd_plane *const pd = &xd->plane[plane]; -#else // CONFIG_DAALA_DIST +#else // CONFIG_DIST_8X8 const struct macroblockd_plane *const pd = &xd->plane[plane]; -#endif // CONFIG_DAALA_DIST +#endif // CONFIG_DIST_8X8 - if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) { + if (cpi->sf.use_transform_domain_distortion && !CONFIG_DIST_8X8) { // Transform domain distortion computation is more efficient as it does // not involve an inverse transform, but it is less accurate. const int buffer_length = tx_size_2d[tx_size]; @@ -1292,19 +1728,21 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, &this_sse) >> shift; #endif // CONFIG_HIGHBITDEPTH -#elif CONFIG_HIGHBITDEPTH - const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8; - *out_dist = - av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >> - shift; -#else - *out_dist = - av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift; +#else // !CONFIG_PVQ +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, + &this_sse, xd->bd) >> + shift; + else +#endif + *out_dist = + av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift; #endif // CONFIG_PVQ *out_sse = this_sse >> shift; } else { const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; -#if !CONFIG_PVQ || CONFIG_DAALA_DIST +#if !CONFIG_PVQ || CONFIG_DIST_8X8 const int bsw = block_size_wide[tx_bsize]; const int bsh = block_size_high[tx_bsize]; #endif @@ -1323,34 +1761,13 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, assert(cpi != NULL); assert(tx_size_wide_log2[0] == tx_size_high_log2[0]); -#if CONFIG_DAALA_DIST - if (plane == 0 && bsw >= 8 && bsh >= 8) { - if (output_status == OUTPUT_HAS_DECODED_PIXELS) { - const int pred_stride = block_size_wide[plane_bsize]; - const int pred_idx = (blk_row * pred_stride + blk_col) - << tx_size_wide_log2[0]; - const int16_t *pred = &pd->pred[pred_idx]; - int i, j; - DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]); - - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) - pred8[j * bsw + i] = pred[j * pred_stride + i]; - *out_sse = av1_daala_dist(src, src_stride, pred8, bsw, bsw, bsh, qm, - use_activity_masking, x->qindex); - } else { - *out_sse = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh, - qm, use_activity_masking, x->qindex); - } - } else -#endif // CONFIG_DAALA_DIST { const int diff_stride = block_size_wide[plane_bsize]; const int diff_idx = (blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]; const int16_t *diff = &p->src_diff[diff_idx]; - *out_sse = sum_squares_visible(xd, plane, diff, diff_stride, blk_row, - blk_col, plane_bsize, tx_bsize); + *out_sse = 
pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col, + plane_bsize, tx_bsize); #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2); @@ -1360,15 +1777,8 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, if (eob) { if (output_status == OUTPUT_HAS_DECODED_PIXELS) { -#if CONFIG_DAALA_DIST - if (plane == 0 && bsw >= 8 && bsh >= 8) - *out_dist = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh, - qm, use_activity_masking, x->qindex); - else -#endif // CONFIG_DAALA_DIST - *out_dist = - pixel_sse(cpi, xd, plane, src, src_stride, dst, dst_stride, - blk_row, blk_col, plane_bsize, tx_bsize); + *out_dist = pixel_dist(cpi, x, plane, src, src_stride, dst, dst_stride, + blk_row, blk_col, plane_bsize, tx_bsize); } else { #if CONFIG_HIGHBITDEPTH uint8_t *recon; @@ -1399,37 +1809,44 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, #endif // !CONFIG_PVQ const PLANE_TYPE plane_type = get_plane_type(plane); - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - - av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, recon, - MAX_TX_SIZE, eob); - -#if CONFIG_DAALA_DIST - if (plane == 0 && bsw >= 8 && bsh >= 8) { - *out_dist = av1_daala_dist(src, src_stride, recon, MAX_TX_SIZE, bsw, - bsh, qm, use_activity_masking, x->qindex); - } else { - if (plane == 0) { - // Save decoded pixels for inter block in pd->pred to avoid - // block_8x8_rd_txfm_daala_dist() need to produce them - // by calling av1_inverse_transform_block() again. - const int pred_stride = block_size_wide[plane_bsize]; - const int pred_idx = (blk_row * pred_stride + blk_col) - << tx_size_wide_log2[0]; - int16_t *pred = &pd->pred[pred_idx]; - int i, j; + TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); + av1_inverse_transform_block(xd, dqcoeff, +#if CONFIG_LGT + xd->mi[0]->mbmi.mode, +#endif + tx_type, tx_size, recon, MAX_TX_SIZE, eob); + +#if CONFIG_DIST_8X8 + if (plane == 0 && (bsw < 8 || bsh < 8)) { + // Save decoded pixels for inter block in pd->pred to avoid + // block_8x8_rd_txfm_daala_dist() need to produce them + // by calling av1_inverse_transform_block() again. 
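Note (illustrative, not part of the patch): in the av1_dist_block() path above, an SSE computed on high-bit-depth samples is scaled back to the 8-bit range with ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2), because squared errors on bd-bit samples are larger by a factor of 2^(2*(bd-8)). A small sketch of that normalization, assuming a stand-alone helper:

/* Illustrative only: bring an SSE measured on bd-bit samples back to the
 * 8-bit scale used by the RD code (shift by 0 bits for 8-bit, 4 for 10-bit,
 * 8 for 12-bit), rounding to nearest as ROUND_POWER_OF_TWO does. */
static unsigned long long normalize_highbd_sse(unsigned long long sse, int bd) {
  const int shift = (bd - 8) * 2;
  if (shift == 0) return sse;
  return (sse + (1ull << (shift - 1))) >> shift;
}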
+ const int pred_stride = block_size_wide[plane_bsize]; + const int pred_idx = (blk_row * pred_stride + blk_col) + << tx_size_wide_log2[0]; + int16_t *pred = &pd->pred[pred_idx]; + int i, j; +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) + pred[j * pred_stride + i] = + CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i]; + } else { +#endif for (j = 0; j < bsh; j++) for (i = 0; i < bsw; i++) pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i]; +#if CONFIG_HIGHBITDEPTH } -#endif // CONFIG_DAALA_DIST - *out_dist = - pixel_sse(cpi, xd, plane, src, src_stride, recon, MAX_TX_SIZE, - blk_row, blk_col, plane_bsize, tx_bsize); -#if CONFIG_DAALA_DIST +#endif // CONFIG_HIGHBITDEPTH } -#endif // CONFIG_DAALA_DIST +#endif // CONFIG_DIST_8X8 + *out_dist = + pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE, + blk_row, blk_col, plane_bsize, tx_bsize); } *out_dist *= 16; } else { @@ -1453,33 +1870,25 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, int64_t rd1, rd2, rd; RD_STATS this_rd_stats; - assert(tx_size == get_tx_size(plane, xd)); +#if !CONFIG_SUPERTX && !CONFIG_VAR_TX + assert(tx_size == av1_get_tx_size(plane, xd)); +#endif // !CONFIG_SUPERTX av1_init_rd_stats(&this_rd_stats); if (args->exit_early) return; if (!is_inter_block(mbmi)) { -#if CONFIG_CFL - -#if CONFIG_EC_ADAPT - FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *const ec_ctx = cm->fc; -#endif // CONFIG_EC_ADAPT - - av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col, - blk_row, tx_size, plane_bsize); -#else av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size); -#endif #if CONFIG_DPCM_INTRA const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block); - const PREDICTION_MODE mode = - (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode; - TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV, - xd, block, tx_size); + const PREDICTION_MODE mode = (plane == AOM_PLANE_Y) + ? get_y_mode(xd->mi[0], block_raster_idx) + : get_uv_mode(mbmi->uv_mode); + TX_TYPE tx_type = + av1_get_tx_type((plane == AOM_PLANE_Y) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV, + xd, blk_row, blk_col, block, tx_size); if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) { int8_t skip; av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col, @@ -1496,9 +1905,36 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, #if !CONFIG_TXK_SEL // full forward transform and quantization const int coeff_ctx = combine_entropy_contexts(*a, *l); +#if DISABLE_TRELLISQ_SEARCH + av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + coeff_ctx, AV1_XFORM_QUANT_B); +#else av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); + + const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; + tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); + const int buffer_length = tx_size_2d[tx_size]; + int64_t tmp_dist; + int64_t tmp; +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + tmp_dist = + av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >> + shift; + else +#endif + tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift; + + if (RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) { + av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, + a, l); + } else { + args->exit_early = 1; + return; + } +#endif // DISABLE_TRELLISQ_SEARCH if (!is_inter_block(mbmi)) { struct macroblock_plane *const p = &x->plane[plane]; @@ -1518,25 +1954,27 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, const int dst_stride = pd->dst.stride; uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; - cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size); + // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is + // intra predicted. 
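Note (illustrative, not part of the patch): the DISABLE_TRELLISQ_SEARCH branch above first quantizes with AV1_XFORM_QUANT_FP, derives a cheap transform-domain distortion estimate from av1_block_error, and only runs the expensive trellis pass (av1_optimize_b) when even a zero-rate cost for this block keeps the running total below the best RD found so far. A conceptual sketch of that gate, with hypothetical names:

/* Illustrative only: decide whether the trellis coefficient search is worth
 * running. est_dist plays the role of the av1_block_error()-based estimate
 * already scaled as RDCOST(rdmult, 0, est_dist); rd_so_far is args->this_rd
 * and best_rd is args->best_rd in the hunk above. */
static int should_run_trellis(long long est_dist, long long rd_so_far,
                              long long best_rd) {
  return est_dist + rd_so_far < best_rd;  /* 0 means exit early instead */
}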
+ cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize); } #endif #if CONFIG_DPCM_INTRA CALCULATE_RD : {} #endif // CONFIG_DPCM_INTRA - rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist); + rd = RDCOST(x->rdmult, 0, this_rd_stats.dist); if (args->this_rd + rd > args->best_rd) { args->exit_early = 1; return; } #if !CONFIG_PVQ const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const SCAN_ORDER *scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); + const TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); + const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi); this_rd_stats.rate = - av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, - args->use_fast_coef_costing); + av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size, + scan_order, a, l, args->use_fast_coef_costing); #else // !CONFIG_PVQ this_rd_stats.rate = x->rate; #endif // !CONFIG_PVQ @@ -1554,22 +1992,12 @@ CALCULATE_RD : {} av1_set_txb_context(x, plane, block, tx_size, a, l); #endif // !CONFIG_PVQ - rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist); - rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse); + rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist); + rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse); // TODO(jingning): temporarily enabled only for luma component rd = AOMMIN(rd1, rd2); -#if CONFIG_DAALA_DIST - if (plane == 0 && plane_bsize >= BLOCK_8X8 && - (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) { - this_rd_stats.dist = 0; - this_rd_stats.sse = 0; - rd = 0; - x->rate_4x4[block] = this_rd_stats.rate; - } -#endif // CONFIG_DAALA_DIST - #if !CONFIG_PVQ this_rd_stats.skip &= !x->plane[plane].eobs[block]; #else @@ -1579,113 +2007,93 @@ CALCULATE_RD : {} args->this_rd += rd; - if (args->this_rd > args->best_rd) { - args->exit_early = 1; - return; +#if CONFIG_DIST_8X8 + if (!(plane == 0 && plane_bsize >= BLOCK_8X8 && + (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))) { +#endif + if (args->this_rd > args->best_rd) { + args->exit_early = 1; + return; + } +#if CONFIG_DIST_8X8 } +#endif } -#if CONFIG_DAALA_DIST -static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row, - int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - struct rdcost_block_args *args = arg; - MACROBLOCK *const x = args->x; +#if CONFIG_DIST_8X8 +static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, + struct rdcost_block_args *args) { MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblockd_plane *const pd = &xd->plane[0]; + const struct macroblock_plane *const p = &x->plane[0]; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - int64_t rd, rd1, rd2; - RD_STATS this_rd_stats; - int qm = OD_HVS_QM; - int use_activity_masking = 0; - - (void)tx_size; - - assert(plane == 0); - assert(plane_bsize >= BLOCK_8X8); -#if CONFIG_PVQ - use_activity_masking = x->daala_enc.use_activity_masking; -#endif // CONFIG_PVQ - av1_init_rd_stats(&this_rd_stats); + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + const uint8_t *src = &p->src.buf[0]; + const uint8_t *dst = &pd->dst.buf[0]; + const int16_t *pred = &pd->pred[0]; + const int bw = block_size_wide[bsize]; + const int bh = block_size_high[bsize]; - if (args->exit_early) return; + int i, j; + int64_t rd, rd1, rd2; + unsigned int tmp1, tmp2; + int qindex = 
x->qindex; - { - const struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; + assert((bw & 0x07) == 0); + assert((bh & 0x07) == 0); - const int src_stride = p->src.stride; - const int dst_stride = pd->dst.stride; - const int diff_stride = block_size_wide[plane_bsize]; +#if CONFIG_HIGHBITDEPTH + uint8_t *pred8; + DECLARE_ALIGNED(16, uint16_t, pred16[MAX_TX_SQUARE]); - const uint8_t *src = - &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; - const uint8_t *dst = - &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + pred8 = CONVERT_TO_BYTEPTR(pred16); + else + pred8 = (uint8_t *)pred16; +#else + DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]); +#endif // CONFIG_HIGHBITDEPTH - unsigned int tmp1, tmp2; - int qindex = x->qindex; - const int pred_stride = block_size_wide[plane_bsize]; - const int pred_idx = (blk_row * pred_stride + blk_col) - << tx_size_wide_log2[0]; - int16_t *pred = &pd->pred[pred_idx]; - int i, j; - const int tx_blk_size = 8; - - DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]); - - for (j = 0; j < tx_blk_size; j++) - for (i = 0; i < tx_blk_size; i++) - pred8[j * tx_blk_size + i] = pred[j * diff_stride + i]; - - tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm, - use_activity_masking, qindex); - tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm, - use_activity_masking, qindex); - - if (!is_inter_block(mbmi)) { - this_rd_stats.sse = (int64_t)tmp1 * 16; - this_rd_stats.dist = (int64_t)tmp2 * 16; - } else { - // For inter mode, the decoded pixels are provided in pd->pred, - // while the predicted pixels are in dst. - this_rd_stats.sse = (int64_t)tmp2 * 16; - this_rd_stats.dist = (int64_t)tmp1 * 16; - } +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bh; j++) + for (i = 0; i < bw; i++) + CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i]; + } else { +#endif + for (j = 0; j < bh; j++) + for (i = 0; i < bw; i++) pred8[j * bw + i] = pred[j * bw + i]; +#if CONFIG_HIGHBITDEPTH } +#endif // CONFIG_HIGHBITDEPTH - rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist); - if (args->this_rd + rd > args->best_rd) { - args->exit_early = 1; - return; + tmp1 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, bw, bsize, bw, bh, bw, + bh, qindex); + tmp2 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, bsize, bw, bh, + bw, bh, qindex); + + if (!is_inter_block(mbmi)) { + args->rd_stats.sse = (int64_t)tmp1 * 16; + args->rd_stats.dist = (int64_t)tmp2 * 16; + } else { + // For inter mode, the decoded pixels are provided in pd->pred, + // while the predicted pixels are in dst. + args->rd_stats.sse = (int64_t)tmp2 * 16; + args->rd_stats.dist = (int64_t)tmp1 * 16; } - { - const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); - const uint8_t txw_unit = tx_size_wide_unit[tx_size]; - const uint8_t txh_unit = tx_size_high_unit[tx_size]; - const int step = txw_unit * txh_unit; - int offset_h = tx_size_high_unit[TX_4X4]; - // The rate of the current 8x8 block is the sum of four 4x4 blocks in it. 
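Note (illustrative, not part of the patch): the rd1/rd2/AOMMIN pattern used around this hunk compares the cost of actually coding the residual against the cost of skipping the block (zero rate, full SSE as distortion) and keeps the cheaper option. A conceptual sketch, with lambda standing in for x->rdmult and the fixed-point scaling of the real RDCOST macro omitted:

/* Illustrative only: "code it vs. skip it" rate-distortion comparison. */
static long long block_rd(long long lambda, long long rate, long long dist,
                          long long sse) {
  const long long rd_coded = lambda * rate + dist; /* cf. RDCOST(rdmult, rate, dist) */
  const long long rd_skip = sse;                   /* cf. RDCOST(rdmult, 0, sse)     */
  return rd_coded < rd_skip ? rd_coded : rd_skip;  /* cf. AOMMIN(rd1, rd2)           */
}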
- this_rd_stats.rate = - x->rate_4x4[block - max_blocks_wide * offset_h - step] + - x->rate_4x4[block - max_blocks_wide * offset_h] + - x->rate_4x4[block - step] + x->rate_4x4[block]; - } - rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist); - rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse); + rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist); + rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse); rd = AOMMIN(rd1, rd2); - args->rd_stats.dist += this_rd_stats.dist; - args->rd_stats.sse += this_rd_stats.sse; - - args->this_rd += rd; + args->rd_stats.rdcost = rd; + args->this_rd = rd; - if (args->this_rd > args->best_rd) { - args->exit_early = 1; - return; - } + if (args->this_rd > args->best_rd) args->exit_early = 1; } -#endif // CONFIG_DAALA_DIST +#endif // CONFIG_DIST_8X8 static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, RD_STATS *rd_stats, int64_t ref_best_rd, int plane, @@ -1705,15 +2113,13 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); -#if CONFIG_DAALA_DIST - if (plane == 0 && bsize >= BLOCK_8X8 && + av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, + &args); +#if CONFIG_DIST_8X8 + if (!args.exit_early && plane == 0 && bsize >= BLOCK_8X8 && (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) - av1_foreach_8x8_transformed_block_in_yplane( - xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args); - else -#endif // CONFIG_DAALA_DIST - av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, - &args); + dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args); +#endif if (args.exit_early) { av1_invalid_rd_stats(rd_stats); @@ -1768,8 +2174,14 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x, const MACROBLOCKD *const xd = &x->e_mbd; const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const int tx_select = - cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8; + const int tx_select = cm->tx_mode == TX_MODE_SELECT && +#if CONFIG_EXT_PARTITION_TYPES + // Currently these block shapes can only use 4x4 + // transforms + mbmi->sb_type != BLOCK_4X16 && + mbmi->sb_type != BLOCK_16X4 && +#endif + mbmi->sb_type >= BLOCK_8X8; if (tx_select) { const int is_inter = is_inter_block(mbmi); @@ -1779,11 +2191,11 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x, const int depth = tx_size_to_depth(coded_tx_size); const int tx_size_ctx = get_tx_size_context(xd); int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth]; -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size) r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob, tx_size == quarter_txsize_lookup[bsize]); -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif return r_tx_size; } else { return 0; @@ -1796,6 +2208,10 @@ int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd, TX_TYPE tx_type) { if (plane > 0) return 0; +#if CONFIG_VAR_TX + tx_size = get_min_tx_size(tx_size); +#endif + const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_inter = is_inter_block(mbmi); #if CONFIG_EXT_TX @@ -1844,6 +2260,9 @@ static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size); +#if CONFIG_PVQ + assert(tx_size >= TX_4X4); +#endif // CONFIG_PVQ assert(skip_prob > 
0); #if CONFIG_EXT_TX && CONFIG_RECT_TX assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs))); @@ -1864,21 +2283,20 @@ static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, if (rd_stats->skip) { if (is_inter) { - rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse); + rd = RDCOST(x->rdmult, s1, rd_stats->sse); } else { - rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, - rd_stats->sse); + rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse); } } else { - rd = RDCOST(x->rdmult, x->rddiv, - rd_stats->rate + s0 + r_tx_size * tx_select, rd_stats->dist); + rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select, + rd_stats->dist); } if (tx_select) rd_stats->rate += r_tx_size; if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !(rd_stats->skip)) - rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse)); + rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse)); return rd; } @@ -1895,6 +2313,11 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs, // transforms should be considered for pruning prune = prune_tx_types(cpi, bs, x, xd, -1); +#if CONFIG_MRC_TX + // MRC_DCT only implemented for TX_32X32 so only include this tx in + // the search for TX_32X32 + if (tx_type == MRC_DCT && tx_size != TX_32X32) return 1; +#endif // CONFIG_MRC_TX if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1; if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size)) return 1; @@ -1929,7 +2352,8 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs, return 0; } -#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT) +#if CONFIG_EXT_INTER && \ + (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA) static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs, MACROBLOCK *x, int *r, int64_t *d, int *s, int64_t *sse, int64_t ref_best_rd) { @@ -2020,14 +2444,13 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type); if (this_rd_stats.skip) - this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse); + this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse); else - this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0, - this_rd_stats.dist); + this_rd = + RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist); if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip) - this_rd = - AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse)); + this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse)); if (this_rd < best_rd) { best_rd = this_rd; @@ -2068,13 +2491,12 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, continue; } if (this_rd_stats.skip) - this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse); + this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse); else - this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0, - this_rd_stats.dist); - if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip) this_rd = - AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse)); + RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist); + if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip) + this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse)); if (this_rd < best_rd) { best_rd = this_rd; @@ -2129,7 +2551,6 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, TX_TYPE best_tx_type = 
DCT_DCT; #if CONFIG_TXK_SEL TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; - const int num_blk = bsize_to_num_blk(bs); #endif // CONFIG_TXK_SEL const int tx_select = cm->tx_mode == TX_MODE_SELECT; const int is_inter = is_inter_block(mbmi); @@ -2171,8 +2592,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, rect_tx_size); if (rd < best_rd) { #if CONFIG_TXK_SEL - memcpy(best_txk_type, mbmi->txk_type, - sizeof(best_txk_type[0]) * num_blk); + memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256); #endif best_tx_type = tx_type; best_tx_size = rect_tx_size; @@ -2278,8 +2698,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, last_rd = rd; if (rd < best_rd) { #if CONFIG_TXK_SEL - memcpy(best_txk_type, mbmi->txk_type, - sizeof(best_txk_type[0]) * num_blk); + memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256); #endif best_tx_type = tx_type; best_tx_size = n; @@ -2295,7 +2714,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, mbmi->tx_size = best_tx_size; mbmi->tx_type = best_tx_type; #if CONFIG_TXK_SEL - memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * num_blk); + memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * 256); #endif #if CONFIG_VAR_TX @@ -2366,21 +2785,7 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, int block = 0; for (row = 0; row < max_blocks_high; row += stepr) { for (col = 0; col < max_blocks_wide; col += stepc) { -#if CONFIG_CFL - const struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - -#if CONFIG_EC_ADAPT - FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *const ec_ctx = cpi->common.fc; -#endif // CONFIG_EC_ADAPT - - av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row, - tx_size, plane_bsize); -#else av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size); -#endif block += step; } } @@ -2388,7 +2793,8 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate, &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse); #if CONFIG_EXT_INTRA - if (av1_is_directional_mode(mbmi->mode, bsize)) { + if (av1_is_directional_mode(mbmi->mode, bsize) && + av1_use_angle_delta(bsize)) { mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, MAX_ANGLE_DELTA + mbmi->angle_delta[0]); } @@ -2405,8 +2811,8 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, } } #endif // CONFIG_FILTER_INTRA - this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + mode_cost, - this_rd_stats.dist); + this_rd = + RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist); return this_rd; } @@ -2620,7 +3026,7 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd); if (tokenonly_rd_stats.rate == INT_MAX) continue; this_rate = tokenonly_rd_stats.rate + palette_mode_cost; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) { tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size); } @@ -2773,15 +3179,17 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( src_stride, dst, dst_stride, xd->bd); #endif if (is_lossless) { - TX_TYPE tx_type = 
get_tx_type(PLANE_TYPE_Y, xd, block, tx_size); - const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0); + TX_TYPE tx_type = + av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size); + const SCAN_ORDER *scan_order = + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]); #if !CONFIG_PVQ av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, - tempa + idx, templ + idy, + ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, + scan_order, tempa + idx, templ + idy, cpi->sf.use_fast_coef_costing); skip = (p->eobs[block] == 0); can_skip &= skip; @@ -2806,28 +3214,38 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( templ[idy] = !skip; can_skip &= skip; #endif - if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) + if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next_highbd; #if CONFIG_PVQ if (!skip) #endif av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block), +#if CONFIG_LGT + mode, +#endif DCT_DCT, tx_size, dst, dst_stride, p->eobs[block]); } else { int64_t dist; unsigned int tmp; - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size); - const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0); + TX_TYPE tx_type = + av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size); + const SCAN_ORDER *scan_order = + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]); #if !CONFIG_PVQ +#if DISABLE_TRELLISQ_SEARCH + av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8, + tx_size, coeff_ctx, AV1_XFORM_QUANT_B); +#else av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx, - templ + idy); - ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, - tempa + idx, templ + idy, + av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size, + tempa + idx, templ + idy); +#endif // DISABLE_TRELLISQ_SEARCH + ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, + scan_order, tempa + idx, templ + idy, cpi->sf.use_fast_coef_costing); skip = (p->eobs[block] == 0); can_skip &= skip; @@ -2855,19 +3273,22 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( if (!skip) #endif av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block), +#if CONFIG_LGT + mode, +#endif tx_type, tx_size, dst, dst_stride, p->eobs[block]); cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp); dist = (int64_t)tmp << 4; distortion += dist; - if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) + if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next_highbd; } } } rate += ratey; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + this_rd = RDCOST(x->rdmult, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; @@ -2966,14 +3387,24 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride, dst, dst_stride); #endif // !CONFIG_PVQ - - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size); - const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0); + TX_TYPE tx_type = + av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size); + const SCAN_ORDER *scan_order = + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); const int coeff_ctx = 
combine_entropy_contexts(tempa[idx], templ[idy]); #if CONFIG_CB4X4 block = 4 * block; #endif // CONFIG_CB4X4 #if !CONFIG_PVQ +#if DISABLE_TRELLISQ_SEARCH + av1_xform_quant(cm, x, 0, block, +#if CONFIG_CB4X4 + 2 * (row + idy), 2 * (col + idx), +#else + row + idy, col + idx, +#endif // CONFIG_CB4X4 + BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_B); +#else const AV1_XFORM_QUANT xform_quant = is_lossless ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP; av1_xform_quant(cm, x, 0, block, @@ -2984,12 +3415,12 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( #endif // CONFIG_CB4X4 BLOCK_8X8, tx_size, coeff_ctx, xform_quant); - av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx, + av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size, tempa + idx, templ + idy); - - ratey += - av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx, - templ + idy, cpi->sf.use_fast_coef_costing); +#endif // DISABLE_TRELLISQ_SEARCH + ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, scan_order, + tempa + idx, templ + idy, + cpi->sf.use_fast_coef_costing); skip = (p->eobs[block] == 0); can_skip &= skip; tempa[idx] = !skip; @@ -3028,6 +3459,9 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( if (!skip) #endif // CONFIG_PVQ av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block), +#if CONFIG_LGT + mode, +#endif tx_type, tx_size, dst, dst_stride, p->eobs[block]); unsigned int tmp; @@ -3036,14 +3470,16 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( distortion += dist; } - if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) - goto next; + if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next; if (is_lossless) { // Calculate inverse txfm *after* RD cost. #if CONFIG_PVQ if (!skip) #endif // CONFIG_PVQ av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block), +#if CONFIG_LGT + mode, +#endif DCT_DCT, tx_size, dst, dst_stride, p->eobs[block]); } @@ -3051,7 +3487,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( } rate += ratey; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + this_rd = RDCOST(x->rdmult, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; @@ -3153,9 +3589,9 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi, cpi, mb, idy, idx, &best_mode, bmode_costs, xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r, &ry, &d, bsize, tx_size, y_skip, best_rd - total_rd); -#if !CONFIG_DAALA_DIST +#if !CONFIG_DIST_8X8 if (this_rd >= best_rd - total_rd) return INT64_MAX; -#endif // !CONFIG_DAALA_DIST +#endif // !CONFIG_DIST_8X8 total_rd += this_rd; cost += r; total_distortion += d; @@ -3172,7 +3608,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi, } mbmi->mode = mic->bmi[3].as_mode; -#if CONFIG_DAALA_DIST +#if CONFIG_DIST_8X8 { const struct macroblock_plane *p = &mb->plane[0]; const struct macroblockd_plane *pd = &xd->plane[0]; @@ -3180,18 +3616,16 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi, const int dst_stride = pd->dst.stride; uint8_t *src = p->src.buf; uint8_t *dst = pd->dst.buf; - int use_activity_masking = 0; - int qm = OD_HVS_QM; #if CONFIG_PVQ use_activity_masking = mb->daala_enc.use_activity_masking; #endif // CONFIG_PVQ // Daala-defined distortion computed for the block of 8x8 pixels - total_distortion = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, - qm, use_activity_masking, mb->qindex) + total_distortion = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, + BLOCK_8X8, 8, 8, 8, 8, 
mb->qindex) << 4; } -#endif // CONFIG_DAALA_DIST +#endif // CONFIG_DIST_8X8 // Add in the cost of the transform type if (!is_lossless) { int rate_tx_type = 0; @@ -3218,7 +3652,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi, *rate_y = tot_rate_y; *distortion = total_distortion; - return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion); + return RDCOST(mb->rdmult, cost, total_distortion); } #if CONFIG_FILTER_INTRA @@ -3261,7 +3695,7 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, this_rate = tokenonly_rd_stats.rate + av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) + write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; @@ -3321,7 +3755,7 @@ static int64_t calc_rd_given_intra_angle( this_rate = tokenonly_rd_stats.rate + mode_cost + write_uniform_cost(2 * max_angle_delta + 1, mbmi->angle_delta[0] + max_angle_delta); - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; @@ -3496,8 +3930,8 @@ static void angle_estimation(const uint8_t *src, int src_stride, int rows, uint8_t *directional_mode_skip_mask) { memset(directional_mode_skip_mask, 0, INTRA_MODES * sizeof(*directional_mode_skip_mask)); - // Sub-8x8 blocks do not use extra directions. - if (bsize < BLOCK_8X8) return; + // Check if angle_delta is used + if (!av1_use_angle_delta(bsize)) return; uint64_t hist[DIRECTIONAL_MODES]; memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0])); src += src_stride; @@ -3551,8 +3985,8 @@ static void highbd_angle_estimation(const uint8_t *src8, int src_stride, uint8_t *directional_mode_skip_mask) { memset(directional_mode_skip_mask, 0, INTRA_MODES * sizeof(*directional_mode_skip_mask)); - // Sub-8x8 blocks do not use extra directions. 
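Note (illustrative, not part of the patch): angle_estimation() above accumulates a histogram over directional modes from local pixel gradients and later skips directional intra modes whose orientations are weakly represented. The sketch below shows the general shape of such a gradient-orientation histogram; the bin mapping, bin count, and weighting are made up for illustration and are not the library's exact math.

#include <math.h>
#include <stdlib.h>
#include <string.h>

#define N_ORIENT_BINS 8

/* Illustrative only: build a coarse orientation histogram from horizontal
 * and vertical pixel differences, weighting each sample by its gradient
 * magnitude so strong edges dominate. */
static void orientation_hist(const unsigned char *src, int stride, int rows,
                             int cols, unsigned long long hist[N_ORIENT_BINS]) {
  const double kPi = 3.14159265358979323846;
  memset(hist, 0, N_ORIENT_BINS * sizeof(hist[0]));
  for (int r = 1; r < rows; ++r) {
    for (int c = 1; c < cols; ++c) {
      const int dx = src[r * stride + c] - src[r * stride + c - 1];
      const int dy = src[r * stride + c] - src[(r - 1) * stride + c];
      if (dx == 0 && dy == 0) continue;
      const double angle = atan2((double)dy, (double)dx) + kPi; /* [0, 2*pi] */
      int bin = (int)(angle * N_ORIENT_BINS / (2.0 * kPi));
      if (bin >= N_ORIENT_BINS) bin = N_ORIENT_BINS - 1;
      hist[bin] += (unsigned long long)(abs(dx) + abs(dy));
    }
  }
}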
- if (bsize < BLOCK_8X8) return; + // Check if angle_delta is used + if (!av1_use_angle_delta(bsize)) return; uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint64_t hist[DIRECTIONAL_MODES]; memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0])); @@ -3608,7 +4042,6 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize, int64_t best_rd) { - uint8_t mode_idx; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mi[0]; MB_MODE_INFO *const mbmi = &mic->mbmi; @@ -3683,7 +4116,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, x->use_default_intra_tx_type = 0; /* Y Search for intra prediction mode */ - for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) { + for (int mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) { RD_STATS this_rd_stats; int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd, this_model_rd; @@ -3692,7 +4125,8 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, mbmi->mode = best_mbmi.mode; x->use_default_intra_tx_type = 0; } else { - mbmi->mode = mode_idx; + assert(mode_idx < INTRA_MODES); + mbmi->mode = intra_rd_search_mode_order[mode_idx]; } #if CONFIG_PVQ od_encode_rollback(&x->daala_enc, &pre_buf); @@ -3708,7 +4142,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, #if CONFIG_EXT_INTRA is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize); if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue; - if (is_directional_mode) { + if (is_directional_mode && av1_use_angle_delta(bsize)) { this_rd_stats.rate = INT_MAX; rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize, bmode_costs[mbmi->mode], best_rd, &best_model_rd); @@ -3754,11 +4188,13 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, this_rate += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter]; #endif // CONFIG_INTRA_INTERP - this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, - MAX_ANGLE_DELTA + mbmi->angle_delta[0]); + if (av1_use_angle_delta(bsize)) { + this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, + MAX_ANGLE_DELTA + mbmi->angle_delta[0]); + } } #endif // CONFIG_EXT_INTRA - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + this_rd = RDCOST(x->rdmult, this_rate, this_distortion); #if CONFIG_FILTER_INTRA if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) { filter_intra_mode_skip_mask ^= (1 << mbmi->mode); @@ -3785,16 +4221,6 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, od_encode_rollback(&x->daala_enc, &post_buf); #endif // CONFIG_PVQ -#if CONFIG_CFL - // Perform one extra txfm_rd_in_plane() call, this time with the best value so - // we can store reconstructed luma values - RD_STATS this_rd_stats; - x->cfl_store_y = 1; - txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize, - mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing); - x->cfl_store_y = 0; -#endif - #if CONFIG_PALETTE if (try_palette) { rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx, @@ -3826,7 +4252,7 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); + const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]); 
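Note (illustrative, not part of the patch): rd_pick_intra_sby_mode() above is structured as a loop over candidate intra modes, each evaluated for rate and distortion, with the cheapest RD cost retained. A minimal sketch of that selection shape, where eval_mode() is a hypothetical stand-in for the super_block_yrd()-style evaluation and lambda for x->rdmult:

/* Illustrative only: pick the mode with the lowest rate-distortion cost. */
typedef struct { long long rate, dist; } mode_cost_t;
static int pick_best_mode(int n_modes, long long lambda,
                          mode_cost_t (*eval_mode)(int mode)) {
  int best_mode = 0;
  long long best_rd = -1;
  for (int mode = 0; mode < n_modes; ++mode) {
    const mode_cost_t rc = eval_mode(mode);
    const long long rd = lambda * rc.rate + rc.dist; /* cf. RDCOST() */
    if (best_rd < 0 || rd < best_rd) {
      best_rd = rd;
      best_mode = mode;
    }
  }
  return best_mode;
}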
int plane; int is_cost_valid = 1; av1_init_rd_stats(rd_stats); @@ -3857,9 +4283,8 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, break; } av1_merge_rd_stats(rd_stats, &pn_rd_stats); - if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) > - ref_best_rd && - RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse) > ref_best_rd) { + if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd && + RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) { is_cost_valid = 0; break; } @@ -3875,13 +4300,6 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, } #if CONFIG_VAR_TX -// FIXME crop these calls -static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride, - TX_SIZE tx_size) { - return aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size], - tx_size_high[tx_size]); -} - void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, int blk_row, int blk_col, int plane, int block, int plane_bsize, const ENTROPY_CONTEXT *a, @@ -3890,18 +4308,23 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, MACROBLOCKD *xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; + +#if CONFIG_TXK_SEL + av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, + tx_size, a, l, 0, rd_stats); + return; +#endif + int64_t tmp; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); PLANE_TYPE plane_type = get_plane_type(plane); - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size]; int bh = block_size_high[txm_bsize]; int bw = block_size_wide[txm_bsize]; - int txb_h = tx_size_high_unit[tx_size]; - int txb_w = tx_size_wide_unit[tx_size]; - int src_stride = p->src.stride; uint8_t *src = &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; @@ -3914,30 +4337,15 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, #else DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]); #endif // CONFIG_HIGHBITDEPTH - int max_blocks_high = block_size_high[plane_bsize]; - int max_blocks_wide = block_size_wide[plane_bsize]; - const int diff_stride = max_blocks_wide; + const int diff_stride = block_size_wide[plane_bsize]; const int16_t *diff = &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]]; int txb_coeff_cost; assert(tx_size < TX_SIZES_ALL); - if (xd->mb_to_bottom_edge < 0) - max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y); - if (xd->mb_to_right_edge < 0) - max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x); - - max_blocks_high >>= tx_size_wide_log2[0]; - max_blocks_wide >>= tx_size_wide_log2[0]; - int coeff_ctx = get_entropy_context(tx_size, a, l); - av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, - coeff_ctx, AV1_XFORM_QUANT_FP); - - av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); - // TODO(any): Use av1_dist_block to compute distortion #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -3954,21 +4362,35 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, 0, bw, bh); #endif // CONFIG_HIGHBITDEPTH - if (blk_row + txb_h > 
max_blocks_high || blk_col + txb_w > max_blocks_wide) { - int idx, idy; - int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row); - int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col); - tmp = 0; - for (idy = 0; idy < blocks_height; ++idy) { - for (idx = 0; idx < blocks_width; ++idx) { - const int16_t *d = - diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]); - tmp += sum_squares_2d(d, diff_stride, 0); - } - } - } else { - tmp = sum_squares_2d(diff, diff_stride, tx_size); +#if DISABLE_TRELLISQ_SEARCH + av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + coeff_ctx, AV1_XFORM_QUANT_B); + +#else + av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + coeff_ctx, AV1_XFORM_QUANT_FP); + + const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + const int buffer_length = tx_size_2d[tx_size]; + int64_t tmp_dist; +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + tmp_dist = + av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >> + shift; + else +#endif + tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift; + + if (RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) { + av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, + a, l); } +#endif // DISABLE_TRELLISQ_SEARCH + + tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col, + plane_bsize, txm_bsize); #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) @@ -3977,36 +4399,48 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, rd_stats->sse += tmp * 16; const int eob = p->eobs[block]; +#if CONFIG_LGT + PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block); + av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, rec_buffer, + MAX_TX_SIZE, eob); +#else av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer, MAX_TX_SIZE, eob); +#endif if (eob > 0) { - if (txb_w + blk_col > max_blocks_wide || - txb_h + blk_row > max_blocks_high) { - int idx, idy; - unsigned int this_dist; - int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row); - int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col); - tmp = 0; - for (idy = 0; idy < blocks_height; ++idy) { - for (idx = 0; idx < blocks_width; ++idx) { - uint8_t *const s = - src + ((idy * src_stride + idx) << tx_size_wide_log2[0]); - uint8_t *const r = - rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]); - cpi->fn_ptr[0].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist); - tmp += this_dist; - } +#if CONFIG_DIST_8X8 + if (plane == 0 && (bw < 8 && bh < 8)) { + // Save sub8x8 luma decoded pixels + // since 8x8 luma decoded pixels are not available for daala-dist + // after recursive split of BLOCK_8x8 is done. 
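Note (illustrative, not part of the patch): av1_tx_block_rd_b() above folds each transform block's contribution into the running RD_STATS, scaling the pixel-domain sse/dist by 16 and keeping the skip flag only if every transform block is skippable. A small sketch of that accumulation, using a local struct that mirrors the RD_STATS fields involved:

/* Illustrative only: accumulate one transform block into running RD stats. */
typedef struct {
  long long rate, dist, sse;
  int skip;
} rd_stats_sketch;
static void accumulate_txb(rd_stats_sketch *rd, long long txb_sse,
                           long long txb_dist, long long txb_rate, int eob) {
  rd->sse += txb_sse * 16;   /* same *16 scaling as the hunk above */
  rd->dist += txb_dist * 16;
  rd->rate += txb_rate;
  rd->skip &= (eob == 0);    /* skippable only if every txb has eob == 0 */
}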
+ const int pred_stride = block_size_wide[plane_bsize]; + const int pred_idx = (blk_row * pred_stride + blk_col) + << tx_size_wide_log2[0]; + int16_t *decoded = &pd->pred[pred_idx]; + int i, j; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bh; j++) + for (i = 0; i < bw; i++) + decoded[j * pred_stride + i] = + CONVERT_TO_SHORTPTR(rec_buffer)[j * MAX_TX_SIZE + i]; + } else { +#endif + for (j = 0; j < bh; j++) + for (i = 0; i < bw; i++) + decoded[j * pred_stride + i] = rec_buffer[j * MAX_TX_SIZE + i]; +#if CONFIG_HIGHBITDEPTH } - } else { - uint32_t this_dist; - cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE, - &this_dist); - tmp = this_dist; +#endif // CONFIG_HIGHBITDEPTH } +#endif // CONFIG_DIST_8X8 + tmp = pixel_dist(cpi, x, plane, src, src_stride, rec_buffer, MAX_TX_SIZE, + blk_row, blk_col, plane_bsize, txm_bsize); } rd_stats->dist += tmp * 16; - txb_coeff_cost = - av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, 0); + txb_coeff_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, + tx_size, scan_order, a, l, 0); rd_stats->rate += txb_coeff_cost; rd_stats->skip &= (eob == 0); @@ -4038,14 +4472,35 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int64_t this_rd = INT64_MAX; ENTROPY_CONTEXT *pta = ta + blk_col; ENTROPY_CONTEXT *ptl = tl + blk_row; - int coeff_ctx, i; + int i; int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row, mbmi->sb_type, tx_size); int64_t sum_rd = INT64_MAX; int tmp_eob = 0; int zero_blk_rate; RD_STATS sum_rd_stats; - const int tx_size_ctx = txsize_sqr_map[tx_size]; +#if CONFIG_TXK_SEL + TX_TYPE best_tx_type = TX_TYPES; + int txk_idx = (blk_row << 4) + blk_col; +#endif +#if CONFIG_RECT_TX_EXT + TX_SIZE quarter_txsize = quarter_txsize_lookup[mbmi->sb_type]; + int check_qttx = is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) && + tx_size == max_txsize_rect_lookup[mbmi->sb_type] && + quarter_txsize != tx_size; + int is_qttx_picked = 0; + int eobs_qttx[2] = { 0, 0 }; + int skip_qttx[2] = { 0, 0 }; + int block_offset_qttx = check_qttx + ? tx_size_wide_unit[quarter_txsize] * + tx_size_high_unit[quarter_txsize] + : 0; + int blk_row_offset, blk_col_offset; + int is_wide_qttx = + tx_size_wide_unit[quarter_txsize] > tx_size_high_unit[quarter_txsize]; + blk_row_offset = is_wide_qttx ? tx_size_high_unit[quarter_txsize] : 0; + blk_col_offset = is_wide_qttx ? 
0 : tx_size_wide_unit[quarter_txsize]; +#endif av1_init_rd_stats(&sum_rd_stats); @@ -4056,15 +4511,25 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, return; } - coeff_ctx = get_entropy_context(tx_size, pta, ptl); - av1_init_rd_stats(rd_stats); if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; +#if CONFIG_LV_MAP + TX_SIZE txs_ctx = get_txsize_context(tx_size); + TXB_CTX txb_ctx; + get_txb_ctx(plane_bsize, tx_size, plane, pta, ptl, &txb_ctx); + zero_blk_rate = + av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_ctx.txb_skip_ctx], 1); +#else + int tx_size_ctx = txsize_sqr_map[tx_size]; + int coeff_ctx = get_entropy_context(tx_size, pta, ptl); zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0] [coeff_ctx][EOB_TOKEN]; +#endif + rd_stats->ref_rdcost = ref_best_rd; + rd_stats->zero_rate = zero_blk_rate; if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) { inter_tx_size[0][0] = tx_size; @@ -4081,8 +4546,8 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, } } - if ((RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >= - RDCOST(x->rdmult, x->rddiv, zero_blk_rate, rd_stats->sse) || + if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >= + RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) || rd_stats->skip == 1) && !xd->lossless[mbmi->segment_id]) { #if CONFIG_RD_DEBUG @@ -4094,6 +4559,9 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, rd_stats->skip = 1; x->blk_skip[plane][blk_row * bw + blk_col] = 1; p->eobs[block] = 0; +#if CONFIG_TXK_SEL + mbmi->txk_type[txk_idx] = DCT_DCT; +#endif } else { x->blk_skip[plane][blk_row * bw + blk_col] = 0; rd_stats->skip = 0; @@ -4102,23 +4570,143 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) rd_stats->rate += av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0); - this_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist); +#if CONFIG_RECT_TX_EXT + if (check_qttx) { + assert(blk_row == 0 && blk_col == 0); + rd_stats->rate += av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 0); + } +#endif + this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); +#if CONFIG_LV_MAP + tmp_eob = p->txb_entropy_ctx[block]; +#else tmp_eob = p->eobs[block]; +#endif + +#if CONFIG_TXK_SEL + best_tx_type = mbmi->txk_type[txk_idx]; +#endif + +#if CONFIG_RECT_TX_EXT + if (check_qttx) { + assert(blk_row == 0 && blk_col == 0 && block == 0 && plane == 0); + + RD_STATS rd_stats_tmp, rd_stats_qttx; + int64_t rd_qttx; + + av1_init_rd_stats(&rd_stats_qttx); + av1_init_rd_stats(&rd_stats_tmp); + + av1_tx_block_rd_b(cpi, x, quarter_txsize, 0, 0, plane, 0, plane_bsize, + pta, ptl, &rd_stats_qttx); + + tx_size_ctx = txsize_sqr_map[quarter_txsize]; + coeff_ctx = get_entropy_context(quarter_txsize, pta, ptl); + zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0] + [coeff_ctx][EOB_TOKEN]; + if ((RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist) >= + RDCOST(x->rdmult, zero_blk_rate, rd_stats_qttx.sse) || + rd_stats_qttx.skip == 1) && + !xd->lossless[mbmi->segment_id]) { +#if CONFIG_RD_DEBUG + av1_update_txb_coeff_cost(&rd_stats_qttx, plane, quarter_txsize, 0, 0, + zero_blk_rate - rd_stats_qttx.rate); +#endif // CONFIG_RD_DEBUG + rd_stats_qttx.rate = zero_blk_rate; + rd_stats_qttx.dist = rd_stats_qttx.sse; + rd_stats_qttx.skip = 1; + x->blk_skip[plane][blk_row * bw + blk_col] = 1; + skip_qttx[0] = 1; + p->eobs[block] = 0; + } else { 
+ x->blk_skip[plane][blk_row * bw + blk_col] = 0; + skip_qttx[0] = 0; + rd_stats->skip = 0; + } + + // Second tx block + av1_tx_block_rd_b(cpi, x, quarter_txsize, blk_row_offset, blk_col_offset, + plane, block_offset_qttx, plane_bsize, pta, ptl, + &rd_stats_tmp); + + av1_set_txb_context(x, plane, 0, quarter_txsize, pta, ptl); + coeff_ctx = get_entropy_context(quarter_txsize, pta + blk_col_offset, + ptl + blk_row_offset); + zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0] + [coeff_ctx][EOB_TOKEN]; + if ((RDCOST(x->rdmult, rd_stats_tmp.rate, rd_stats_tmp.dist) >= + RDCOST(x->rdmult, zero_blk_rate, rd_stats_tmp.sse) || + rd_stats_tmp.skip == 1) && + !xd->lossless[mbmi->segment_id]) { +#if CONFIG_RD_DEBUG + av1_update_txb_coeff_cost(&rd_stats_tmp, plane, quarter_txsize, 0, 0, + zero_blk_rate - rd_stats_tmp.rate); +#endif // CONFIG_RD_DEBUG + rd_stats_tmp.rate = zero_blk_rate; + rd_stats_tmp.dist = rd_stats_tmp.sse; + rd_stats_tmp.skip = 1; + x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 1; + skip_qttx[1] = 1; + p->eobs[block_offset_qttx] = 0; + } else { + x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 0; + skip_qttx[1] = 0; + rd_stats_tmp.skip = 0; + } + + av1_merge_rd_stats(&rd_stats_qttx, &rd_stats_tmp); + + if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) { + rd_stats_qttx.rate += + av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0); + } + rd_stats_qttx.rate += + av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 1); + rd_qttx = RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist); +#if CONFIG_LV_MAP + eobs_qttx[0] = p->txb_entropy_ctx[0]; + eobs_qttx[1] = p->txb_entropy_ctx[block_offset_qttx]; +#else + eobs_qttx[0] = p->eobs[0]; + eobs_qttx[1] = p->eobs[block_offset_qttx]; +#endif + if (rd_qttx < this_rd) { + is_qttx_picked = 1; + this_rd = rd_qttx; + rd_stats->rate = rd_stats_qttx.rate; + rd_stats->dist = rd_stats_qttx.dist; + rd_stats->sse = rd_stats_qttx.sse; + rd_stats->skip = rd_stats_qttx.skip; + rd_stats->rdcost = rd_stats_qttx.rdcost; + } + av1_get_entropy_contexts(plane_bsize, 0, pd, ta, tl); + } +#endif } +#if CONFIG_MRC_TX + // If the tx type we are trying is MRC_DCT, we cannot partition the transform + // into anything smaller than TX_32X32 + if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && mbmi->tx_type != MRC_DCT) { +#else if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) { +#endif const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; const int bsl = tx_size_wide_unit[sub_txs]; int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs]; RD_STATS this_rd_stats; int this_cost_valid = 1; int64_t tmp_rd = 0; - +#if CONFIG_DIST_8X8 + int sub8x8_eob[4]; +#endif sum_rd_stats.rate = av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1); assert(tx_size < TX_SIZES_ALL); + ref_best_rd = AOMMIN(this_rd, ref_best_rd); + for (i = 0; i < 4 && this_cost_valid; ++i) { int offsetr = blk_row + (i >> 1) * bsl; int offsetc = blk_col + (i & 0x01) * bsl; @@ -4129,30 +4717,170 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, depth + 1, plane_bsize, ta, tl, tx_above, tx_left, &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid, rd_stats_stack); - +#if CONFIG_DIST_8X8 + if (plane == 0 && tx_size == TX_8X8) { + sub8x8_eob[i] = p->eobs[block]; + } +#endif // CONFIG_DIST_8X8 av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats); - tmp_rd = - RDCOST(x->rdmult, x->rddiv, sum_rd_stats.rate, sum_rd_stats.dist); + tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist); +#if !CONFIG_DIST_8X8 if 
(this_rd < tmp_rd) break; +#endif block += sub_step; } +#if CONFIG_DIST_8X8 + if (this_cost_valid && plane == 0 && tx_size == TX_8X8) { + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + + const uint8_t *src = + &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; + const uint8_t *dst = + &pd->dst + .buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; + + int64_t dist_8x8; + int qindex = x->qindex; + const int pred_stride = block_size_wide[plane_bsize]; + const int pred_idx = (blk_row * pred_stride + blk_col) + << tx_size_wide_log2[0]; + int16_t *pred = &pd->pred[pred_idx]; + int j; + int row, col; + +#if CONFIG_HIGHBITDEPTH + uint8_t *pred8; + DECLARE_ALIGNED(16, uint16_t, pred8_16[8 * 8]); +#else + DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]); +#endif // CONFIG_HIGHBITDEPTH + + dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, + BLOCK_8X8, 8, 8, 8, 8, qindex) * + 16; + sum_rd_stats.sse = dist_8x8; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + pred8 = CONVERT_TO_BYTEPTR(pred8_16); + else + pred8 = (uint8_t *)pred8_16; +#endif + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (row = 0; row < 2; ++row) { + for (col = 0; col < 2; ++col) { + int idx = row * 2 + col; + int eob = sub8x8_eob[idx]; + + if (eob > 0) { + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + CONVERT_TO_SHORTPTR(pred8) + [(row * 4 + j) * 8 + 4 * col + i] = + pred[(row * 4 + j) * pred_stride + 4 * col + i]; + } else { + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + CONVERT_TO_SHORTPTR(pred8) + [(row * 4 + j) * 8 + 4 * col + i] = CONVERT_TO_SHORTPTR( + dst)[(row * 4 + j) * dst_stride + 4 * col + i]; + } + } + } + } else { +#endif + for (row = 0; row < 2; ++row) { + for (col = 0; col < 2; ++col) { + int idx = row * 2 + col; + int eob = sub8x8_eob[idx]; + + if (eob > 0) { + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + pred8[(row * 4 + j) * 8 + 4 * col + i] = + pred[(row * 4 + j) * pred_stride + 4 * col + i]; + } else { + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + pred8[(row * 4 + j) * 8 + 4 * col + i] = + dst[(row * 4 + j) * dst_stride + 4 * col + i]; + } + } + } +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, 8, BLOCK_8X8, 8, + 8, 8, 8, qindex) * + 16; + sum_rd_stats.dist = dist_8x8; + tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist); + } +#endif // CONFIG_DIST_8X8 if (this_cost_valid) sum_rd = tmp_rd; } if (this_rd < sum_rd) { int idx, idy; - for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0); - for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0); +#if CONFIG_RECT_TX_EXT + TX_SIZE tx_size_selected = is_qttx_picked ? 
quarter_txsize : tx_size; +#else + TX_SIZE tx_size_selected = tx_size; +#endif + +#if CONFIG_RECT_TX_EXT + if (is_qttx_picked) { + assert(blk_row == 0 && blk_col == 0 && plane == 0); +#if CONFIG_LV_MAP + p->txb_entropy_ctx[0] = eobs_qttx[0]; + p->txb_entropy_ctx[block_offset_qttx] = eobs_qttx[1]; +#else + p->eobs[0] = eobs_qttx[0]; + p->eobs[block_offset_qttx] = eobs_qttx[1]; +#endif + } else { +#endif +#if CONFIG_LV_MAP + p->txb_entropy_ctx[block] = tmp_eob; +#else + p->eobs[block] = tmp_eob; +#endif +#if CONFIG_RECT_TX_EXT + } +#endif + + av1_set_txb_context(x, plane, block, tx_size_selected, pta, ptl); +#if CONFIG_RECT_TX_EXT + if (is_qttx_picked) + av1_set_txb_context(x, plane, block_offset_qttx, tx_size_selected, + pta + blk_col_offset, ptl + blk_row_offset); +#endif + txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size, tx_size); - inter_tx_size[0][0] = tx_size; + inter_tx_size[0][0] = tx_size_selected; for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy) for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx) - inter_tx_size[idy][idx] = tx_size; - mbmi->tx_size = tx_size; + inter_tx_size[idy][idx] = tx_size_selected; + mbmi->tx_size = tx_size_selected; +#if CONFIG_TXK_SEL + mbmi->txk_type[txk_idx] = best_tx_type; +#endif if (this_rd == INT64_MAX) *is_cost_valid = 0; - x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip; +#if CONFIG_RECT_TX_EXT + if (is_qttx_picked) { + x->blk_skip[plane][0] = skip_qttx[0]; + x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = skip_qttx[1]; + } else { +#endif + x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip; +#if CONFIG_RECT_TX_EXT + } +#endif } else { *rd_stats = sum_rd_stats; if (sum_rd == INT64_MAX) *is_cost_valid = 0; @@ -4201,17 +4929,16 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd, &is_cost_valid, rd_stats_stack); av1_merge_rd_stats(rd_stats, &pn_rd_stats); - this_rd += AOMMIN( - RDCOST(x->rdmult, x->rddiv, pn_rd_stats.rate, pn_rd_stats.dist), - RDCOST(x->rdmult, x->rddiv, 0, pn_rd_stats.sse)); + this_rd += AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist), + RDCOST(x->rdmult, 0, pn_rd_stats.sse)); block += step; ++block32; } } } - this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist), - RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse)); + this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist), + RDCOST(x->rdmult, 0, rd_stats->sse)); if (this_rd > ref_best_rd) is_cost_valid = 0; if (!is_cost_valid) { @@ -4247,6 +4974,7 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x, mbmi->min_tx_size = AOMMIN( mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col])); +#if !CONFIG_TXK_SEL #if CONFIG_EXT_TX if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 && @@ -4266,20 +4994,21 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x, [mbmi->tx_type]; } } -#else // CONFIG_EXT_TX +#else if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id]) rd_stats->rate += cpi->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type]; #endif // CONFIG_EXT_TX +#endif // CONFIG_TXK_SEL if (rd_stats->skip) - rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse); + rd = RDCOST(x->rdmult, s1, rd_stats->sse); else - rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist); + rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist); if (is_inter && 
!xd->lossless[xd->mi[0]->mbmi.segment_id] && !(rd_stats->skip)) - rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse)); + rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse)); return rd; } @@ -4299,6 +5028,12 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE best_tx = max_txsize_lookup[bsize]; TX_SIZE best_min_tx_size = TX_SIZES_ALL; uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8]; + TX_TYPE txk_start = DCT_DCT; +#if CONFIG_TXK_SEL + TX_TYPE txk_end = DCT_DCT + 1; +#else + TX_TYPE txk_end = TX_TYPES; +#endif const int n4 = bsize_to_num_blk(bsize); int idx, idy; int prune = 0; @@ -4326,9 +5061,14 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x, for (idx = 0; idx < count32; ++idx) av1_invalid_rd_stats(&rd_stats_stack[idx]); - for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { + for (tx_type = txk_start; tx_type < txk_end; ++tx_type) { RD_STATS this_rd_stats; av1_init_rd_stats(&this_rd_stats); +#if CONFIG_MRC_TX + // MRC_DCT only implemented for TX_32X32 so only include this tx in + // the search for TX_32X32 + if (tx_type == MRC_DCT && max_tx_size != TX_32X32) continue; +#endif // CONFIG_MRC_TX #if CONFIG_EXT_TX if (is_inter) { if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue; @@ -4384,7 +5124,6 @@ static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; const int tx_row = blk_row >> (1 - pd->subsampling_y); @@ -4402,16 +5141,11 @@ static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, : mbmi->inter_tx_size[tx_row][tx_col]; if (tx_size == plane_tx_size) { - int i; ENTROPY_CONTEXT *ta = above_ctx + blk_col; ENTROPY_CONTEXT *tl = left_ctx + blk_row; av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, ta, tl, rd_stats); - - for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) - ta[i] = !(p->eobs[block] == 0); - for (i = 0; i < tx_size_high_unit[tx_size]; ++i) - tl[i] = !(p->eobs[block] == 0); + av1_set_txb_context(x, plane, block, tx_size, ta, tl); } else { const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; const int bsl = tx_size_wide_unit[sub_txs]; @@ -4498,9 +5232,8 @@ static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, av1_merge_rd_stats(rd_stats, &pn_rd_stats); - this_rd = - AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist), - RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse)); + this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist), + RDCOST(x->rdmult, 0, rd_stats->sse)); if (this_rd > ref_best_rd) { is_cost_valid = 0; @@ -4543,7 +5276,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, &plane_block_height, &rows, &cols); if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; #if CONFIG_FILTER_INTRA mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0; #endif // CONFIG_FILTER_INTRA @@ -4689,7 +5422,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, } } - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; *best_mbmi = *mbmi; @@ -4727,7 +5460,7 @@ static int 
rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, av1_zero(filter_intra_mode_info); mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; #if CONFIG_PALETTE mbmi->palette_mode_info.palette_size[1] = 0; #endif // CONFIG_PALETTE @@ -4741,7 +5474,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) + cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] + write_uniform_cost(FILTER_INTRA_MODES, mode); - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; *rate = this_rate; @@ -4754,7 +5487,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, } if (filter_intra_selected_flag) { - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = filter_intra_mode_info.use_filter_intra_mode[1]; mbmi->filter_intra_mode_info.filter_intra_mode[1] = @@ -4782,7 +5515,7 @@ static int64_t pick_intra_angle_routine_sbuv( if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in)) return INT64_MAX; this_rate = tokenonly_rd_stats.rate + rate_overhead; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; *best_angle_delta = mbmi->angle_delta[1]; @@ -4852,8 +5585,172 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, } #endif // CONFIG_EXT_INTRA +#if CONFIG_CFL +static int64_t cfl_alpha_dist(const uint8_t *y_pix, int y_stride, + const int y_averages_q3[MAX_NUM_TXB], + const uint8_t *src, int src_stride, int width, + int height, TX_SIZE tx_size, int dc_pred, + int alpha_q3, int64_t *dist_neg_out) { + int64_t dist = 0; + int diff; + + if (alpha_q3 == 0) { + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { + diff = src[i] - dc_pred; + dist += diff * diff; + } + src += src_stride; + } + + if (dist_neg_out) *dist_neg_out = dist; + + return dist; + } + + int64_t dist_neg = 0; + const int tx_height = tx_size_high[tx_size]; + const int tx_width = tx_size_wide[tx_size]; + const int y_block_row_off = y_stride * tx_height; + const int src_block_row_off = src_stride * tx_height; + const uint8_t *t_y_pix; + const uint8_t *t_src; + int a = 0; + for (int b_j = 0; b_j < height; b_j += tx_height) { + const int h = b_j + tx_height; + for (int b_i = 0; b_i < width; b_i += tx_width) { + const int w = b_i + tx_width; + const int tx_avg_q3 = y_averages_q3[a++]; + t_y_pix = y_pix; + t_src = src; + for (int t_j = b_j; t_j < h; t_j++) { + for (int t_i = b_i; t_i < w; t_i++) { + const int uv = t_src[t_i]; + + const int scaled_luma = + get_scaled_luma_q0(alpha_q3, t_y_pix[t_i], tx_avg_q3); + + // TODO(ltrudeau) add support for HBD. + diff = uv - clamp(scaled_luma + dc_pred, 0, 255); + dist += diff * diff; + + // TODO(ltrudeau) add support for HBD. 
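/* [Editor's note - illustrative sketch, not part of this change.]  At this
 * point cfl_alpha_dist() evaluates one chroma sample against the CfL
 * prediction for +alpha and -alpha in a single pass: the alpha-scaled,
 * average-subtracted luma value is added to (or subtracted from) the DC
 * prediction, clamped to the 8-bit range (per the TODO, HBD is not handled
 * yet), and the squared error is accumulated per sign.  The helper below
 * restates that per-pixel step; the function names are hypothetical. */
#include <stdint.h>

static int clamp_u8(int v) { return v < 0 ? 0 : (v > 255 ? 255 : v); }

static void cfl_pixel_sse_both_signs(int uv, int scaled_luma, int dc_pred,
                                     int64_t *sse_pos, int64_t *sse_neg) {
  const int d_pos = uv - clamp_u8(scaled_luma + dc_pred);   /* +alpha guess */
  const int d_neg = uv - clamp_u8(-scaled_luma + dc_pred);  /* -alpha guess */
  *sse_pos += (int64_t)d_pos * d_pos;
  *sse_neg += (int64_t)d_neg * d_neg;
}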
+ diff = uv - clamp(-scaled_luma + dc_pred, 0, 255); + dist_neg += diff * diff; + } + t_y_pix += y_stride; + t_src += src_stride; + } + } + y_pix += y_block_row_off; + src += src_block_row_off; + } + + if (dist_neg_out) *dist_neg_out = dist_neg; + + return dist; +} + +static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) { + assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] == + AOM_ICDF(CDF_PROB_TOP)); + + aom_cdf_prob prev_cdf = 0; + + for (int c = 0; c < CFL_ALPHABET_SIZE; c++) { + const int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) + + (cfl_alpha_codes[c][CFL_PRED_V] != 0); + + aom_cdf_prob prob = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - prev_cdf; + prev_cdf = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]); + + cfl->costs[c] = av1_cost_symbol(prob) + av1_cost_literal(sign_bit_cost); + } +} + +static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) { + const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U]; + const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V]; + const uint8_t *const src_u = p_u->src.buf; + const uint8_t *const src_v = p_v->src.buf; + const int src_stride_u = p_u->src.stride; + const int src_stride_v = p_v->src.stride; + + MACROBLOCKD *const xd = &x->e_mbd; + FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + + CFL_CTX *const cfl = xd->cfl; + cfl_compute_parameters(xd, tx_size); + const int width = cfl->uv_width; + const int height = cfl->uv_height; + const int dc_pred_u = cfl->dc_pred[CFL_PRED_U]; + const int dc_pred_v = cfl->dc_pred[CFL_PRED_V]; + const int *y_averages_q3 = cfl->y_averages_q3; + const uint8_t *y_pix = cfl->y_down_pix; + + CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs; + + cfl_update_costs(cfl, ec_ctx); + + int64_t sse[CFL_PRED_PLANES][CFL_MAGS_SIZE]; + sse[CFL_PRED_U][0] = + cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, + width, height, tx_size, dc_pred_u, 0, NULL); + sse[CFL_PRED_V][0] = + cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, + width, height, tx_size, dc_pred_v, 0, NULL); + + for (int m = 1; m < CFL_MAGS_SIZE; m += 2) { + assert(cfl_alpha_mags_q3[m + 1] == -cfl_alpha_mags_q3[m]); + sse[CFL_PRED_U][m] = cfl_alpha_dist( + y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height, + tx_size, dc_pred_u, cfl_alpha_mags_q3[m], &sse[CFL_PRED_U][m + 1]); + sse[CFL_PRED_V][m] = cfl_alpha_dist( + y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height, + tx_size, dc_pred_v, cfl_alpha_mags_q3[m], &sse[CFL_PRED_V][m + 1]); + } + + int64_t dist; + int64_t cost; + int64_t best_cost; + + // Compute least squares parameter of the entire block + // IMPORTANT: We assume that the first code is 0,0 + int ind = 0; + signs[CFL_PRED_U] = CFL_SIGN_POS; + signs[CFL_PRED_V] = CFL_SIGN_POS; + + dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0]; + dist *= 16; + best_cost = RDCOST(x->rdmult, cfl->costs[0], dist); + + for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { + const int idx_u = cfl_alpha_codes[c][CFL_PRED_U]; + const int idx_v = cfl_alpha_codes[c][CFL_PRED_V]; + for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) { + for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) { + dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] + + sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)]; + dist *= 16; + cost = RDCOST(x->rdmult, cfl->costs[c], dist); + if (cost < best_cost) { + best_cost = cost; + ind = c; + signs[CFL_PRED_U] = sign_u; + signs[CFL_PRED_V] = sign_v; + } + } + } + 
} + + mbmi->cfl_alpha_idx = ind; + return cfl->costs[ind]; +} +#endif // CONFIG_CFL + static void init_sbuv_mode(MB_MODE_INFO *const mbmi) { - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; #if CONFIG_PALETTE mbmi->palette_mode_info.palette_size[1] = 0; #endif // CONFIG_PALETTE @@ -4870,20 +5767,19 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; assert(!is_inter_block(mbmi)); MB_MODE_INFO best_mbmi = *mbmi; - PREDICTION_MODE mode; int64_t best_rd = INT64_MAX, this_rd; - int this_rate; - RD_STATS tokenonly_rd_stats; #if CONFIG_PVQ od_rollback_buffer buf; od_encode_checkpoint(&x->daala_enc, &buf); #endif // CONFIG_PVQ #if CONFIG_PALETTE PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - uint8_t *best_palette_color_map = NULL; #endif // CONFIG_PALETTE - for (mode = DC_PRED; mode <= TM_PRED; ++mode) { + for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) { + int this_rate; + RD_STATS tokenonly_rd_stats; + UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx]; #if CONFIG_EXT_INTRA const int is_directional_mode = av1_is_directional_mode(mode, mbmi->sb_type); @@ -4893,9 +5789,16 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, continue; mbmi->uv_mode = mode; +#if CONFIG_CFL + int cfl_alpha_rate = 0; + if (mode == UV_DC_PRED) { + const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]); + cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size); + } +#endif #if CONFIG_EXT_INTRA mbmi->angle_delta[1] = 0; - if (is_directional_mode) { + if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) { const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] + write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0); if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd, @@ -4915,8 +5818,13 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, this_rate = tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode]; +#if CONFIG_CFL + if (mode == UV_DC_PRED) { + this_rate += cfl_alpha_rate; + } +#endif #if CONFIG_EXT_INTRA - if (is_directional_mode) { + if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) { this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, MAX_ANGLE_DELTA + mbmi->angle_delta[1]); } @@ -4927,7 +5835,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, #endif // CONFIG_FILTER_INTRA #if CONFIG_PALETTE if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 && - mode == DC_PRED) + mode == UV_DC_PRED) this_rate += av1_cost_bit( av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0); #endif // CONFIG_PALETTE @@ -4935,7 +5843,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, #if CONFIG_PVQ od_encode_rollback(&x->daala_enc, &buf); #endif // CONFIG_PVQ - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < best_rd) { best_mbmi = *mbmi; @@ -4949,9 +5857,9 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, #if CONFIG_PALETTE if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) { - best_palette_color_map = x->palette_buffer->best_palette_color_map; + uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map; rd_pick_palette_intra_sbuv(cpi, x, - cpi->intra_uv_mode_cost[mbmi->mode][DC_PRED], + 
cpi->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED], best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly, distortion, skippable); } @@ -4975,7 +5883,7 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize, TX_SIZE max_tx_size, int *rate_uv, int *rate_uv_tokenonly, int64_t *dist_uv, - int *skip_uv, PREDICTION_MODE *mode_uv) { + int *skip_uv, UV_PREDICTION_MODE *mode_uv) { // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. (void)ctx; @@ -4990,7 +5898,7 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, *rate_uv_tokenonly = 0; *dist_uv = 0; *skip_uv = 1; - *mode_uv = DC_PRED; + *mode_uv = UV_DC_PRED; return; } BLOCK_SIZE bs = scale_chroma_bsize(bsize, x->e_mbd.plane[1].subsampling_x, @@ -5011,6 +5919,12 @@ static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode, if (is_inter_compound_mode(mode)) { return cpi ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)]; +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mode)) { + return cpi + ->inter_singleref_comp_mode_cost[mode_context] + [INTER_SINGLEREF_COMP_OFFSET(mode)]; +#endif // CONFIG_COMPOUND_SINGLEREF } #endif @@ -5096,8 +6010,13 @@ typedef struct { int segment_yrate; PREDICTION_MODE modes[4]; #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + SEG_RDSTAT rdstat[4][INTER_MODES + INTER_SINGLEREF_COMP_MODES + + INTER_COMPOUND_MODES]; +#else // !CONFIG_COMPOUND_SINGLEREF SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES]; -#else +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER SEG_RDSTAT rdstat[4][INTER_MODES]; #endif // CONFIG_EXT_INTER int mvthresh; @@ -5120,27 +6039,28 @@ static int check_best_zero_mv( int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode, const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block, int mi_row, int mi_col) { - int_mv zeromv[2]; + int_mv zeromv[2] = { {.as_int = 0 } }; +#if CONFIG_GLOBAL_MOTION int comp_pred_mode = ref_frames[1] > INTRA_FRAME; - int cur_frm; +#endif (void)mi_row; (void)mi_col; - for (cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) { #if CONFIG_GLOBAL_MOTION - if (this_mode == ZEROMV + if (this_mode == ZEROMV #if CONFIG_EXT_INTER - || this_mode == ZERO_ZEROMV + || this_mode == ZERO_ZEROMV #endif // CONFIG_EXT_INTER - ) + ) { + for (int cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) { zeromv[cur_frm].as_int = gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]], cpi->common.allow_high_precision_mv, bsize, mi_col, mi_row, block) .as_int; - else -#endif // CONFIG_GLOBAL_MOTION - zeromv[cur_frm].as_int = 0; + } } +#endif // CONFIG_GLOBAL_MOTION + #if !CONFIG_EXT_INTER assert(ref_frames[1] != INTRA_FRAME); // Just sanity check #endif // !CONFIG_EXT_INTER @@ -5201,8 +6121,11 @@ static int check_best_zero_mv( } static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row, - int mi_col, + BLOCK_SIZE bsize, int_mv *frame_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv *frame_comp_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int mi_row, int mi_col, #if CONFIG_EXT_INTER int_mv *ref_mv_sub8x8[2], const uint8_t *mask, int mask_stride, @@ -5213,35 +6136,47 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, const int ph = block_size_high[bsize]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = 
&xd->mi[0]->mbmi; - // This function should only ever be called for compound modes +// This function should only ever be called for compound modes +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) { + assert(is_inter_singleref_comp_mode(mbmi->mode)); + assert(frame_comp_mv); + } + assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode)); + const int refs[2] = { mbmi->ref_frame[0], has_second_ref(mbmi) + ? mbmi->ref_frame[1] + : mbmi->ref_frame[0] }; +#else assert(has_second_ref(mbmi)); const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] }; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF int_mv ref_mv[2]; int ite, ref; -#if CONFIG_DUAL_FILTER - InterpFilter interp_filter[4] = { - mbmi->interp_filter[0], mbmi->interp_filter[1], mbmi->interp_filter[2], - mbmi->interp_filter[3], - }; -#else - const InterpFilter interp_filter = mbmi->interp_filter; -#endif // CONFIG_DUAL_FILTER struct scale_factors sf; - struct macroblockd_plane *const pd = &xd->plane[0]; #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION // ic and ir are the 4x4 coordiantes of the sub8x8 at index "block" const int ic = block & 1; const int ir = (block - ic) >> 1; + struct macroblockd_plane *const pd = &xd->plane[0]; const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic; const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir; #if CONFIG_GLOBAL_MOTION int is_global[2]; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { +#else for (ref = 0; ref < 2; ++ref) { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF WarpedMotionParams *const wm = &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]]; is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype); } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) is_global[1] = is_global[0]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF #endif // CONFIG_GLOBAL_MOTION +#else // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION + (void)block; #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION // Do joint motion search in compound mode to get more accurate mv. @@ -5264,7 +6199,11 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, (void)ref_mv_sub8x8; #endif // CONFIG_EXT_INTER && CONFIG_CB4X4 +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { +#else for (ref = 0; ref < 2; ++ref) { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF #if CONFIG_EXT_INTER && !CONFIG_CB4X4 if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL) ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int; @@ -5284,6 +6223,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, } } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) { + assert(is_inter_singleref_comp_mode(mbmi->mode)); + // NOTE: For single ref comp mode, set up the 2nd set of ref_mv/pre_planes + // all from the 1st reference frame, i.e. refs[0]. + ref_mv[1] = x->mbmi_ext->ref_mvs[refs[0]][0]; + if (scaled_ref_frame[0]) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. 
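/* [Editor's note - illustrative sketch, not part of this change.]  The
 * single-reference compound path below reuses the existing joint search by
 * temporarily pointing the second prediction-plane slot at a scaled copy of
 * the same reference, then restoring the original buffers afterwards.  The
 * snippet shows that backup / swap / restore pattern in isolation; the struct
 * and function names are placeholders, not AOM API. */
#include <assert.h>
#include <stdint.h>

struct plane_buf { const uint8_t *buf; int stride; };

static void search_with_scaled_ref(struct plane_buf *pre, int num_planes,
                                   const struct plane_buf *scaled,
                                   void (*do_search)(void *), void *ctx) {
  struct plane_buf backup[3];               /* MAX_MB_PLANE-sized stash */
  assert(num_planes <= 3);
  for (int i = 0; i < num_planes; ++i) backup[i] = pre[i];  /* backup_yv12 */
  for (int i = 0; i < num_planes; ++i) pre[i] = scaled[i];  /* setup_pre_planes */
  do_search(ctx);                           /* motion search runs unchanged */
  for (int i = 0; i < num_planes; ++i) pre[i] = backup[i];  /* restore */
}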
+ for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[1][i] = xd->plane[i].pre[1]; + av1_setup_pre_planes(xd, 1, scaled_ref_frame[0], mi_row, mi_col, NULL); + } + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Since we have scaled the reference frames to match the size of the current // frame we must use a unit scaling factor during mode selection. #if CONFIG_HIGHBITDEPTH @@ -5294,9 +6251,16 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, cm->height); #endif // CONFIG_HIGHBITDEPTH - // Allow joint search multiple times iteratively for each reference frame - // and break out of the search loop if it couldn't find a better mv. +// Allow joint search multiple times iteratively for each reference frame +// and break out of the search loop if it couldn't find a better mv. +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + const int num_ites = + (has_second_ref(mbmi) || mbmi->mode == SR_NEW_NEWMV) ? 4 : 1; + const int start_ite = has_second_ref(mbmi) ? 0 : 1; + for (ite = start_ite; ite < (start_ite + num_ites); ite++) { +#else for (ite = 0; ite < 4; ite++) { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF struct buf_2d ref_yv12[2]; int bestsme = INT_MAX; int sadpb = x->sadperbit16; @@ -5308,7 +6272,7 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, // odd iterations search in the second. The predictor // found for the 'other' reference frame is factored in. const int plane = 0; - ConvolveParams conv_params = get_conv_params(0, plane); + ConvolveParams conv_params = get_conv_params(!id, 0, plane); #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION WarpTypesAllowed warp_types; #if CONFIG_GLOBAL_MOTION @@ -5323,21 +6287,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, ref_yv12[0] = xd->plane[plane].pre[0]; ref_yv12[1] = xd->plane[plane].pre[1]; -#if CONFIG_DUAL_FILTER - // reload the filter types - interp_filter[0] = - (id == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0]; - interp_filter[1] = - (id == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1]; -#endif // CONFIG_DUAL_FILTER - // Get the prediction block from the 'other' reference frame. +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + MV *const the_other_mv = (has_second_ref(mbmi) || id) + ? 
&frame_mv[refs[!id]].as_mv + : &frame_comp_mv[refs[0]].as_mv; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16); av1_highbd_build_inter_predictor( ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, - &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + the_other_mv, +#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF) + &frame_mv[refs[!id]].as_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + &sf, pw, ph, 0, mbmi->interp_filter, #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION &warp_types, p_col, p_row, #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION @@ -5347,7 +6314,12 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, #endif // CONFIG_HIGHBITDEPTH av1_build_inter_predictor( ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, - &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + the_other_mv, +#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF) + &frame_mv[refs[!id]].as_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + &sf, pw, ph, &conv_params, mbmi->interp_filter, #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION &warp_types, p_col, p_row, plane, !id, #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION @@ -5360,13 +6332,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, if (id) xd->plane[plane].pre[0] = ref_yv12[id]; av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv); - // Use the mv result from the single mode as mv predictor. - *best_mv = frame_mv[refs[id]].as_mv; +// Use the mv result from the single mode as mv predictor. +// Use the mv result from the single mode as mv predictor. +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi) && id) + *best_mv = frame_comp_mv[refs[0]].as_mv; + else +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + *best_mv = frame_mv[refs[id]].as_mv; best_mv->col >>= 3; best_mv->row >>= 3; - av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx); +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); + else +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx); // Small-range full-pixel motion search. bestsme = @@ -5392,60 +6375,33 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - if (cpi->sf.use_upsampled_references) { - // Use up-sampled reference frames. 
- struct buf_2d backup_pred = pd->pre[0]; - const YV12_BUFFER_CONFIG *upsampled_ref = - get_upsampled_ref(cpi, refs[id]); - - // Set pred for Y plane - setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer, - upsampled_ref->y_crop_width, - upsampled_ref->y_crop_height, upsampled_ref->y_stride, - (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x, - pd->subsampling_y); - -// If bsize < BLOCK_8X8, adjust pred pointer for this block -#if !CONFIG_CB4X4 - if (bsize < BLOCK_8X8) - pd->pre[0].buf = - &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block, - pd->pre[0].stride)) - << 3]; -#endif // !CONFIG_CB4X4 - - bestsme = cpi->find_fractional_mv_step( - x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], 0, - cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, -#if CONFIG_EXT_INTER - mask, mask_stride, id, -#endif - pw, ph, 1); - - // Restore the reference frames. - pd->pre[0] = backup_pred; - } else { - (void)block; - bestsme = cpi->find_fractional_mv_step( - x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], 0, - cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, + bestsme = cpi->find_fractional_mv_step( + x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, + x->errorperbit, &cpi->fn_ptr[bsize], 0, + cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, #if CONFIG_EXT_INTER - mask, mask_stride, id, + mask, mask_stride, id, #endif - pw, ph, 0); - } + pw, ph, cpi->sf.use_upsampled_references); } // Restore the pointer to the first (possibly scaled) prediction buffer. if (id) xd->plane[plane].pre[0] = ref_yv12[0]; if (bestsme < last_besterr[id]) { - frame_mv[refs[id]].as_mv = *best_mv; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // NOTE: For single ref comp mode, frame_mv stores the first mv and + // frame_comp_mv stores the second mv. + if (!has_second_ref(mbmi) && id) + frame_comp_mv[refs[0]].as_mv = *best_mv; + else +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + frame_mv[refs[id]].as_mv = *best_mv; last_besterr[id] = bestsme; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) last_besterr[!id] = last_besterr[id]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } else { break; } @@ -5453,40 +6409,92 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, *rate_mv = 0; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { +#else for (ref = 0; ref < 2; ++ref) { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF if (scaled_ref_frame[ref]) { // Restore the prediction frame pointers to their unscaled versions. int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[ref] = backup_yv12[ref][i]; } - av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx); + +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); + else +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx); + +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) { + // NOTE: For single ref comp mode, i.e. !has_second_ref(mbmi) is true, the + // first mv is stored in frame_mv[] and the second mv is stored in + // frame_comp_mv[]. 
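/* [Editor's note - illustrative sketch, not part of this change.]  For a
 * single-reference compound mode the two motion vectors are signalled against
 * the same reference MV: the first only when the mode's first component is
 * NEWMV (SR_NEW_NEWMV), the second always.  The toy cost below is a crude
 * stand-in for av1_mv_bit_cost() - all names and the bit estimate are
 * hypothetical - and only shows how the two contributions add into rate_mv. */
#include <stdlib.h>

struct toy_mv { int row, col; };

static int toy_component_bits(int d) {
  int bits = 1;                          /* sign / zero flag */
  for (d = abs(d); d > 0; d >>= 1) ++bits;
  return bits;
}

static int toy_mv_bit_cost(const struct toy_mv *mv, const struct toy_mv *ref) {
  return toy_component_bits(mv->row - ref->row) +
         toy_component_bits(mv->col - ref->col);
}

static int toy_rate_mv_single_ref_comp(const struct toy_mv *mv0,
                                       const struct toy_mv *mv1,
                                       const struct toy_mv *ref,
                                       int first_is_newmv) {
  int rate = 0;
  if (first_is_newmv) rate += toy_mv_bit_cost(mv0, ref);
  rate += toy_mv_bit_cost(mv1, ref);     /* second component is always NEWMV */
  return rate;
}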
+ if (compound_ref0_mode(mbmi->mode) == NEWMV) // SR_NEW_NEWMV + *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv, + &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + assert(compound_ref1_mode(mbmi->mode) == NEWMV); + *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv, + &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + } else { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF #if CONFIG_EXT_INTER && !CONFIG_CB4X4 - if (bsize >= BLOCK_8X8) + if (bsize >= BLOCK_8X8) #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4 - *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv, - &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv, + &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); #if CONFIG_EXT_INTER && !CONFIG_CB4X4 - else - *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv, - &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost, - x->mvcost, MV_COST_WEIGHT); + else + *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv, + &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost, + x->mvcost, MV_COST_WEIGHT); #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4 +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } + +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) { + if (scaled_ref_frame[0]) { + // Restore the prediction frame pointers to their unscaled versions. + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = backup_yv12[1][i]; + } + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } -static void estimate_ref_frame_costs(const AV1_COMMON *cm, - const MACROBLOCKD *xd, int segment_id, - unsigned int *ref_costs_single, - unsigned int *ref_costs_comp, - aom_prob *comp_mode_p) { +static void estimate_ref_frame_costs( + const AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id, + unsigned int *ref_costs_single, +#if CONFIG_EXT_COMP_REFS + unsigned int (*ref_costs_comp)[TOTAL_REFS_PER_FRAME], +#else + unsigned int *ref_costs_comp, +#endif // CONFIG_EXT_COMP_REFS + aom_prob *comp_mode_p) { int seg_ref_active = segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); if (seg_ref_active) { memset(ref_costs_single, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single)); +#if CONFIG_EXT_COMP_REFS + int ref_frame; + for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame) + memset(ref_costs_comp[ref_frame], 0, + TOTAL_REFS_PER_FRAME * sizeof((*ref_costs_comp)[0])); +#else memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp)); +#endif // CONFIG_EXT_COMP_REFS + *comp_mode_p = 128; } else { aom_prob intra_inter_p = av1_get_intra_inter_prob(cm, xd); @@ -5541,7 +6549,7 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm, ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0); ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1); -#else +#else // !CONFIG_EXT_REFS ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0); ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1); ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1); @@ -5570,6 +6578,63 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm, unsigned int base_cost = av1_cost_bit(intra_inter_p, 1); +#if CONFIG_EXT_COMP_REFS + aom_prob comp_ref_type_p = av1_get_comp_reference_type_prob(cm, xd); + unsigned int 
ref_bicomp_costs[TOTAL_REFS_PER_FRAME] = { 0 }; + + ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] = + ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] = +#if USE_UNI_COMP_REFS + base_cost + av1_cost_bit(comp_ref_type_p, 1); +#else + base_cost; +#endif // USE_UNI_COMP_REFS + ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF_FRAME] = 0; + + ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0); + ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0); + ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1); + ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1); + + ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1); + ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0); + + ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0); + ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1); + + ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0); + ref_bicomp_costs[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1); + + int ref0; + for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) { + ref_costs_comp[ref0][BWDREF_FRAME] = + ref_bicomp_costs[ref0] + ref_bicomp_costs[BWDREF_FRAME]; + ref_costs_comp[ref0][ALTREF_FRAME] = + ref_bicomp_costs[ref0] + ref_bicomp_costs[ALTREF_FRAME]; + } + + aom_prob uni_comp_ref_p = av1_get_pred_prob_uni_comp_ref_p(cm, xd); + aom_prob uni_comp_ref_p1 = av1_get_pred_prob_uni_comp_ref_p1(cm, xd); + aom_prob uni_comp_ref_p2 = av1_get_pred_prob_uni_comp_ref_p2(cm, xd); + + ref_costs_comp[LAST_FRAME][LAST2_FRAME] = + base_cost + av1_cost_bit(comp_ref_type_p, 0) + + av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 0); + ref_costs_comp[LAST_FRAME][LAST3_FRAME] = + base_cost + av1_cost_bit(comp_ref_type_p, 0) + + av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) + + av1_cost_bit(uni_comp_ref_p2, 0); + ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = + base_cost + av1_cost_bit(comp_ref_type_p, 0) + + av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) + + av1_cost_bit(uni_comp_ref_p2, 1); + + ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = + base_cost + av1_cost_bit(comp_ref_type_p, 0) + + av1_cost_bit(uni_comp_ref_p, 1); + +#else // !CONFIG_EXT_COMP_REFS + ref_costs_comp[LAST_FRAME] = #if CONFIG_EXT_REFS ref_costs_comp[LAST2_FRAME] = ref_costs_comp[LAST3_FRAME] = @@ -5596,11 +6661,23 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm, // more bit. 
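/* [Editor's note - illustrative sketch, not part of this change.]  The
 * compound reference costs here are built by walking the binary signalling
 * tree and summing one av1_cost_bit() term per branch, then combining a
 * forward-reference cost with a backward-reference cost per pair.  The model
 * below approximates av1_cost_bit() with -log2(prob) in 1/512-bit units (the
 * real encoder uses a lookup table) and fills a toy 2x2 table; all names here
 * are hypothetical. */
#include <math.h>

static int cost_bit_model(int p /* aom_prob, 1..255: P(bit == 0) = p/256 */,
                          int bit) {
  const double prob = bit ? (256 - p) / 256.0 : p / 256.0;
  return (int)lround(-log2(prob) * 512.0);
}

/* Fill costs for {LAST, GOLDEN} x {BWDREF, ALTREF}: each entry is the base
 * cost plus one forward-side branch bit plus one backward-side branch bit. */
static void build_toy_comp_costs(int base_cost, int p_fwd, int p_bwd,
                                 int cost[2][2]) {
  const int fwd_cost[2] = { cost_bit_model(p_fwd, 0),    /* LAST   */
                            cost_bit_model(p_fwd, 1) };  /* GOLDEN */
  const int bwd_cost[2] = { cost_bit_model(p_bwd, 0),    /* BWDREF */
                            cost_bit_model(p_bwd, 1) };  /* ALTREF */
  for (int f = 0; f < 2; ++f)
    for (int b = 0; b < 2; ++b)
      cost[f][b] = base_cost + fwd_cost[f] + bwd_cost[b];
}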
ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0); ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1); -#else +#else // !CONFIG_EXT_REFS ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0); ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1); #endif // CONFIG_EXT_REFS +#endif // CONFIG_EXT_COMP_REFS } else { +#if CONFIG_EXT_COMP_REFS + int ref0; + for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) { + ref_costs_comp[ref0][BWDREF_FRAME] = 512; + ref_costs_comp[ref0][ALTREF_FRAME] = 512; + } + ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512; + ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512; + ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512; + ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512; +#else // !CONFIG_EXT_COMP_REFS ref_costs_comp[LAST_FRAME] = 512; #if CONFIG_EXT_REFS ref_costs_comp[LAST2_FRAME] = 512; @@ -5609,6 +6686,7 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm, ref_costs_comp[ALTREF_FRAME] = 512; #endif // CONFIG_EXT_REFS ref_costs_comp[GOLDEN_FRAME] = 512; +#endif // CONFIG_EXT_COMP_REFS } } } @@ -5693,8 +6771,13 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, int sadpb = x->sadperbit16; MV mvp_full; #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + int ref = + has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0]; +#else // !CONFIG_COMPOUND_SINGLEREF int ref = mbmi->ref_frame[ref_idx]; -#else +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER int ref = mbmi->ref_frame[0]; int ref_idx = 0; #endif // CONFIG_EXT_INTER @@ -5802,7 +6885,7 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv, &(x->best_mv.as_mv), 0); break; - default: assert("Invalid motion mode!\n"); + default: assert(0 && "Invalid motion mode!\n"); } #endif // CONFIG_MOTION_VAR @@ -5820,17 +6903,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, x->second_best_mv.as_int != x->best_mv.as_int; const int pw = block_size_wide[bsize]; const int ph = block_size_high[bsize]; - // Use up-sampled reference frames. - struct macroblockd_plane *const pd = &xd->plane[0]; - struct buf_2d backup_pred = pd->pre[ref_idx]; - const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); - - // Set pred for Y plane - setup_pred_plane( - &pd->pre[ref_idx], bsize, upsampled_ref->y_buffer, - upsampled_ref->y_crop_width, upsampled_ref->y_crop_height, - upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL, - pd->subsampling_x, pd->subsampling_y); best_mv_var = cpi->find_fractional_mv_step( x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, @@ -5873,9 +6945,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, x->best_mv.as_mv = best_mv; } } - - // Restore the reference frames. 
- pd->pre[ref_idx] = backup_pred; } else { cpi->find_fractional_mv_step( x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, @@ -5891,13 +6960,12 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, break; case OBMC_CAUSAL: av1_find_best_obmc_sub_pixel_tree_up( - cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv, - cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0, - cpi->sf.use_upsampled_references); + x, &x->best_mv.as_mv, &ref_mv, cm->allow_high_precision_mv, + x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis, + &x->pred_sse[ref], 0, cpi->sf.use_upsampled_references); break; - default: assert("Invalid motion mode!\n"); + default: assert(0 && "Invalid motion mode!\n"); } #endif // CONFIG_MOTION_VAR } @@ -5936,15 +7004,12 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, const int ph = block_size_high[bsize]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; +#if CONFIG_COMPOUND_SINGLEREF + const int other_ref = + has_second_ref(mbmi) ? mbmi->ref_frame[!ref_idx] : mbmi->ref_frame[0]; +#else // !CONFIG_COMPOUND_SINGLEREF const int other_ref = mbmi->ref_frame[!ref_idx]; -#if CONFIG_DUAL_FILTER - InterpFilter interp_filter[2] = { - (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0], - (ref_idx == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1] - }; -#else - const InterpFilter interp_filter = mbmi->interp_filter; -#endif // CONFIG_DUAL_FILTER +#endif // CONFIG_COMPOUND_SINGLEREF struct scale_factors sf; #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION struct macroblockd_plane *const pd = &xd->plane[0]; @@ -5961,8 +7026,12 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, (void)block; #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION - // This function should only ever be called for compound modes +// This function should only ever be called for compound modes +#if CONFIG_COMPOUND_SINGLEREF + assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode)); +#else // !CONFIG_COMPOUND_SINGLEREF assert(has_second_ref(mbmi)); +#endif // CONFIG_COMPOUND_SINGLEREF struct buf_2d backup_yv12[MAX_MB_PLANE]; const YV12_BUFFER_CONFIG *const scaled_ref_frame = @@ -5991,7 +7060,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, struct buf_2d ref_yv12; const int plane = 0; - ConvolveParams conv_params = get_conv_params(0, plane); + ConvolveParams conv_params = get_conv_params(!ref_idx, 0, plane); #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION WarpTypesAllowed warp_types; #if CONFIG_GLOBAL_MOTION @@ -6010,7 +7079,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { av1_highbd_build_inter_predictor( ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph, - 0, interp_filter, + 0, mbmi->interp_filter, #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION &warp_types, p_col, p_row, #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION @@ -6019,7 +7088,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, #endif // CONFIG_HIGHBITDEPTH av1_build_inter_predictor( ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph, - &conv_params, interp_filter, + &conv_params, mbmi->interp_filter, #if CONFIG_GLOBAL_MOTION || 
CONFIG_WARPED_MOTION &warp_types, p_col, p_row, plane, !ref_idx, #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION @@ -6038,15 +7107,22 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, // Search for the best mv for one component of a compound, // given that the other component is fixed. -static void compound_single_motion_search( - const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv, - int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int *rate_mv, const int block, int ref_idx) { +static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, MV *this_mv, + int mi_row, int mi_col, + const uint8_t *second_pred, + const uint8_t *mask, int mask_stride, + int *rate_mv, int ref_idx) { const int pw = block_size_wide[bsize]; const int ph = block_size_high[bsize]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; +#if CONFIG_COMPOUND_SINGLEREF + const int ref = + has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0]; +#else const int ref = mbmi->ref_frame[ref_idx]; +#endif // CONFIG_COMPOUND_SINGLEREF int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0]; struct macroblockd_plane *const pd = &xd->plane[0]; @@ -6054,9 +7130,16 @@ static void compound_single_motion_search( const YV12_BUFFER_CONFIG *const scaled_ref_frame = av1_get_scaled_ref_frame(cpi, ref); - // Check that this is either an interinter or an interintra block +// Check that this is either an interinter or an interintra block +#if CONFIG_COMPOUND_SINGLEREF assert(has_second_ref(mbmi) || + // or a single ref comp pred mode + is_inter_singleref_comp_mode(mbmi->mode) || (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME)); +#else + assert(has_second_ref(mbmi) || + (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME)); +#endif // CONFIG_COMPOUND_SINGLEREF if (scaled_ref_frame) { int i; @@ -6091,7 +7174,12 @@ static void compound_single_motion_search( best_mv->col >>= 3; best_mv->row >>= 3; - av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx); + else +#endif // CONFIG_COMPOUND_SINGLEREF + av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); // Small-range full-pixel motion search. bestsme = av1_refining_search_8p_c(x, sadpb, search_range, @@ -6112,43 +7200,11 @@ static void compound_single_motion_search( if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - if (cpi->sf.use_upsampled_references) { - // Use up-sampled reference frames. 
- struct buf_2d backup_pred = pd->pre[0]; - const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); - - // Set pred for Y plane - setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer, - upsampled_ref->y_crop_width, - upsampled_ref->y_crop_height, upsampled_ref->y_stride, - (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x, - pd->subsampling_y); - -// If bsize < BLOCK_8X8, adjust pred pointer for this block -#if !CONFIG_CB4X4 - if (bsize < BLOCK_8X8) - pd->pre[0].buf = - &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block, - pd->pre[0].stride)) - << 3]; -#endif // !CONFIG_CB4X4 - - bestsme = cpi->find_fractional_mv_step( - x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, - x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, - mask_stride, ref_idx, pw, ph, 1); - - // Restore the reference frames. - pd->pre[0] = backup_pred; - } else { - (void)block; - bestsme = cpi->find_fractional_mv_step( - x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, - x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, - mask_stride, ref_idx, pw, ph, 0); - } + bestsme = cpi->find_fractional_mv_step( + x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, + &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, + x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride, + ref_idx, pw, ph, cpi->sf.use_upsampled_references); } // Restore the pointer to the first (possibly scaled) prediction buffer. @@ -6165,7 +7221,12 @@ static void compound_single_motion_search( xd->plane[i].pre[ref_idx] = backup_yv12[i]; } - av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx); + else +#endif // CONFIG_COMPOUND_SINGLEREF + av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } @@ -6174,13 +7235,23 @@ static void compound_single_motion_search( // where the second prediction is also an inter mode. static void compound_single_motion_search_interinter( const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + int_mv *frame_comp_mv, +#endif // CONFIG_COMPOUND_SINGLEREF int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv, const int block, int ref_idx) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - // This function should only ever be called for compound modes +// This function should only ever be called for compound modes +#if CONFIG_COMPOUND_SINGLEREF + int is_singleref_comp_mode = + !has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode); + assert(has_second_ref(mbmi) || is_singleref_comp_mode); + if (is_singleref_comp_mode && ref_idx) assert(frame_comp_mv); +#else // !CONFIG_COMPOUND_SINGLEREF assert(has_second_ref(mbmi)); +#endif // CONFIG_COMPOUND_SINGLEREF // Prediction buffer from second frame. #if CONFIG_HIGHBITDEPTH @@ -6194,14 +7265,26 @@ static void compound_single_motion_search_interinter( DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]); #endif // CONFIG_HIGHBITDEPTH +#if CONFIG_COMPOUND_SINGLEREF + MV *this_mv = has_second_ref(mbmi) + ? &frame_mv[mbmi->ref_frame[ref_idx]].as_mv + : (ref_idx ? 
&frame_comp_mv[mbmi->ref_frame[0]].as_mv + : &frame_mv[mbmi->ref_frame[0]].as_mv); + const MV *other_mv = + has_second_ref(mbmi) + ? &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv + : (ref_idx ? &frame_mv[mbmi->ref_frame[0]].as_mv + : &frame_comp_mv[mbmi->ref_frame[0]].as_mv); +#else // !CONFIG_COMPOUND_SINGLEREF MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv; const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv; +#endif // CONFIG_COMPOUND_SINGLEREF build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block, ref_idx, second_pred); compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col, - second_pred, mask, mask_stride, rate_mv, block, + second_pred, mask, mask_stride, rate_mv, ref_idx); } @@ -6220,21 +7303,40 @@ static void do_masked_motion_search_indexed( mask = av1_get_compound_type_mask(comp_data, sb_type); int_mv frame_mv[TOTAL_REFS_PER_FRAME]; +#if CONFIG_COMPOUND_SINGLEREF + int_mv frame_comp_mv[TOTAL_REFS_PER_FRAME]; +#endif // CONFIG_COMPOUND_SINGLEREF MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] }; assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4); frame_mv[rf[0]].as_int = cur_mv[0].as_int; - frame_mv[rf[1]].as_int = cur_mv[1].as_int; +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + frame_comp_mv[rf[0]].as_int = cur_mv[1].as_int; + else +#endif // CONFIG_COMPOUND_SINGLEREF + frame_mv[rf[1]].as_int = cur_mv[1].as_int; if (which == 0 || which == 1) { - compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row, - mi_col, mask, mask_stride, rate_mv, - 0, which); + compound_single_motion_search_interinter( + cpi, x, bsize, frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + has_second_ref(mbmi) ? NULL : frame_comp_mv, +#endif // CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, mask, mask_stride, rate_mv, 0, which); } else if (which == 2) { - joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask, - mask_stride, rate_mv, 0); + joint_motion_search(cpi, x, bsize, frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + has_second_ref(mbmi) ? 
NULL : frame_comp_mv, +#endif // CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, NULL, mask, mask_stride, rate_mv, 0); } tmp_mv[0].as_int = frame_mv[rf[0]].as_int; - tmp_mv[1].as_int = frame_mv[rf[1]].as_int; +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + tmp_mv[1].as_int = frame_comp_mv[rf[0]].as_int; + else // comp ref +#endif // CONFIG_COMPOUND_SINGLEREF + tmp_mv[1].as_int = frame_mv[rf[1]].as_int; } #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE #endif // CONFIG_EXT_INTER @@ -6483,7 +7585,7 @@ static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x, sse = ROUND_POWER_OF_TWO(sse, bd_round); model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist); - rd = RDCOST(x->rdmult, x->rddiv, rate, dist); + rd = RDCOST(x->rdmult, rate, dist); if (rd < best_rd) { *best_wedge_index = wedge_index; @@ -6544,7 +7646,7 @@ static int64_t pick_wedge_fixed_sign( sse = ROUND_POWER_OF_TWO(sse, bd_round); model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist); - rd = RDCOST(x->rdmult, x->rddiv, rate, dist); + rd = RDCOST(x->rdmult, rate, dist); if (rd < best_rd) { *best_wedge_index = wedge_index; @@ -6646,7 +7748,7 @@ static int64_t pick_interinter_seg(const AV1_COMP *const cpi, sse = ROUND_POWER_OF_TWO(sse, bd_round); model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist); - rd0 = RDCOST(x->rdmult, x->rddiv, rate, dist); + rd0 = RDCOST(x->rdmult, rate, dist); if (rd0 < best_rd) { best_mask_type = cur_mask_type; @@ -6729,7 +7831,17 @@ static int interinter_compound_motion_search( #endif // CONFIG_COMPOUND_SEGMENT mbmi->interinter_compound_type }; - if (this_mode == NEW_NEWMV) { +#if CONFIG_COMPOUND_SINGLEREF + // NOTE: Mode is needed to identify the compound mode prediction, regardless + // of comp refs or single ref. + mbmi->mode = this_mode; +#endif // CONFIG_COMPOUND_SINGLEREF + + if (this_mode == NEW_NEWMV +#if CONFIG_COMPOUND_SINGLEREF + || this_mode == SR_NEW_NEWMV +#endif // CONFIG_COMPOUND_SINGLEREF + ) { do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize, mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2); mbmi->mv[0].as_int = tmp_mv[0].as_int; @@ -6738,7 +7850,12 @@ static int interinter_compound_motion_search( do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize, mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0); mbmi->mv[0].as_int = tmp_mv[0].as_int; - } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) { + } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV +#if CONFIG_COMPOUND_SINGLEREF + // || this_mode == SR_NEAREST_NEWMV + || this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV +#endif // CONFIG_COMPOUND_SINGLEREF + ) { do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize, mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1); mbmi->mv[1].as_int = tmp_mv[1].as_int; @@ -6763,7 +7880,7 @@ static int64_t build_and_cost_compound_type( const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type; best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1); - best_rd_cur += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0); + best_rd_cur += RDCOST(x->rdmult, rs2 + rate_mv, 0); if (have_newmv_in_inter_mode(this_mode) && use_masked_motion_search(compound_type)) { @@ -6772,7 +7889,7 @@ static int64_t build_and_cost_compound_type( av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); - rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rs2 + 
*out_rate_mv + rate_sum, dist_sum); if (rd >= best_rd_cur) { mbmi->mv[0].as_int = cur_mv[0].as_int; mbmi->mv[1].as_int = cur_mv[1].as_int; @@ -6788,7 +7905,7 @@ static int64_t build_and_cost_compound_type( rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum); best_rd_cur = rd; } else { @@ -6801,7 +7918,7 @@ static int64_t build_and_cost_compound_type( rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum); best_rd_cur = rd; } return best_rd_cur; @@ -6832,6 +7949,9 @@ typedef struct { static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, const BLOCK_SIZE bsize, int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME], +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv (*const mode_comp_mv)[TOTAL_REFS_PER_FRAME], +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF const int mi_row, const int mi_col, int *const rate_mv, int_mv *const single_newmv, HandleInterModeArgs *const args) { @@ -6844,6 +7964,9 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME); #endif // CONFIG_EXT_INTER int_mv *const frame_mv = mode_mv[this_mode]; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv *const frame_comp_mv = mode_comp_mv[this_mode]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] < 0 ? 
0 : mbmi->ref_frame[1] }; int i; @@ -6861,8 +7984,11 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { - joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL, - 0, rate_mv, 0); + joint_motion_search(cpi, x, bsize, frame_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + NULL, // int_mv *frame_comp_mv +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, NULL, NULL, 0, rate_mv, 0); } else { *rate_mv = 0; for (i = 0; i < 2; ++i) { @@ -6877,8 +8003,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { frame_mv[refs[0]].as_int = mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int; - compound_single_motion_search_interinter( - cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1); + compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + NULL, +#endif // CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, NULL, 0, + rate_mv, 0, 1); } else { av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx); *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv, @@ -6891,8 +8021,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { frame_mv[refs[1]].as_int = mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int; - compound_single_motion_search_interinter( - cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0); + compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + NULL, +#endif // CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, NULL, 0, + rate_mv, 0, 0); } else { av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv, @@ -6900,7 +8034,7 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } } -#else +#else // !CONFIG_EXT_INTER // Initialize mv using single prediction mode result. frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; @@ -6917,6 +8051,41 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, } } #endif // CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(this_mode)) { + // Single ref comp mode + const int mode0 = compound_ref0_mode(this_mode); + + single_newmv[refs[0]].as_int = args->single_newmv[refs[0]].as_int; + frame_mv[refs[0]].as_int = (mode0 == NEWMV) + ? 
single_newmv[refs[0]].as_int + : mode_mv[mode0][refs[0]].as_int; + assert(compound_ref1_mode(this_mode) == NEWMV); + frame_comp_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; + + if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { + if (this_mode == SR_NEW_NEWMV) { + joint_motion_search(cpi, x, bsize, frame_mv, frame_comp_mv, mi_row, + mi_col, NULL, NULL, 0, rate_mv, 0); + } else { + assert( // this_mode == SR_NEAREST_NEWMV || + this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV); + compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, + frame_comp_mv, mi_row, mi_col, + NULL, 0, rate_mv, 0, 1); + } + } else { + *rate_mv = 0; + av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); + if (mode0 == NEWMV) + *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv, + &mbmi_ext->ref_mvs[refs[0]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv, + &mbmi_ext->ref_mvs[refs[0]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } else { #if CONFIG_EXT_INTER if (is_comp_interintra_pred) { @@ -6984,7 +8153,7 @@ int64_t interpolation_filter_search( av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, skip_txfm_sb, skip_sse_sb); - *rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist); + *rd = RDCOST(x->rdmult, *switchable_rate + tmp_rate, tmp_dist); if (assign_filter == SWITCHABLE) { // do interp_filter search @@ -7020,7 +8189,7 @@ int64_t interpolation_filter_search( av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, &tmp_skip_sb, &tmp_skip_sse); - tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist); + tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist); if (tmp_rd < *rd) { *rd = tmp_rd; @@ -7072,12 +8241,10 @@ static int64_t motion_mode_rd( int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd, const int *refs, int rate_mv, #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION + // only used when WARPED_MOTION is on? 
int_mv *const single_newmv, #if CONFIG_EXT_INTER - int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi, -#if CONFIG_MOTION_VAR - int rate_mv_bmc, -#endif // CONFIG_MOTION_VAR + int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi, int rate_mv_bmc, #endif // CONFIG_EXT_INTER #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) { @@ -7108,7 +8275,13 @@ static int64_t motion_mode_rd( #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_WARPED_MOTION +#if WARPED_MOTION_SORT_SAMPLES + int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE]; + int pts_mv0[SAMPLES_ARRAY_SIZE]; + int total_samples; +#else int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; +#endif // WARPED_MOTION_SORT_SAMPLES #endif // CONFIG_WARPED_MOTION #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION @@ -7118,18 +8291,39 @@ static int64_t motion_mode_rd( if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs; #if CONFIG_WARPED_MOTION aom_clear_system_state(); +#if WARPED_MOTION_SORT_SAMPLES + mbmi->num_proj_ref[0] = + findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0); + total_samples = mbmi->num_proj_ref[0]; +#else mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref); +#endif // WARPED_MOTION_SORT_SAMPLES #if CONFIG_EXT_INTER best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0]; #endif // CONFIG_EXT_INTER #endif // CONFIG_WARPED_MOTION #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION rate2_nocoeff = rd_stats->rate; +#if CONFIG_NCOBMC_ADAPT_WEIGHT + // We cannot estimate the rd cost for the motion mode NCOBMC_ADAPT_WEIGHT + // right now since it requires mvs from all neighboring blocks. We will + // check if this mode is beneficial after all the mv's in the current + // superblock are selected. 
+ last_motion_mode_allowed = motion_mode_allowed_wrapper(1, +#if CONFIG_GLOBAL_MOTION + 0, xd->global_motion, +#endif // CONFIG_GLOBAL_MOTION + mi); +#else last_motion_mode_allowed = motion_mode_allowed( -#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#if CONFIG_GLOBAL_MOTION 0, xd->global_motion, -#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif mi); +#endif // CONFIG_NCOBMC_ADAPT_WEIGHT base_mbmi = *mbmi; #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION @@ -7155,7 +8349,11 @@ static int64_t motion_mode_rd( *mbmi = *best_bmc_mbmi; mbmi->motion_mode = OBMC_CAUSAL; #endif // CONFIG_EXT_INTER - if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) { + if (!is_comp_pred && +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + !is_inter_singleref_comp_mode(this_mode) && +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + have_newmv_in_inter_mode(this_mode)) { int tmp_rate_mv = 0; single_motion_search(cpi, x, bsize, mi_row, mi_col, @@ -7195,6 +8393,9 @@ static int64_t motion_mode_rd( #if CONFIG_WARPED_MOTION if (mbmi->motion_mode == WARPED_CAUSAL) { +#if WARPED_MOTION_SORT_SAMPLES + int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; +#endif // WARPED_MOTION_SORT_SAMPLES #if CONFIG_EXT_INTER *mbmi = *best_bmc_mbmi; mbmi->motion_mode = WARPED_CAUSAL; @@ -7210,6 +8411,19 @@ static int64_t motion_mode_rd( : cm->interp_filter; #endif // CONFIG_DUAL_FILTER +#if WARPED_MOTION_SORT_SAMPLES + memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); + memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); + // Rank the samples by motion vector difference + if (mbmi->num_proj_ref[0] > 1) { + mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts, + pts_inref, mbmi->num_proj_ref[0]); +#if CONFIG_EXT_INTER + best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0]; +#endif // CONFIG_EXT_INTER + } +#endif // WARPED_MOTION_SORT_SAMPLES + if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col, &mbmi->wm_params[0], mi_row, mi_col)) { @@ -7218,9 +8432,16 @@ static int64_t motion_mode_rd( int tmp_rate_mv = 0; const int_mv mv0 = mbmi->mv[0]; WarpedMotionParams wm_params0 = mbmi->wm_params[0]; +#if WARPED_MOTION_SORT_SAMPLES + int num_proj_ref0 = mbmi->num_proj_ref[0]; // Refine MV in a small range. + av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0, + pts_mv0, total_samples); +#else + // Refine MV in a small range. av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref); +#endif // WARPED_MOTION_SORT_SAMPLES // Keep the refined MV and WM parameters. if (mv0.as_int != mbmi->mv[0].as_int) { @@ -7241,6 +8462,9 @@ static int64_t motion_mode_rd( tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); } #if CONFIG_EXT_INTER +#if WARPED_MOTION_SORT_SAMPLES + best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0]; +#endif // WARPED_MOTION_SORT_SAMPLES tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv; #else tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv; @@ -7255,6 +8479,9 @@ static int64_t motion_mode_rd( // Restore the old MV and WM parameters. 
mbmi->mv[0] = mv0; mbmi->wm_params[0] = wm_params0; +#if WARPED_MOTION_SORT_SAMPLES + mbmi->num_proj_ref[0] = num_proj_ref0; +#endif // WARPED_MOTION_SORT_SAMPLES } } @@ -7328,8 +8555,8 @@ static int64_t motion_mode_rd( av1_merge_rd_stats(rd_stats, rd_stats_y); - rdcosty = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist); - rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse)); + rdcosty = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); + rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, 0, rd_stats->sse)); /* clang-format off */ #if CONFIG_VAR_TX is_cost_valid_uv = @@ -7365,12 +8592,11 @@ static int64_t motion_mode_rd( mbmi->skip = 0; // here mbmi->skip temporarily plays a role as what this_skip2 does } else if (!xd->lossless[mbmi->segment_id] && - (RDCOST(x->rdmult, x->rddiv, + (RDCOST(x->rdmult, rd_stats_y->rate + rd_stats_uv->rate + av1_cost_bit(av1_get_skip_prob(cm, xd), 0), rd_stats->dist) >= - RDCOST(x->rdmult, x->rddiv, - av1_cost_bit(av1_get_skip_prob(cm, xd), 1), + RDCOST(x->rdmult, av1_cost_bit(av1_get_skip_prob(cm, xd), 1), rd_stats->sse))) { rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate; rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1); @@ -7427,7 +8653,7 @@ static int64_t motion_mode_rd( #endif // CONFIG_GLOBAL_MOTION #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - tmp_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist); + tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) { best_mbmi = *mbmi; best_rd = tmp_rd; @@ -7466,11 +8692,17 @@ static int64_t motion_mode_rd( return 0; } -static int64_t handle_inter_mode( - const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, - int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row, - int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) { +static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, RD_STATS *rd_stats, + RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, + int *disable_skip, + int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv (*mode_comp_mv)[TOTAL_REFS_PER_FRAME], +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int mi_row, int mi_col, + HandleInterModeArgs *args, + const int64_t ref_best_rd) { const AV1_COMMON *cm = &cpi->common; (void)cm; MACROBLOCKD *xd = &x->e_mbd; @@ -7479,7 +8711,14 @@ static int64_t handle_inter_mode( MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const int is_comp_pred = has_second_ref(mbmi); const int this_mode = mbmi->mode; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + const int is_singleref_comp_mode = is_inter_singleref_comp_mode(this_mode); +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF int_mv *frame_mv = mode_mv[this_mode]; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // The comp mv for the compound mode in single ref + int_mv *frame_comp_mv = mode_comp_mv[this_mode]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF int i; int refs[2] = { mbmi->ref_frame[0], (mbmi->ref_frame[1] < 0 ? 
0 : mbmi->ref_frame[1]) }; @@ -7487,7 +8726,7 @@ static int64_t handle_inter_mode( int rate_mv = 0; #if CONFIG_EXT_INTER int pred_exists = 1; -#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT +#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA const int bw = block_size_wide[bsize]; #endif // ONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT int_mv single_newmv[TOTAL_REFS_PER_FRAME]; @@ -7511,9 +8750,7 @@ static int64_t handle_inter_mode( #if CONFIG_EXT_INTER int rate2_bmc_nocoeff; MB_MODE_INFO best_bmc_mbmi; -#if CONFIG_MOTION_VAR int rate_mv_bmc; -#endif // CONFIG_MOTION_VAR #endif // CONFIG_EXT_INTER #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION int64_t rd = INT64_MAX; @@ -7523,6 +8760,11 @@ static int64_t handle_inter_mode( int skip_txfm_sb = 0; int64_t skip_sse_sb = INT64_MAX; int16_t mode_ctx; +#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR + // dummy fillers + mbmi->ncobmc_mode[0] = NO_OVERLAP; + mbmi->ncobmc_mode[1] = NO_OVERLAP; +#endif #if CONFIG_EXT_INTER #if CONFIG_INTERINTRA @@ -7546,7 +8788,11 @@ static int64_t handle_inter_mode( #endif // CONFIG_EXT_INTER #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + if (is_comp_pred || is_singleref_comp_mode) +#else // !CONFIG_COMPOUND_SINGLEREF if (is_comp_pred) +#endif // CONFIG_COMPOUND_SINGLEREF mode_ctx = mbmi_ext->compound_mode_context[refs[0]]; else #endif // CONFIG_EXT_INTER @@ -7572,12 +8818,22 @@ static int64_t handle_inter_mode( if (frame_mv[refs[0]].as_int == INVALID_MV || frame_mv[refs[1]].as_int == INVALID_MV) return INT64_MAX; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + } else if (is_singleref_comp_mode) { + if (frame_mv[refs[0]].as_int == INVALID_MV || + frame_comp_mv[refs[0]].as_int == INVALID_MV) + return INT64_MAX; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } mbmi->motion_mode = SIMPLE_TRANSLATION; if (have_newmv_in_inter_mode(this_mode)) { - const int64_t ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col, - &rate_mv, single_newmv, args); + const int64_t ret_val = + handle_newmv(cpi, x, bsize, mode_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + mode_comp_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, &rate_mv, single_newmv, args); if (ret_val != 0) return ret_val; else @@ -7591,6 +8847,16 @@ static int64_t handle_inter_mode( mbmi->mv[i].as_int = cur_mv[i].as_int; } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!is_comp_pred && is_singleref_comp_mode) { + cur_mv[1] = frame_comp_mv[refs[0]]; + // Clip "next_nearest" so that it does not extend to far out of image + if (this_mode != NEWMV) clamp_mv2(&cur_mv[1].as_mv, xd); + if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX; + mbmi->mv[1].as_int = cur_mv[1].as_int; + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + #if CONFIG_EXT_INTER if (this_mode == NEAREST_NEARESTMV) #else @@ -7614,7 +8880,13 @@ static int64_t handle_inter_mode( #if CONFIG_EXT_INTER if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) { - if (this_mode == NEAREST_NEWMV) { +#if CONFIG_COMPOUND_SINGLEREF + if (this_mode == NEAREST_NEWMV || // this_mode == SR_NEAREST_NEWMV || + this_mode == SR_NEAREST_NEARMV) +#else // !CONFIG_COMPOUND_SINGLEREF + if (this_mode == NEAREST_NEWMV) +#endif // CONFIG_COMPOUND_SINGLEREF + { cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv; lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv); @@ -7635,7 +8907,11 @@ static int64_t handle_inter_mode( if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { int ref_mv_idx = 
mbmi->ref_mv_idx + 1; - if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) { + if (this_mode == NEAR_NEWMV || +#if CONFIG_COMPOUND_SINGLEREF + this_mode == SR_NEAR_NEWMV || +#endif // CONFIG_COMPOUND_SINGLEREF + this_mode == NEAR_NEARMV) { cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv); @@ -7644,8 +8920,17 @@ static int64_t handle_inter_mode( mbmi->mv[0].as_int = cur_mv[0].as_int; } - if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) { - cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv; + if (this_mode == NEW_NEARMV || +#if CONFIG_COMPOUND_SINGLEREF + this_mode == SR_NEAREST_NEARMV || +#endif // CONFIG_COMPOUND_SINGLEREF + this_mode == NEAR_NEARMV) { +#if CONFIG_COMPOUND_SINGLEREF + if (this_mode == SR_NEAREST_NEARMV) + cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; + else +#endif // CONFIG_COMPOUND_SINGLEREF + cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv; lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv); clamp_mv2(&cur_mv[1].as_mv, xd); @@ -7653,7 +8938,7 @@ static int64_t handle_inter_mode( mbmi->mv[1].as_int = cur_mv[1].as_int; } } -#else +#else // !CONFIG_EXT_INTER if (this_mode == NEARMV && is_comp_pred) { uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { @@ -7706,7 +8991,7 @@ static int64_t handle_inter_mode( rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx); } - if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, 0) > ref_best_rd && + if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd && #if CONFIG_EXT_INTER mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV #else @@ -7725,13 +9010,16 @@ static int64_t handle_inter_mode( best_bmc_mbmi = *mbmi; rate2_bmc_nocoeff = rd_stats->rate; if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs; -#if CONFIG_MOTION_VAR rate_mv_bmc = rate_mv; -#endif // CONFIG_MOTION_VAR #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT - if (is_comp_pred) { +#if CONFIG_COMPOUND_SINGLEREF + if (is_comp_pred || is_singleref_comp_mode) +#else + if (is_comp_pred) +#endif // CONFIG_COMPOUND_SINGLEREF + { int rate_sum, rs2; int64_t dist_sum; int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX; @@ -7741,8 +9029,8 @@ static int64_t handle_inter_mode( int tmp_skip_txfm_sb; int64_t tmp_skip_sse_sb; int compound_type_cost[COMPOUND_TYPES]; - uint8_t pred0[2 * MAX_SB_SQUARE]; - uint8_t pred1[2 * MAX_SB_SQUARE]; + DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]); uint8_t *preds0[1] = { pred0 }; uint8_t *preds1[1] = { pred1 }; int strides[1] = { bw }; @@ -7761,6 +9049,17 @@ static int64_t handle_inter_mode( best_compound_data.seg_mask = tmp_mask_buf; #endif // CONFIG_COMPOUND_SEGMENT +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // TODO(zoeliu): To further check whether the following setups are needed. + // Single ref compound mode: Prepare the 2nd ref frame predictor the same as + // the 1st one. 
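/* Context note on the CONFIG_COMPOUND_SINGLEREF setup below: a
 * single-reference compound mode forms both predictions from the same
 * reference frame, only with two different motion vectors, so the second
 * prediction slot does not load a second reference at all -- block_refs[1]
 * and plane[i].pre[1] are simply aliased to slot 0. */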
+ if (!is_comp_pred && is_singleref_comp_mode) { + xd->block_refs[1] = xd->block_refs[0]; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = xd->plane[i].pre[0]; + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (masked_compound_used) { av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize], av1_compound_type_tree); @@ -7773,7 +9072,7 @@ static int64_t handle_inter_mode( for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) { if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break; - if (!is_interinter_compound_used(cur_type, bsize)) break; + if (!is_interinter_compound_used(cur_type, bsize)) continue; tmp_rate_mv = rate_mv; best_rd_cur = INT64_MAX; mbmi->interinter_compound_type = cur_type; @@ -7792,8 +9091,7 @@ static int64_t handle_inter_mode( &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - best_rd_cur = - RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum); + best_rd_cur = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum); best_rd_compound = best_rd_cur; break; #if CONFIG_WEDGE @@ -7923,8 +9221,7 @@ static int64_t handle_inter_mode( av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); - rd = - RDCOST(x->rdmult, x->rddiv, tmp_rate_mv + rate_sum + rmode, dist_sum); + rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum); if (rd < best_interintra_rd) { best_interintra_rd = rd; best_interintra_mode = mbmi->interintra_mode; @@ -7939,7 +9236,7 @@ static int64_t handle_inter_mode( rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum, dist_sum); best_interintra_rd = rd; if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) { @@ -7953,8 +9250,7 @@ static int64_t handle_inter_mode( int_mv tmp_mv; int rwedge = av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum, - dist_sum); + rd = RDCOST(x->rdmult, rmode + rate_mv + rwedge + rate_sum, dist_sum); best_interintra_rd_nowedge = best_interintra_rd; // Disable wedge search if source variance is small @@ -7968,7 +9264,7 @@ static int64_t handle_inter_mode( pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_); best_interintra_rd_wedge += - RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge, 0); + RDCOST(x->rdmult, rmode + rate_mv + rwedge, 0); // Refine motion vector. 
if (have_newmv_in_inter_mode(this_mode)) { // get negative of mask @@ -7977,14 +9273,14 @@ static int64_t handle_inter_mode( tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int; compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row, mi_col, intrapred, mask, bw, - &tmp_rate_mv, 0, 0); + &tmp_rate_mv, 0); mbmi->mv[0].as_int = tmp_mv.as_int; av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); - rd = RDCOST(x->rdmult, x->rddiv, - rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum, + dist_sum); if (rd >= best_interintra_rd_wedge) { tmp_mv.as_int = cur_mv[0].as_int; tmp_rate_mv = rate_mv; @@ -8000,8 +9296,8 @@ static int64_t handle_inter_mode( estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, - rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum, + dist_sum); best_interintra_rd_wedge = rd; if (best_interintra_rd_wedge < best_interintra_rd_nowedge) { mbmi->use_wedge_interintra = 1; @@ -8042,7 +9338,7 @@ static int64_t handle_inter_mode( av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb); - rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); + rd = RDCOST(x->rdmult, rs + tmp_rate, tmp_dist); } #endif // CONFIG_EXT_INTER @@ -8097,10 +9393,7 @@ static int64_t handle_inter_mode( #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION single_newmv, #if CONFIG_EXT_INTER - rate2_bmc_nocoeff, &best_bmc_mbmi, -#if CONFIG_MOTION_VAR - rate_mv_bmc, -#endif // CONFIG_MOTION_VAR + rate2_bmc_nocoeff, &best_bmc_mbmi, rate_mv_bmc, #endif // CONFIG_EXT_INTER #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst); @@ -8118,11 +9411,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; const TileInfo *tile = &xd->tile; -#if CONFIG_EC_ADAPT FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *const ec_ctx = cm->fc; -#endif // CONFIG_EC_ADAPT MODE_INFO *const mi = xd->mi[0]; const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE); const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE); @@ -8222,7 +9511,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, #endif mbmi->use_intrabc = 1; mbmi->mode = DC_PRED; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->mv[0].as_mv = dv; #if CONFIG_DUAL_FILTER for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR; @@ -8233,12 +9522,12 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, x->skip = 0; av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); + assert(x->mvcost == x->mv_cost_stack[0]); + // TODO(aconverse@google.com): The full motion field defining discount + // in MV_COST_WEIGHT is too large. Explore other values. 
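/* Context note for the intrabc rate computation that follows:
 * av1_mv_bit_cost() prices the displacement (dv - dv_ref) from the joint and
 * per-component MV cost tables and scales that bit cost by its weight
 * argument, so the MV_COST_WEIGHT / MV_COST_WEIGHT_SUB choice sets how
 * strongly the intrabc displacement rate is discounted in the RD cost; the
 * TODO above flags the discount implied by MV_COST_WEIGHT as too large for
 * this use. */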
int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost, - x->mvcost, MV_COST_WEIGHT); - const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0); - const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0); - const int rate_mode = cpi->y_mode_costs[A][L][DC_PRED] + - av1_cost_bit(ec_ctx->intrabc_prob, 1); + x->mvcost, MV_COST_WEIGHT_SUB); + const int rate_mode = av1_cost_bit(ec_ctx->intrabc_prob, 1); RD_STATS rd_stats, rd_stats_uv; av1_subtract_plane(x, bsize, 0); @@ -8267,8 +9556,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, rdc_noskip.rate = rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0); rdc_noskip.dist = rd_stats.dist; - rdc_noskip.rdcost = - RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist); + rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist); if (rdc_noskip.rdcost < best_rd) { best_rd = rdc_noskip.rdcost; best_mbmi = *mbmi; @@ -8282,7 +9570,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, av1_init_rd_stats(&rdc_skip); rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1); rdc_skip.dist = rd_stats.sse; - rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist); + rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist); if (rdc_skip.rdcost < best_rd) { best_rd = rdc_skip.rdcost; best_mbmi = *mbmi; @@ -8302,6 +9590,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int64_t best_rd) { const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; struct macroblockd_plane *const pd = xd->plane; int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; int y_skip = 0, uv_skip = 0; @@ -8310,11 +9599,11 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, const int unify_bsize = CONFIG_CB4X4; ctx->skip = 0; - xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; - xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME; + mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = NONE_FRAME; #if CONFIG_INTRABC - xd->mi[0]->mbmi.use_intrabc = 0; - xd->mi[0]->mbmi.mv[0].as_int = 0; + mbmi->use_intrabc = 0; + mbmi->mv[0].as_int = 0; #endif // CONFIG_INTRABC const int64_t intra_yrd = @@ -8325,9 +9614,29 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, &dist_y, &y_skip, best_rd); if (intra_yrd < best_rd) { - max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size] - [pd[1].subsampling_x][pd[1].subsampling_y]; - init_sbuv_mode(&xd->mi[0]->mbmi); +#if CONFIG_CFL + // Perform one extra txfm_rd_in_plane() call, this time with the best value + // so we can store reconstructed luma values + RD_STATS this_rd_stats; + +#if CONFIG_CB4X4 + // Don't store the luma value if no chroma is associated. + // Don't worry, we will store this reconstructed luma in the following + // encode dry-run the chroma plane will never know. 
+ x->cfl_store_y = !x->skip_chroma_rd; +#else + x->cfl_store_y = 1; +#endif + + txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y, + mbmi->sb_type, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); + + x->cfl_store_y = 0; +#endif + max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x] + [pd[1].subsampling_y]; + init_sbuv_mode(mbmi); #if CONFIG_CB4X4 if (!x->skip_chroma_rd) rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, @@ -8346,8 +9655,8 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, rate_y + rate_uv + av1_cost_bit(av1_get_skip_prob(cm, xd), 0); rd_cost->dist = dist_y + dist_uv; } - rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist); +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 rd_cost->dist_y = dist_y; #endif } else { @@ -8360,7 +9669,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) { ctx->skip = x->skip; // FIXME where is the proper place to set this?! assert(rd_cost->rate != INT_MAX); - rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); + rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist); } #endif if (rd_cost->rate == INT_MAX) return; @@ -8494,7 +9803,8 @@ static void pick_filter_intra_interframe( const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv, - PREDICTION_MODE *mode_uv, FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv, + UV_PREDICTION_MODE *mode_uv, + FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv, #if CONFIG_EXT_INTRA int8_t *uv_angle_delta, #endif // CONFIG_EXT_INTRA @@ -8531,7 +9841,7 @@ static void pick_filter_intra_interframe( // TODO(huisu): use skip_mask for further speedup. 
(void)skip_mask; mbmi->mode = DC_PRED; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE_FRAME; if (!rd_pick_filter_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y, @@ -8600,7 +9910,8 @@ static void pick_filter_intra_interframe( rate2 += write_uniform_cost( FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]); #if CONFIG_EXT_INTRA - if (av1_is_directional_mode(mbmi->uv_mode, bsize)) { + if (av1_is_directional_mode(mbmi->uv_mode, bsize) && + av1_use_angle_delta(bsize)) { rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, MAX_ANGLE_DELTA + mbmi->angle_delta[1]); } @@ -8628,7 +9939,7 @@ static void pick_filter_intra_interframe( } else { rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); } - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); if (this_rd < *best_intra_rd) { *best_intra_rd = this_rd; @@ -8693,6 +10004,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, unsigned char segment_id = mbmi->segment_id; int comp_pred, i, k; int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME]; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv frame_comp_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]; int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } }; #if CONFIG_EXT_INTER @@ -8722,7 +10036,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int best_mode_skippable = 0; int midx, best_mode_index = -1; unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME]; +#if CONFIG_EXT_COMP_REFS + unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME]; +#else unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME]; +#endif // CONFIG_EXT_COMP_REFS aom_prob comp_mode_p; int64_t best_intra_rd = INT64_MAX; unsigned int best_pred_sse = UINT_MAX; @@ -8730,7 +10048,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL]; int64_t dist_uvs[TX_SIZES_ALL]; int skip_uvs[TX_SIZES_ALL]; - PREDICTION_MODE mode_uv[TX_SIZES_ALL]; + UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL]; #if CONFIG_PALETTE PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL]; #endif // CONFIG_PALETTE @@ -8747,7 +10065,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth); const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]]; int best_skip2 = 0; - uint8_t ref_frame_skip_mask[2] = { 0 }; + uint16_t ref_frame_skip_mask[2] = { 0 }; uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 }; #if CONFIG_EXT_INTER && CONFIG_INTERINTRA MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME; @@ -8850,6 +10168,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, *returnrate_nocoef = INT_MAX; #endif // CONFIG_SUPERTX +#if CONFIG_SPEED_REFS + memset(x->mbmi_ext->ref_mvs, 0, sizeof(x->mbmi_ext->ref_mvs)); +#endif // CONFIG_SPEED_REFS + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; x->mbmi_ext->mode_context[ref_frame] = 0; @@ -8873,6 +10195,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_GLOBAL_MOTION #if CONFIG_EXT_INTER frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV; +#if CONFIG_COMPOUND_SINGLEREF + 
frame_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV; + frame_comp_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV; +#endif // CONFIG_COMPOUND_SINGLEREF #if CONFIG_GLOBAL_MOTION frame_mv[ZERO_ZEROMV][ref_frame].as_int = gm_get_motion_vector(&cm->global_motion[ref_frame], @@ -8934,6 +10260,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, // Skip checking missing references in both single and compound reference // modes. Note that a mode will be skipped iff both reference frames // are masked out. +#if CONFIG_EXT_COMP_REFS + ref_frame_skip_mask[0] |= (1 << ref_frame); + ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; +#else // !CONFIG_EXT_COMP_REFS #if CONFIG_EXT_REFS if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) { ref_frame_skip_mask[0] |= (1 << ref_frame); @@ -8945,6 +10275,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #if CONFIG_EXT_REFS } #endif // CONFIG_EXT_REFS +#endif // CONFIG_EXT_COMP_REFS } else { for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { // Skip fixed mv modes for poor references @@ -9000,6 +10331,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV); if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int) mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV); +#if CONFIG_COMPOUND_SINGLEREF + if (frame_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int || + frame_comp_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int != + zeromv.as_int) + mode_skip_mask[ALTREF_FRAME] |= (1 << SR_NEAREST_NEARMV); +#endif // CONFIG_COMPOUND_SINGLEREF #endif // CONFIG_EXT_INTER } } @@ -9077,7 +10414,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int compmode_cost = 0; int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 int64_t distortion2_y = 0; int64_t total_sse_y = INT64_MAX; #endif @@ -9106,6 +10443,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int; frame_mv[this_mode][second_ref_frame].as_int = frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int; +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(this_mode)) { + frame_mv[this_mode][ref_frame].as_int = + frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int; + frame_comp_mv[this_mode][ref_frame].as_int = + frame_mv[compound_ref1_mode(this_mode)][ref_frame].as_int; +#endif // CONFIG_COMPOUND_SINGLEREF } #endif // CONFIG_EXT_INTER @@ -9154,6 +10498,34 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame)))) continue; +#if CONFIG_EXT_COMP_REFS +// TODO(zoeliu): Following toggle between #if 0/1 and the bug will manifest +// itself. 
+#if 0 + if (!(cpi->ref_frame_flags & flag_list[ref_frame]) || + (second_ref_frame > INTRA_FRAME && + (!(cpi->ref_frame_flags & flag_list[second_ref_frame])))) + printf("Frame=%d, bsize=%d, (mi_row,mi_col)=(%d,%d), ref_frame=%d, " + "second_ref_frame=%d\n", cm->current_video_frame, bsize, mi_row, + mi_col, ref_frame, second_ref_frame); + + if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; + if (second_ref_frame > INTRA_FRAME && + (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))) + continue; +#endif // 0 + +#if !USE_UNI_COMP_REFS + // NOTE(zoeliu): Temporarily disable uni-directional comp refs + if (second_ref_frame > INTRA_FRAME) { + if (!((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME))) + continue; + } + assert(second_ref_frame <= INTRA_FRAME || + ((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME))); +#endif // !USE_UNI_COMP_REFS +#endif // CONFIG_EXT_COMP_REFS + if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue; // Test best rd so far against threshold for trying this mode. @@ -9239,7 +10611,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, } mbmi->mode = this_mode; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; #if CONFIG_PALETTE @@ -9267,6 +10639,15 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Single ref compound mode + if (!comp_pred && is_inter_singleref_comp_mode(mbmi->mode)) { + xd->block_refs[1] = xd->block_refs[0]; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = xd->plane[i].pre[0]; + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + #if CONFIG_EXT_INTER && CONFIG_INTERINTRA mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1); #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA @@ -9277,7 +10658,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, struct macroblockd_plane *const pd = &xd->plane[1]; #if CONFIG_EXT_INTRA is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize); - if (is_directional_mode) { + if (is_directional_mode && av1_use_angle_delta(bsize)) { int rate_dummy; int64_t model_rd = INT64_MAX; if (!angle_stats_ready) { @@ -9390,10 +10771,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (av1_is_intra_filter_switchable(p_angle)) rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter]; #endif // CONFIG_INTRA_INTERP - rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, - MAX_ANGLE_DELTA + mbmi->angle_delta[0]); + if (av1_use_angle_delta(bsize)) { + rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, + MAX_ANGLE_DELTA + mbmi->angle_delta[0]); + } } - if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) { + if (av1_is_directional_mode(mbmi->uv_mode, bsize) && + av1_use_angle_delta(bsize)) { rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, MAX_ANGLE_DELTA + mbmi->angle_delta[1]); } @@ -9409,7 +10793,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, mbmi->filter_intra_mode_info.filter_intra_mode[0]); } } - if (mbmi->uv_mode == DC_PRED) { + if (mbmi->uv_mode == UV_DC_PRED) { rate2 += av1_cost_bit(cpi->common.fc->filter_intra_probs[1], mbmi->filter_intra_mode_info.use_filter_intra_mode[1]); @@ -9422,7 +10806,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (mbmi->mode 
!= DC_PRED && mbmi->mode != TM_PRED) rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize < BLOCK_8X8) distortion2_y = distortion_y; #endif } else { @@ -9481,6 +10865,27 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv; } } +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mbmi->mode)) { + if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { + // TODO(zoeliu): To further investigate which ref_mv_idx should be + // chosen for the mode of SR_NEAR_NEWMV. + int ref_mv_idx = 0; + // Special case: SR_NEAR_NEWMV mode use + // 1 + mbmi->ref_mv_idx (like NEARMV) instead of + // mbmi->ref_mv_idx (like NEWMV) + if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1; + + if (compound_ref0_mode(mbmi->mode) == NEWMV || + compound_ref1_mode(mbmi->mode) == NEWMV) { + int_mv this_mv = + mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; + clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2, + xd->n8_h << MI_SIZE_LOG2, xd); + mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv; + } + } +#endif // CONFIG_COMPOUND_SINGLEREF } else { #endif // CONFIG_EXT_INTER if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) { @@ -9500,6 +10905,19 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, { RD_STATS rd_stats, rd_stats_y, rd_stats_uv; av1_init_rd_stats(&rd_stats); +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + // While av1 master uses rd_stats_y.rate through out the codebase, + // which is set when handle_inter_moden is called, the daala-dist code + // in rd_pick_partition() for cb4x4 and sub8x8 blocks need to know + // .dist_y which comes from rd_stats_y.dist and rd_stats_y.sse. + // The problem is rd_stats_y.dist and rd_stats_y.sse are sometimes not + // initialized when rd_stats.skip = 1, + // then instead rd_stats.dist and rd_stats.sse have the + // combined luma and chroma dist and sse. + // This can be seen inside motion_mode_rd(), which is called by + // handle_inter_mode(). + if (bsize < BLOCK_8X8) av1_init_rd_stats(&rd_stats_y); +#endif rd_stats.rate = rate2; // Point to variables that are maintained between loop iterations @@ -9510,6 +10928,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_EXT_INTER this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &disable_skip, frame_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + frame_comp_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF mi_row, mi_col, &args, best_rd); rate2 = rd_stats.rate; @@ -9518,23 +10939,39 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, total_sse = rd_stats.sse; rate_y = rd_stats_y.rate; rate_uv = rd_stats_uv.rate; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + if (rd_stats_y.rate != INT_MAX) { + assert(rd_stats_y.sse < INT64_MAX); + assert(rd_stats_y.dist < INT64_MAX); + } + total_sse_y = rd_stats_y.sse; + distortion2_y = rd_stats_y.dist; + } #endif } // TODO(jingning): This needs some refactoring to improve code quality // and reduce redundant steps. 
#if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + if ((have_nearmv_in_inter_mode(mbmi->mode) && + mbmi_ext->ref_mv_count[ref_frame_type] > 2) || + ((mbmi->mode == NEWMV || mbmi->mode == SR_NEW_NEWMV || + mbmi->mode == NEW_NEWMV) && + mbmi_ext->ref_mv_count[ref_frame_type] > 1)) +#else // !CONFIG_COMPOUND_SINGLEREF if ((have_nearmv_in_inter_mode(mbmi->mode) && mbmi_ext->ref_mv_count[ref_frame_type] > 2) || ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) && - mbmi_ext->ref_mv_count[ref_frame_type] > 1)) { -#else + mbmi_ext->ref_mv_count[ref_frame_type] > 1)) +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER if ((mbmi->mode == NEARMV && mbmi_ext->ref_mv_count[ref_frame_type] > 2) || - (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) { -#endif + (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) +#endif // CONFIG_EXT_INTER + { int_mv backup_mv = frame_mv[NEARMV][ref_frame]; MB_MODE_INFO backup_mbmi = *mbmi; int backup_skip = x->skip; @@ -9560,18 +10997,16 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, rate2 += (rate2 < INT_MAX ? cpi->drl_mode_cost0[drl_ctx][0] : 0); if (this_rd < INT64_MAX) { - if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < - RDCOST(x->rdmult, x->rddiv, 0, total_sse)) - tmp_ref_rd = - RDCOST(x->rdmult, x->rddiv, - rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0), - distortion2); + if (RDCOST(x->rdmult, rate_y + rate_uv, distortion2) < + RDCOST(x->rdmult, 0, total_sse)) + tmp_ref_rd = RDCOST( + x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0), + distortion2); else - tmp_ref_rd = - RDCOST(x->rdmult, x->rddiv, - rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) - - rate_y - rate_uv, - total_sse); + tmp_ref_rd = RDCOST( + x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) - + rate_y - rate_uv, + total_sse); } #if CONFIG_VAR_TX for (i = 0; i < MAX_MB_PLANE; ++i) @@ -9587,6 +11022,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv; av1_invalid_rd_stats(&tmp_rd_stats); + x->skip = 0; mbmi->ref_mv_idx = 1 + ref_idx; @@ -9627,6 +11063,34 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, xd->n8_h << MI_SIZE_LOG2, xd); mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv; } +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mbmi->mode)) { + int ref_mv_idx = mbmi->ref_mv_idx; + // Special case: SR_NEAR_NEWMV mode use + // 1 + mbmi->ref_mv_idx (like NEARMV) instead of + // mbmi->ref_mv_idx (like NEWMV) + if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1 + mbmi->ref_mv_idx; + + // TODO(zoeliu): For the mode of SR_NEAREST_NEWMV, as it only runs + // the "if", not the "else if", + // mbmi_ext->ref_mvs[mbmi->ref_frame[0]] takes the + // value for "NEWMV", instead of "NEARESTMV". 
+ if (compound_ref0_mode(mbmi->mode) == NEWMV || + compound_ref1_mode(mbmi->mode) == NEWMV) { + int_mv this_mv = + mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; + clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2, + xd->n8_h << MI_SIZE_LOG2, xd); + mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv; + } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV || + compound_ref1_mode(mbmi->mode) == NEARESTMV) { + int_mv this_mv = + mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv; + clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2, + xd->n8_h << MI_SIZE_LOG2, xd); + mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv; + } +#endif // CONFIG_COMPOUND_SINGLEREF } else { #endif // CONFIG_EXT_INTER for (ref = 0; ref < 1 + comp_pred; ++ref) { @@ -9657,16 +11121,28 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, frame_mv[NEARMV][ref_frame] = cur_mv; av1_init_rd_stats(&tmp_rd_stats); - +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + // With the same reason as 'rd_stats_y' passed to above + // handle_inter_mode(), tmp_rd_stats_y.dist and + // tmp_rd_stats_y.sse are sometimes not initialized, esp. when + // tmp_rd_stats.skip = 1 and tmp_rd_stats.dist and .sse + // represent combined luma and chroma .dist and .sse, + // we should initialized tmp_rd_stats_y. + if (bsize < BLOCK_8X8) av1_init_rd_stats(&tmp_rd_stats_y); +#endif // Point to variables that are not maintained between iterations args.single_newmv = dummy_single_newmv; #if CONFIG_EXT_INTER args.single_newmv_rate = dummy_single_newmv_rate; args.modelled_rd = NULL; #endif // CONFIG_EXT_INTER - tmp_alt_rd = handle_inter_mode( - cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv, - &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd); + tmp_alt_rd = handle_inter_mode(cpi, x, bsize, &tmp_rd_stats, + &tmp_rd_stats_y, &tmp_rd_stats_uv, + &dummy_disable_skip, frame_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + frame_comp_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, &args, best_rd); // Prevent pointers from escaping local scope args.single_newmv = NULL; #if CONFIG_EXT_INTER @@ -9696,25 +11172,22 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (tmp_alt_rd < INT64_MAX) { #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - tmp_alt_rd = RDCOST(x->rdmult, x->rddiv, tmp_rd_stats.rate, - tmp_rd_stats.dist); + tmp_alt_rd = + RDCOST(x->rdmult, tmp_rd_stats.rate, tmp_rd_stats.dist); #else - if (RDCOST(x->rdmult, x->rddiv, - tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate, + if (RDCOST(x->rdmult, tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate, tmp_rd_stats.dist) < - RDCOST(x->rdmult, x->rddiv, 0, tmp_rd_stats.sse)) - tmp_alt_rd = - RDCOST(x->rdmult, x->rddiv, - tmp_rd_stats.rate + - av1_cost_bit(av1_get_skip_prob(cm, xd), 0), - tmp_rd_stats.dist); + RDCOST(x->rdmult, 0, tmp_rd_stats.sse)) + tmp_alt_rd = RDCOST( + x->rdmult, tmp_rd_stats.rate + + av1_cost_bit(av1_get_skip_prob(cm, xd), 0), + tmp_rd_stats.dist); else - tmp_alt_rd = - RDCOST(x->rdmult, x->rddiv, - tmp_rd_stats.rate + - av1_cost_bit(av1_get_skip_prob(cm, xd), 1) - - tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate, - tmp_rd_stats.sse); + tmp_alt_rd = RDCOST( + x->rdmult, tmp_rd_stats.rate + + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) - + tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate, + tmp_rd_stats.sse); #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION } @@ -9730,8 +11203,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, tmp_ref_rd = tmp_alt_rd; 
backup_mbmi = *mbmi; backup_skip = x->skip; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize < BLOCK_8X8) { + if (tmp_rd_stats_y.rate != INT_MAX) { + assert(tmp_rd_stats_y.sse < INT64_MAX); + assert(tmp_rd_stats_y.dist < INT64_MAX); + } total_sse_y = tmp_rd_stats_y.sse; distortion2_y = tmp_rd_stats_y.dist; } @@ -9774,19 +11251,33 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, // Estimate the reference frame signaling cost and add it // to the rolling cost variable. if (comp_pred) { +#if CONFIG_EXT_COMP_REFS + rate2 += ref_costs_comp[ref_frame][second_ref_frame]; +#else // !CONFIG_EXT_COMP_REFS rate2 += ref_costs_comp[ref_frame]; #if CONFIG_EXT_REFS rate2 += ref_costs_comp[second_ref_frame]; #endif // CONFIG_EXT_REFS +#endif // CONFIG_EXT_COMP_REFS } else { rate2 += ref_costs_single[ref_frame]; } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Add the cost to signal single/comp mode in single ref. + if (!comp_pred && cm->reference_mode != COMPOUND_REFERENCE) { + aom_prob singleref_comp_mode_p = av1_get_inter_mode_prob(cm, xd); + rate2 += av1_cost_bit(singleref_comp_mode_p, + is_inter_singleref_comp_mode(mbmi->mode)); + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - if (ref_frame == INTRA_FRAME) { + if (ref_frame == INTRA_FRAME) #else - if (!disable_skip) { + if (!disable_skip) #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION + { if (skippable) { // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); @@ -9795,9 +11286,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, // Cost the skip mb case rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1); } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) { - if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0, - distortion2) < - RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) { + if (RDCOST(x->rdmult, rate_y + rate_uv + rate_skip0, distortion2) < + RDCOST(x->rdmult, rate_skip1, total_sse)) { // Add in the cost of the no skip flag. rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); } else { @@ -9809,8 +11299,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, this_skip2 = 1; rate_y = 0; rate_uv = 0; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - if (bsize < BLOCK_8X8) distortion2_y = total_sse_y; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + assert(total_sse_y < INT64_MAX); + distortion2_y = total_sse_y; + } #endif } } else { @@ -9819,11 +11312,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, } // Calculate the final RD estimate for this mode. 
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION } else { this_skip2 = mbmi->skip; - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); if (this_skip2) { rate_y = 0; rate_uv = 0; @@ -9831,6 +11324,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION } +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if ((bsize < BLOCK_8X8) && (rate2 != INT_MAX)) { + assert(distortion2_y < INT64_MAX); + } +#endif + if (ref_frame == INTRA_FRAME) { // Keep record of best intra rd if (this_rd < best_intra_rd) { @@ -9875,12 +11374,18 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd), mbmi->ref_frame[0] != INTRA_FRAME); #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION +#if CONFIG_WARPED_MOTION + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); +#endif #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION MODE_INFO *const mi = xd->mi[0]; const MOTION_MODE motion_allowed = motion_mode_allowed( -#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#if CONFIG_GLOBAL_MOTION 0, xd->global_motion, -#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif mi); if (motion_allowed == WARPED_CAUSAL) *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode]; @@ -9901,8 +11406,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd), this_skip2 || skippable); best_rate_uv = rate_uv; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + assert(distortion2_y < INT64_MAX); + rd_cost->dist_y = distortion2_y; + } #endif #if CONFIG_VAR_TX for (i = 0; i < MAX_MB_PLANE; ++i) @@ -9911,7 +11419,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_VAR_TX } } - +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) { + assert(rd_cost->dist_y < INT64_MAX); + } +#endif /* keep record of best compound/single-only prediction */ if (!disable_skip && ref_frame != INTRA_FRAME) { int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; @@ -9924,8 +11436,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, hybrid_rate = rate2 + compmode_cost; } - single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); - hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); + single_rd = RDCOST(x->rdmult, single_rate, distortion2); + hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2); if (!comp_pred) { if (single_rd < best_pred_rd[SINGLE_REFERENCE]) @@ -9963,6 +11475,15 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Single ref compound mode + if (!has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode)) { + xd->block_refs[1] = xd->block_refs[0]; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = xd->plane[i].pre[0]; + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (is_inter_mode(mbmi->mode)) { av1_build_inter_predictors_sb(cm, xd, mi_row, 
mi_col, NULL, bsize); #if CONFIG_MOTION_VAR @@ -9996,9 +11517,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); } - if (RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate, + if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate, (rd_stats_y.dist + rd_stats_uv.dist)) > - RDCOST(x->rdmult, x->rddiv, 0, (rd_stats_y.sse + rd_stats_uv.sse))) { + RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) { skip_blk = 1; rd_stats_y.rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1); rd_stats_uv.rate = 0; @@ -10009,8 +11530,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, rd_stats_y.rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); } - if (RDCOST(x->rdmult, x->rddiv, best_rate_y + best_rate_uv, rd_cost->dist) > - RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate, + if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) > + RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate, (rd_stats_y.dist + rd_stats_uv.dist))) { #if CONFIG_VAR_TX int idx, idy; @@ -10031,15 +11552,24 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, rd_cost->rate += (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv); rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist; -#endif - rd_cost->rdcost = - RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); + rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist); best_skip2 = skip_blk; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + assert(rd_cost->rate != INT_MAX); + assert(rd_cost->dist_y < INT64_MAX); + rd_cost->dist_y = rd_stats_y.dist; + } +#endif } } +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) { + assert(rd_cost->dist_y < INT64_MAX); + } +#endif + #if CONFIG_PALETTE // Only try palette mode when the best mode so far is an intra mode. if (try_palette && !is_inter_mode(best_mbmode.mode)) { @@ -10058,7 +11588,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, MB_MODE_INFO best_mbmi_palette = best_mbmode; mbmi->mode = DC_PRED; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE_FRAME; rate_overhead_palette = rd_pick_palette_intra_sby( @@ -10119,7 +11649,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_SUPERTX rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); } - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); if (this_rd < best_rd) { best_mode_index = 3; mbmi->mv[0].as_int = 0; @@ -10165,10 +11695,14 @@ PALETTE_EXIT: } #endif // CONFIG_FILTER_INTRA - // The inter modes' rate costs are not calculated precisely in some cases. - // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and - // ZEROMV. Here, checks are added for those cases, and the mode decisions - // are corrected. +// The inter modes' rate costs are not calculated precisely in some cases. +// Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and +// ZEROMV. Here, checks are added for those cases, and the mode decisions +// are corrected. 
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF +// NOTE: For SR_NEW_NEWMV, no need to check as the two mvs from the same ref +// are surely different from each other. +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF if (best_mbmode.mode == NEWMV #if CONFIG_EXT_INTER || best_mbmode.mode == NEW_NEWMV @@ -10248,8 +11782,9 @@ PALETTE_EXIT: } if (nearestmv[0].as_int == best_mbmode.mv[0].as_int && - nearestmv[1].as_int == best_mbmode.mv[1].as_int) { + nearestmv[1].as_int == best_mbmode.mv[1].as_int) #if CONFIG_EXT_INTER + { best_mbmode.mode = NEAREST_NEARESTMV; } else { int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2) @@ -10274,6 +11809,7 @@ PALETTE_EXIT: best_mbmode.mode = ZERO_ZEROMV; } #else + { best_mbmode.mode = NEARESTMV; } else if (best_mbmode.mv[0].as_int == zeromv[0].as_int && best_mbmode.mv[1].as_int == zeromv[1].as_int) { @@ -10287,11 +11823,18 @@ PALETTE_EXIT: // using a mode which can support ref_mv_idx if (best_mbmode.ref_mv_idx != 0 && #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + !(best_mbmode.mode == NEWMV || best_mbmode.mode == SR_NEW_NEWMV || + best_mbmode.mode == NEW_NEWMV || + have_nearmv_in_inter_mode(best_mbmode.mode))) +#else // !CONFIG_COMPOUND_SINGLEREF !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV || - have_nearmv_in_inter_mode(best_mbmode.mode))) { -#else - !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) { -#endif + have_nearmv_in_inter_mode(best_mbmode.mode))) +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER + !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) +#endif // CONFIG_EXT_INTER + { best_mbmode.ref_mv_idx = 0; } @@ -10377,11 +11920,12 @@ PALETTE_EXIT: ) { #if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR // Correct the motion mode for ZEROMV - const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed( -#if SEPARATE_GLOBAL_MOTION - 0, xd->global_motion, -#endif // SEPARATE_GLOBAL_MOTION - xd->mi[0]); + const MOTION_MODE last_motion_mode_allowed = + motion_mode_allowed(0, xd->global_motion, +#if CONFIG_WARPED_MOTION + xd, +#endif + xd->mi[0]); if (mbmi->motion_mode > last_motion_mode_allowed) mbmi->motion_mode = last_motion_mode_allowed; #endif // CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR @@ -10445,7 +11989,11 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, int i; int64_t best_pred_diff[REFERENCE_MODES]; unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME]; +#if CONFIG_EXT_COMP_REFS + unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME]; +#else unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME]; +#endif // CONFIG_EXT_COMP_REFS aom_prob comp_mode_p; InterpFilter best_filter = SWITCHABLE; int64_t this_rd = INT64_MAX; @@ -10476,7 +12024,7 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, #endif // CONFIG_FILTER_INTRA mbmi->mode = ZEROMV; mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = LAST_FRAME; mbmi->ref_frame[1] = NONE_FRAME; #if CONFIG_GLOBAL_MOTION @@ -10501,7 +12049,17 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, #if CONFIG_WARPED_MOTION if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) { int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; +#if WARPED_MOTION_SORT_SAMPLES + int pts_mv[SAMPLES_ARRAY_SIZE]; + mbmi->num_proj_ref[0] = + findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv); + // Rank the samples by motion vector difference + if (mbmi->num_proj_ref[0] > 1) + mbmi->num_proj_ref[0] = sortSamples(pts_mv, 
&mbmi->mv[0].as_mv, pts, + pts_inref, mbmi->num_proj_ref[0]); +#else mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref); +#endif // WARPED_MOTION_SORT_SAMPLES } #endif @@ -10548,12 +12106,12 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, // Estimate the reference frame signaling cost and add it // to the rolling cost variable. rate2 += ref_costs_single[LAST_FRAME]; - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); rd_cost->rate = rate2; rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2; #endif if (this_rd >= best_rd_so_far) { @@ -10646,7 +12204,8 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, // handle above row if (xd->up_available) { - const int overlap = num_4x4_blocks_high_lookup[bsize] * 2; + const int overlap = + AOMMIN(block_size_high[bsize] >> 1, block_size_high[BLOCK_64X64] >> 1); const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col); const int mi_row_offset = -1; const uint8_t *const mask1d = av1_get_obmc_mask(overlap); @@ -10666,7 +12225,9 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, &xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi; #endif const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8); - const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]); + const int above_step = + AOMMIN(mi_size_wide[a_bsize], mi_size_wide[BLOCK_64X64]); + const int mi_step = AOMMIN(xd->n8_w, above_step); const int neighbor_bw = mi_step * MI_SIZE; if (is_neighbor_overlappable(above_mbmi)) { @@ -10725,7 +12286,8 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, // handle left column if (xd->left_available) { - const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2; + const int overlap = + AOMMIN(block_size_wide[bsize] >> 1, block_size_wide[BLOCK_64X64] >> 1); const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row); const int mi_col_offset = -1; const uint8_t *const mask1d = av1_get_obmc_mask(overlap); @@ -10746,7 +12308,9 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, &xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi; #endif const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8); - const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]); + const int left_step = + AOMMIN(mi_size_high[l_bsize], mi_size_high[BLOCK_64X64]); + const int mi_step = AOMMIN(xd->n8_h, left_step); const int neighbor_bh = mi_step * MI_SIZE; if (is_neighbor_overlappable(left_mbmi)) { @@ -10854,8 +12418,23 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); av1_subtract_plane(x, bsize, 0); +#if CONFIG_VAR_TX + if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) { + select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); + } else { + int idx, idy; + super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); + for (idy = 0; idy < xd->n8_h; ++idy) + for (idx = 0; idx < xd->n8_w; ++idx) + mbmi->inter_tx_size[idy][idx] = mbmi->tx_size; + memset(x->blk_skip[0], rd_stats_y.skip, + sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4); + } + inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); +#else super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); super_block_uvrd(cpi, x, 
&rd_stats_uv, bsize, INT64_MAX); +#endif assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX); if (rd_stats_y.skip && rd_stats_uv.skip) { rd_stats_y.rate = rate_skip1; @@ -10863,10 +12442,10 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, rd_stats_y.dist = rd_stats_y.sse; rd_stats_uv.dist = rd_stats_uv.sse; skip_blk = 0; - } else if (RDCOST(x->rdmult, x->rddiv, + } else if (RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0), (rd_stats_y.dist + rd_stats_uv.dist)) > - RDCOST(x->rdmult, x->rddiv, rate_skip1, + RDCOST(x->rdmult, rate_skip1, (rd_stats_y.sse + rd_stats_uv.sse))) { rd_stats_y.rate = rate_skip1; rd_stats_uv.rate = 0; @@ -10879,18 +12458,33 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, } backup_skip = skip_blk; backup_mbmi = *mbmi; - rd_causal = RDCOST(x->rdmult, x->rddiv, (rd_stats_y.rate + rd_stats_uv.rate), + rd_causal = RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate), (rd_stats_y.dist + rd_stats_uv.dist)); - rd_causal += RDCOST(x->rdmult, x->rddiv, - av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0); + rd_causal += + RDCOST(x->rdmult, av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0); // Check non-causal mode mbmi->motion_mode = OBMC_CAUSAL; av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col); av1_subtract_plane(x, bsize, 0); +#if CONFIG_VAR_TX + if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) { + select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); + } else { + int idx, idy; + super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); + for (idy = 0; idy < xd->n8_h; ++idy) + for (idx = 0; idx < xd->n8_w; ++idx) + mbmi->inter_tx_size[idy][idx] = mbmi->tx_size; + memset(x->blk_skip[0], rd_stats_y.skip, + sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4); + } + inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); +#else super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); +#endif assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX); if (rd_stats_y.skip && rd_stats_uv.skip) { rd_stats_y.rate = rate_skip1; @@ -10898,10 +12492,10 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, rd_stats_y.dist = rd_stats_y.sse; rd_stats_uv.dist = rd_stats_uv.sse; skip_blk = 0; - } else if (RDCOST(x->rdmult, x->rddiv, + } else if (RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0), (rd_stats_y.dist + rd_stats_uv.dist)) > - RDCOST(x->rdmult, x->rddiv, rate_skip1, + RDCOST(x->rdmult, rate_skip1, (rd_stats_y.sse + rd_stats_uv.sse))) { rd_stats_y.rate = rate_skip1; rd_stats_uv.rate = 0; @@ -10914,9 +12508,8 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, } if (rd_causal > - RDCOST(x->rdmult, x->rddiv, - rd_stats_y.rate + rd_stats_uv.rate + - av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1), + RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate + + av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1), (rd_stats_y.dist + rd_stats_uv.dist))) { x->skip = skip_blk; } else { diff --git a/third_party/aom/av1/encoder/rdopt.h b/third_party/aom/av1/encoder/rdopt.h index e5d778fe5..43a6a3794 100644 --- a/third_party/aom/av1/encoder/rdopt.h +++ b/third_party/aom/av1/encoder/rdopt.h @@ -57,22 +57,33 @@ typedef enum OUTPUT_STATUS { OUTPUT_HAS_DECODED_PIXELS } OUTPUT_STATUS; +#if CONFIG_PALETTE || CONFIG_INTRABC +// Returns the number of colors in 'src'. 
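The av1_count_colors() declaration on the next line of this hunk (and its high-bitdepth variant below it) feeds the palette / IntraBC decisions. A plausible sketch of such a helper, assuming it simply counts distinct sample values with a presence table; the body shown here is an assumption, not the patch's implementation:

// Sketch: count distinct 8-bit sample values in a rows x cols block.
// Assumed behaviour only; the real av1_count_colors() body is not in this
// diff.
static int count_colors_sketch(const uint8_t *src, int stride, int rows,
                               int cols) {
  int present[256] = { 0 };
  int n = 0;
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      const int v = src[r * stride + c];
      if (!present[v]) {
        present[v] = 1;
        ++n;
      }
    }
  }
  return n;
}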
+int av1_count_colors(const uint8_t *src, int stride, int rows, int cols); +#if CONFIG_HIGHBITDEPTH +// Same as av1_count_colors(), but for high-bitdepth mode. +int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, + int bit_depth); +#endif // CONFIG_HIGHBITDEPTH +#endif // CONFIG_PALETTE || CONFIG_INTRABC + void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col, TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse, OUTPUT_STATUS output_status); -#if CONFIG_DAALA_DIST -int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst, - int dst_stride, int bsw, int bsh, int qm, - int use_activity_masking, int qindex); +#if CONFIG_DIST_8X8 +int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd, + const uint8_t *src, int src_stride, const uint8_t *dst, + int dst_stride, const BLOCK_SIZE tx_bsize, int bsw, + int bsh, int visible_w, int visible_h, int qindex); #endif #if !CONFIG_PVQ || CONFIG_VAR_TX int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, - int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order, - const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l, - int use_fast_coef_costing); + int blk_row, int blk_col, int block, TX_SIZE tx_size, + const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a, + const ENTROPY_CONTEXT *l, int use_fast_coef_costing); #endif void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x, struct RD_STATS *rd_cost, BLOCK_SIZE bsize, diff --git a/third_party/aom/av1/encoder/segmentation.c b/third_party/aom/av1/encoder/segmentation.c index b581a61d0..b61df43fa 100644 --- a/third_party/aom/av1/encoder/segmentation.c +++ b/third_party/aom/av1/encoder/segmentation.c @@ -299,12 +299,8 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) { int no_pred_cost; int t_pred_cost = INT_MAX; - int i, tile_col, tile_row, mi_row, mi_col; -#if CONFIG_TILE_GROUPS + int tile_col, tile_row, mi_row, mi_col; const int probwt = cm->num_tg; -#else - const int probwt = 1; -#endif unsigned(*temporal_predictor_count)[2] = cm->counts.seg.pred; unsigned *no_pred_segcounts = cm->counts.seg.tree_total; @@ -312,7 +308,9 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) { aom_prob no_pred_tree[SEG_TREE_PROBS]; aom_prob t_pred_tree[SEG_TREE_PROBS]; +#if !CONFIG_NEW_MULTISYMBOL aom_prob t_nopred_prob[PREDICTION_PROBS]; +#endif (void)xd; @@ -327,7 +325,7 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) { for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) { MODE_INFO **mi_ptr; av1_tile_set_col(&tile_info, cm, tile_col); -#if CONFIG_TILE_GROUPS && CONFIG_DEPENDENT_HORZTILES +#if CONFIG_DEPENDENT_HORZTILES av1_tile_set_tg_boundary(&tile_info, cm, tile_row, tile_col); #endif mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride + @@ -357,8 +355,9 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) { calc_segtree_probs(t_unpred_seg_counts, t_pred_tree, segp->tree_probs, probwt); t_pred_cost = cost_segmap(t_unpred_seg_counts, t_pred_tree); - +#if !CONFIG_NEW_MULTISYMBOL // Add in the cost of the signaling for each prediction context. 
+ int i; for (i = 0; i < PREDICTION_PROBS; i++) { const int count0 = temporal_predictor_count[i][0]; const int count1 = temporal_predictor_count[i][1]; @@ -372,6 +371,7 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) { t_pred_cost += count0 * av1_cost_zero(t_nopred_prob[i]) + count1 * av1_cost_one(t_nopred_prob[i]); } +#endif } // Now choose which coding method to use. diff --git a/third_party/aom/av1/encoder/speed_features.c b/third_party/aom/av1/encoder/speed_features.c index e2275a54f..eeab33a95 100644 --- a/third_party/aom/av1/encoder/speed_features.c +++ b/third_party/aom/av1/encoder/speed_features.c @@ -35,7 +35,7 @@ static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = { // TODO(aconverse@google.com): These settings are pretty relaxed, tune them for // each speed setting static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = { - { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } }, + { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } }, { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } }, { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } }, { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } }, @@ -171,12 +171,24 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi, sf->recode_loop = ALLOW_RECODE_KFARFGF; #if CONFIG_TX64X64 sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V; +#if CONFIG_CFL + sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V; +#else sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC_H_V; +#endif // CONFIG_CFL #endif // CONFIG_TX64X64 sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; +#if CONFIG_CFL + sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V; +#else sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; +#endif sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; +#if CONFIG_CFL + sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V; +#else sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; +#endif sf->tx_size_search_breakout = 1; sf->partition_search_breakout_rate_thr = 80; @@ -199,7 +211,7 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi, : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR; sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->allow_partition_search_skip = 1; sf->use_upsampled_references = 0; @@ -227,10 +239,18 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi, sf->mode_skip_start = 6; #if CONFIG_TX64X64 sf->intra_y_mode_mask[TX_64X64] = INTRA_DC; +#if CONFIG_CFL + sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC; +#else sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC; +#endif // CONFIG_CFL #endif // CONFIG_TX64X64 sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; +#if CONFIG_CFL + sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC; +#else sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; +#endif // CONFIG_CFL sf->adaptive_interp_filter_search = 1; } @@ -255,7 +275,11 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi, sf->disable_filter_search_var_thresh = 500; for (i = 0; i < TX_SIZES; ++i) { sf->intra_y_mode_mask[i] = INTRA_DC; +#if CONFIG_CFL + sf->intra_uv_mode_mask[i] = UV_INTRA_DC; +#else sf->intra_uv_mode_mask[i] = INTRA_DC; +#endif // CONFIG_CFL } sf->partition_search_breakout_rate_thr = 500; sf->mv.reduce_first_step_size = 1; @@ -405,7 +429,11 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) { for (i = 0; i < 
TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_ALL; +#if CONFIG_CFL + sf->intra_uv_mode_mask[i] = UV_INTRA_ALL; +#else sf->intra_uv_mode_mask[i] = INTRA_ALL; +#endif // CONFIG_CFL } sf->use_rd_breakout = 0; sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; @@ -413,7 +441,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) { sf->use_fast_coef_costing = 0; sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set sf->schedule_mode_search = 0; - for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL; + for (i = 0; i < BLOCK_SIZES_ALL; ++i) sf->inter_mode_mask[i] = INTER_ALL; sf->max_intra_bsize = BLOCK_LARGEST; sf->reuse_inter_pred_sby = 0; // This setting only takes effect when partition_search_type is set diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h index 5710d77c7..2c89f4e5c 100644 --- a/third_party/aom/av1/encoder/speed_features.h +++ b/third_party/aom/av1/encoder/speed_features.h @@ -29,6 +29,24 @@ enum { #endif // CONFIG_SMOOTH_HV #endif // CONFIG_ALT_INTRA (1 << TM_PRED), +#if CONFIG_CFL + UV_INTRA_ALL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) | + (1 << UV_D45_PRED) | (1 << UV_D135_PRED) | + (1 << UV_D117_PRED) | (1 << UV_D153_PRED) | + (1 << UV_D207_PRED) | (1 << UV_D63_PRED) | +#if CONFIG_ALT_INTRA + (1 << UV_SMOOTH_PRED) | +#if CONFIG_SMOOTH_HV + (1 << UV_SMOOTH_V_PRED) | (1 << UV_SMOOTH_H_PRED) | +#endif // CONFIG_SMOOTH_HV +#endif // CONFIG_ALT_INTRA + (1 << UV_TM_PRED), + UV_INTRA_DC = (1 << UV_DC_PRED), + UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_TM_PRED), + UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED), + UV_INTRA_DC_TM_H_V = (1 << UV_DC_PRED) | (1 << UV_TM_PRED) | + (1 << UV_V_PRED) | (1 << UV_H_PRED), +#endif // CONFIG_CFL INTRA_DC = (1 << DC_PRED), INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED), INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED), @@ -38,6 +56,11 @@ enum { #if CONFIG_EXT_INTER enum { +#if CONFIG_COMPOUND_SINGLEREF +// TODO(zoeliu): To further consider following single ref comp modes: +// SR_NEAREST_NEARMV, SR_NEAREST_NEWMV, SR_NEAR_NEWMV, +// SR_ZERO_NEWMV, and SR_NEW_NEWMV. +#endif // CONFIG_COMPOUND_SINGLEREF INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) | (1 << NEW_NEARMV) | @@ -67,7 +90,7 @@ enum { (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV), }; -#else +#else // !CONFIG_EXT_INTER enum { INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV), INTER_NEAREST = (1 << NEARESTMV), @@ -399,10 +422,6 @@ typedef struct SPEED_FEATURES { int intra_y_mode_mask[TX_SIZES]; int intra_uv_mode_mask[TX_SIZES]; - // These bit masks allow you to enable or disable intra modes for each - // prediction block size separately. - int intra_y_mode_bsize_mask[BLOCK_SIZES]; - // This variable enables an early break out of mode testing if the model for // rd built from the prediction signal indicates a value that's much // higher than the best rd we've seen so far. @@ -417,7 +436,7 @@ typedef struct SPEED_FEATURES { // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV // modes are used in order from LSB to MSB for each BLOCK_SIZE. - int inter_mode_mask[BLOCK_SIZES]; + int inter_mode_mask[BLOCK_SIZES_ALL]; // This feature controls whether we do the expensive context update and // calculation in the rd coefficient costing loop. 
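The intra/inter mode masks touched above are plain bit sets indexed by block size, one bit per prediction mode, as the UV_INTRA_* and INTER_* enum values in this header show (e.g. INTER_NEAREST = (1 << NEARESTMV)). A small sketch of how such a mask is typically consulted; the helper name and the bare int types are illustrative, not an existing libaom function:

// Sketch: a mode is searched for a given block size only if its bit is set in
// the speed-feature mask. With a mask equal to INTER_NEAREST, everything but
// NEARESTMV would be skipped for that block size.
static int inter_mode_allowed_sketch(const int *inter_mode_mask, int bsize,
                                     int mode) {
  return (inter_mode_mask[bsize] >> mode) & 1;
}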
diff --git a/third_party/aom/av1/encoder/temporal_filter.c b/third_party/aom/av1/encoder/temporal_filter.c index 1ed1ebdb2..604647922 100644 --- a/third_party/aom/av1/encoder/temporal_filter.c +++ b/third_party/aom/av1/encoder/temporal_filter.c @@ -41,7 +41,7 @@ static void temporal_filter_predictors_mb_c( enum mv_precision mv_precision_uv; int uv_stride; // TODO(angiebird): change plane setting accordingly - ConvolveParams conv_params = get_conv_params(which_mv, 0); + ConvolveParams conv_params = get_conv_params(which_mv, which_mv, 0); #if USE_TEMPORALFILTER_12TAP #if CONFIG_DUAL_FILTER @@ -413,10 +413,10 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi, mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale, mb_col * 16, mb_row * 16); +// Apply the filter (YUV) #if CONFIG_HIGHBITDEPTH if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int adj_strength = strength + 2 * (mbd->bd - 8); - // Apply the filter (YUV) av1_highbd_temporal_filter_apply( f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16, adj_strength, filter_weight, accumulator, count); @@ -429,7 +429,7 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi, mb_uv_width, mb_uv_height, adj_strength, filter_weight, accumulator + 512, count + 512); } else { - // Apply the filter (YUV) +#endif // CONFIG_HIGHBITDEPTH av1_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16, strength, filter_weight, accumulator, count); @@ -441,29 +441,17 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi, f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512, mb_uv_width, mb_uv_height, strength, filter_weight, accumulator + 512, count + 512); +#if CONFIG_HIGHBITDEPTH } -#else - // Apply the filter (YUV) - av1_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, - predictor, 16, 16, strength, - filter_weight, accumulator, count); - av1_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride, - predictor + 256, mb_uv_width, - mb_uv_height, strength, filter_weight, - accumulator + 256, count + 256); - av1_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride, - predictor + 512, mb_uv_width, - mb_uv_height, strength, filter_weight, - accumulator + 512, count + 512); #endif // CONFIG_HIGHBITDEPTH } } +// Normalize filter output to produce AltRef frame #if CONFIG_HIGHBITDEPTH if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *dst1_16; uint16_t *dst2_16; - // Normalize filter output to produce AltRef frame dst1 = cpi->alt_ref_buffer.y_buffer; dst1_16 = CONVERT_TO_SHORTPTR(dst1); stride = cpi->alt_ref_buffer.y_stride; @@ -505,7 +493,7 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi, byte += stride - mb_uv_width; } } else { - // Normalize filter output to produce AltRef frame +#endif // CONFIG_HIGHBITDEPTH dst1 = cpi->alt_ref_buffer.y_buffer; stride = cpi->alt_ref_buffer.y_stride; byte = mb_y_offset; @@ -541,43 +529,7 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi, } byte += stride - mb_uv_width; } - } -#else - // Normalize filter output to produce AltRef frame - dst1 = cpi->alt_ref_buffer.y_buffer; - stride = cpi->alt_ref_buffer.y_stride; - byte = mb_y_offset; - for (i = 0, k = 0; i < 16; i++) { - for (j = 0; j < 16; j++, k++) { - dst1[byte] = - (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); - - // move to next pixel - byte++; - } - byte += stride - 16; - } - - dst1 = cpi->alt_ref_buffer.u_buffer; - dst2 = cpi->alt_ref_buffer.v_buffer; - stride = cpi->alt_ref_buffer.uv_stride; - byte = mb_uv_offset; - for (i = 0, k = 256; i < 
mb_uv_height; i++) { - for (j = 0; j < mb_uv_width; j++, k++) { - int m = k + 256; - - // U - dst1[byte] = - (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); - - // V - dst2[byte] = - (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]); - - // move to next pixel - byte++; - } - byte += stride - mb_uv_width; +#if CONFIG_HIGHBITDEPTH } #endif // CONFIG_HIGHBITDEPTH mb_y_offset += 16; @@ -650,7 +602,11 @@ static void adjust_arnr_filter(AV1_COMP *cpi, int distance, int group_boost, *arnr_strength = strength; } -void av1_temporal_filter(AV1_COMP *cpi, int distance) { +void av1_temporal_filter(AV1_COMP *cpi, +#if CONFIG_BGSPRITE + YV12_BUFFER_CONFIG *bg, +#endif // CONFIG_BGSPRITE + int distance) { RATE_CONTROL *const rc = &cpi->rc; int frame; int frames_to_blur; @@ -692,9 +648,18 @@ void av1_temporal_filter(AV1_COMP *cpi, int distance) { // Setup frame pointers, NULL indicates frame not included in filter. for (frame = 0; frame < frames_to_blur; ++frame) { const int which_buffer = start_frame - frame; - struct lookahead_entry *buf = - av1_lookahead_peek(cpi->lookahead, which_buffer); - frames[frames_to_blur - 1 - frame] = &buf->img; +#if CONFIG_BGSPRITE + if (frame == frames_to_blur_backward && bg != NULL) { + // Insert bg into frames at ARF index. + frames[frames_to_blur - 1 - frame] = bg; + } else { +#endif // CONFIG_BGSPRITE + struct lookahead_entry *buf = + av1_lookahead_peek(cpi->lookahead, which_buffer); + frames[frames_to_blur - 1 - frame] = &buf->img; +#if CONFIG_BGSPRITE + } +#endif // CONFIG_BGSPRITE } if (frames_to_blur > 0) { diff --git a/third_party/aom/av1/encoder/temporal_filter.h b/third_party/aom/av1/encoder/temporal_filter.h index bc0863a63..ebb24703f 100644 --- a/third_party/aom/av1/encoder/temporal_filter.h +++ b/third_party/aom/av1/encoder/temporal_filter.h @@ -16,7 +16,11 @@ extern "C" { #endif -void av1_temporal_filter(AV1_COMP *cpi, int distance); +void av1_temporal_filter(AV1_COMP *cpi, +#if CONFIG_BGSPRITE + YV12_BUFFER_CONFIG *bg, +#endif // CONFIG_BGSPRITE + int distance); #ifdef __cplusplus } // extern "C" diff --git a/third_party/aom/av1/encoder/tokenize.c b/third_party/aom/av1/encoder/tokenize.c index 18d2cd958..b9db891b3 100644 --- a/third_party/aom/av1/encoder/tokenize.c +++ b/third_party/aom/av1/encoder/tokenize.c @@ -277,12 +277,12 @@ static void cost_coeffs_b(int plane, int block, int blk_row, int blk_col, struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; const PLANE_TYPE type = pd->plane_type; - const int ref = is_inter_block(mbmi); - const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size); - const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, ref); - const int rate = av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, - pd->above_context + blk_col, - pd->left_context + blk_row, 0); + const TX_TYPE tx_type = + av1_get_tx_type(type, xd, blk_row, blk_col, block, tx_size); + const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi); + const int rate = av1_cost_coeffs( + cpi, x, plane, blk_row, blk_col, block, tx_size, scan_order, + pd->above_context + blk_col, pd->left_context + blk_row, 0); args->this_rate += rate; (void)plane_bsize; av1_set_contexts(xd, pd, plane, tx_size, p->eobs[block] > 0, blk_col, @@ -323,42 +323,48 @@ void av1_tokenize_palette_sb(const AV1_COMP *cpi, const struct ThreadData *const td, int plane, TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate) { + assert(plane == 0 || plane == 1); const MACROBLOCK *const x = 
&td->mb; const MACROBLOCKD *const xd = &x->e_mbd; const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const uint8_t *const color_map = xd->plane[plane].color_index_map; const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - const int n = pmi->palette_size[plane]; - int i, j; - int this_rate = 0; - uint8_t color_order[PALETTE_MAX_SIZE]; - const aom_prob( - *const probs)[PALETTE_COLOR_INDEX_CONTEXTS][PALETTE_COLORS - 1] = - plane == 0 ? av1_default_palette_y_color_index_prob - : av1_default_palette_uv_color_index_prob; + aom_cdf_prob( + *palette_cdf)[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = + plane ? xd->tile_ctx->palette_uv_color_index_cdf + : xd->tile_ctx->palette_y_color_index_cdf; int plane_block_width, rows, cols; av1_get_block_dimensions(bsize, plane, xd, &plane_block_width, NULL, &rows, &cols); - assert(plane == 0 || plane == 1); + // The first color index does not use context or entropy. + (*t)->token = color_map[0]; + (*t)->palette_cdf = NULL; + (*t)->skip_eob_node = 0; + ++(*t); + + const int n = pmi->palette_size[plane]; + const int calc_rate = rate && dry_run == DRY_RUN_COSTCOEFFS; + int this_rate = 0; + uint8_t color_order[PALETTE_MAX_SIZE]; #if CONFIG_PALETTE_THROUGHPUT - int k; - for (k = 1; k < rows + cols - 1; ++k) { - for (j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) { - i = k - j; + for (int k = 1; k < rows + cols - 1; ++k) { + for (int j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) { + int i = k - j; #else - for (i = 0; i < rows; ++i) { - for (j = (i == 0 ? 1 : 0); j < cols; ++j) { + for (int i = 0; i < rows; ++i) { + for (int j = (i == 0 ? 1 : 0); j < cols; ++j) { #endif // CONFIG_PALETTE_THROUGHPUT int color_new_idx; const int color_ctx = av1_get_palette_color_index_context( color_map, plane_block_width, i, j, n, color_order, &color_new_idx); assert(color_new_idx >= 0 && color_new_idx < n); - if (dry_run == DRY_RUN_COSTCOEFFS) + if (calc_rate) { this_rate += cpi->palette_y_color_cost[n - PALETTE_MIN_SIZE][color_ctx] [color_new_idx]; + } (*t)->token = color_new_idx; - (*t)->context_tree = probs[n - PALETTE_MIN_SIZE][color_ctx]; + (*t)->palette_cdf = palette_cdf[n - PALETTE_MIN_SIZE][color_ctx]; (*t)->skip_eob_node = 0; ++(*t); } @@ -434,17 +440,13 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, const int segment_id = mbmi->segment_id; #endif // CONFIG_SUEPRTX const int16_t *scan, *nb; - const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size); - const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); + const TX_TYPE tx_type = + av1_get_tx_type(type, xd, blk_row, blk_col, block, tx_size); + const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi); const int ref = is_inter_block(mbmi); unsigned int(*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] = td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref]; -#if CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *ec_ctx = cpi->common.fc; -#endif aom_cdf_prob( *const coef_head_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] = ec_ctx->coef_head_cdfs[txsize_sqr_map[tx_size]][type][ref]; @@ -595,16 +597,31 @@ void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, cost_coeffs_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg); #endif } else { +#if CONFIG_RECT_TX_EXT + int is_qttx = plane_tx_size == quarter_txsize_lookup[plane_bsize]; + const TX_SIZE sub_txs = is_qttx ? 
plane_tx_size : sub_tx_size_map[tx_size]; +#else // Half the block size in transform block unit. const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; +#endif const int bsl = tx_size_wide_unit[sub_txs]; int i; assert(bsl > 0); for (i = 0; i < 4; ++i) { +#if CONFIG_RECT_TX_EXT + int is_wide_tx = tx_size_wide_unit[sub_txs] > tx_size_high_unit[sub_txs]; + const int offsetr = + is_qttx ? (is_wide_tx ? i * tx_size_high_unit[sub_txs] : 0) + : blk_row + ((i >> 1) * bsl); + const int offsetc = + is_qttx ? (is_wide_tx ? 0 : i * tx_size_wide_unit[sub_txs]) + : blk_col + ((i & 0x01) * bsl); +#else const int offsetr = blk_row + ((i >> 1) * bsl); const int offsetc = blk_col + ((i & 0x01) * bsl); +#endif int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs]; @@ -666,7 +683,7 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, } #endif const struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 +#if CONFIG_CHROMA_SUB8X8 const BLOCK_SIZE plane_bsize = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); #else @@ -681,14 +698,30 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int idx, idy; int block = 0; int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size]; - for (idy = 0; idy < mi_height; idy += bh) { - for (idx = 0; idx < mi_width; idx += bw) { - tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx, - block, plane, &arg); - block += step; + + const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, pd); + int mu_blocks_wide = + block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; + int mu_blocks_high = + block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; + + mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide); + mu_blocks_high = AOMMIN(mi_height, mu_blocks_high); + + for (idy = 0; idy < mi_height; idy += mu_blocks_high) { + for (idx = 0; idx < mi_width; idx += mu_blocks_wide) { + int blk_row, blk_col; + const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height); + const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width); + for (blk_row = idy; blk_row < unit_height; blk_row += bh) { + for (blk_col = idx; blk_col < unit_width; blk_col += bw) { + tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, blk_row, + blk_col, block, plane, &arg); + block += step; + } + } } } - #if !CONFIG_LV_MAP if (!dry_run) { (*t)->token = EOSB_TOKEN; diff --git a/third_party/aom/av1/encoder/tokenize.h b/third_party/aom/av1/encoder/tokenize.h index cbfa3cd91..73f0305fa 100644 --- a/third_party/aom/av1/encoder/tokenize.h +++ b/third_party/aom/av1/encoder/tokenize.h @@ -37,6 +37,9 @@ typedef struct { typedef struct { aom_cdf_prob (*tail_cdf)[CDF_SIZE(ENTROPY_TOKENS)]; aom_cdf_prob (*head_cdf)[CDF_SIZE(ENTROPY_TOKENS)]; +#if CONFIG_PALETTE + aom_cdf_prob *palette_cdf; +#endif // CONFIG_PALETTE int eob_val; int first_val; const aom_prob *context_tree; diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c new file mode 100644 index 000000000..c8d4ccb70 --- /dev/null +++ b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include <immintrin.h> + +#include "./av1_rtcd.h" +#include "aom/aom_integer.h" +#include "aom_dsp/aom_dsp_common.h" + +static INLINE void init_one_qp(const __m128i *p, __m256i *qp) { + const __m128i zero = _mm_setzero_si128(); + const __m128i dc = _mm_unpacklo_epi16(*p, zero); + const __m128i ac = _mm_unpackhi_epi16(*p, zero); + *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(dc), ac, 1); +} + +static INLINE void update_qp(__m256i *qp) { + qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); + qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); + qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11); +} + +static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *dequant_ptr, int log_scale, + __m256i *qp) { + __m128i round = _mm_loadu_si128((const __m128i *)round_ptr); + round = _mm_srai_epi16(round, log_scale); + const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr); + const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr); + + init_one_qp(&round, &qp[0]); + init_one_qp(&quant, &qp[1]); + init_one_qp(&dequant, &qp[2]); +} + +static INLINE void quantize(const __m256i *qp, __m256i *c, + const int16_t *iscan_ptr, int log_scale, + tran_low_t *qcoeff, tran_low_t *dqcoeff, + __m256i *eob) { + const __m256i abs = _mm256_abs_epi32(*c); + __m256i q = _mm256_add_epi32(abs, qp[0]); + + __m256i q_lo = _mm256_mul_epi32(q, qp[1]); + __m256i q_hi = _mm256_srli_epi64(q, 32); + const __m256i qp_hi = _mm256_srli_epi64(qp[1], 32); + q_hi = _mm256_mul_epi32(q_hi, qp_hi); + q_lo = _mm256_srli_epi64(q_lo, 16 - log_scale); + q_hi = _mm256_srli_epi64(q_hi, 16 - log_scale); + q_hi = _mm256_slli_epi64(q_hi, 32); + q = _mm256_or_si256(q_lo, q_hi); + + __m256i dq = _mm256_mullo_epi32(q, qp[2]); + dq = _mm256_srai_epi32(dq, log_scale); + q = _mm256_sign_epi32(q, *c); + dq = _mm256_sign_epi32(dq, *c); + + _mm256_storeu_si256((__m256i *)qcoeff, q); + _mm256_storeu_si256((__m256i *)dqcoeff, dq); + + const __m128i isc = _mm_loadu_si128((const __m128i *)iscan_ptr); + const __m128i zr = _mm_setzero_si128(); + const __m128i lo = _mm_unpacklo_epi16(isc, zr); + const __m128i hi = _mm_unpackhi_epi16(isc, zr); + const __m256i iscan = + _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1); + + const __m256i zero = _mm256_setzero_si256(); + const __m256i zc = _mm256_cmpeq_epi32(dq, zero); + const __m256i nz = _mm256_cmpeq_epi32(zc, zero); + __m256i cur_eob = _mm256_sub_epi32(iscan, nz); + cur_eob = _mm256_and_si256(cur_eob, nz); + *eob = _mm256_max_epi32(cur_eob, *eob); +} + +void av1_highbd_quantize_fp_avx2( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, int log_scale) { + (void)scan; + (void)zbin_ptr; + (void)quant_shift_ptr; + const unsigned int step = 8; + + if (LIKELY(!skip_block)) { + __m256i qp[3], coeff; + + init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, qp); + coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr); + + __m256i eob = 
_mm256_setzero_si256(); + quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob); + + coeff_ptr += step; + qcoeff_ptr += step; + dqcoeff_ptr += step; + iscan += step; + n_coeffs -= step; + + update_qp(qp); + while (n_coeffs > 0) { + coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr); + quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob); + + coeff_ptr += step; + qcoeff_ptr += step; + dqcoeff_ptr += step; + iscan += step; + n_coeffs -= step; + } + { + __m256i eob_s; + eob_s = _mm256_shuffle_epi32(eob, 0xe); + eob = _mm256_max_epi16(eob, eob_s); + eob_s = _mm256_shufflelo_epi16(eob, 0xe); + eob = _mm256_max_epi16(eob, eob_s); + eob_s = _mm256_shufflelo_epi16(eob, 1); + eob = _mm256_max_epi16(eob, eob_s); + const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob), + _mm256_extractf128_si256(eob, 1)); + *eob_ptr = _mm_extract_epi16(final_eob, 0); + } + } else { + do { + const __m256i zero = _mm256_setzero_si256(); + _mm256_storeu_si256((__m256i *)qcoeff_ptr, zero); + _mm256_storeu_si256((__m256i *)dqcoeff_ptr, zero); + qcoeff_ptr += step; + dqcoeff_ptr += step; + n_coeffs -= step; + } while (n_coeffs > 0); + *eob_ptr = 0; + } +} diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c index fa5626002..8d717a083 100644 --- a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c +++ b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c @@ -133,9 +133,10 @@ void av1_highbd_quantize_fp_sse4_1( coeff[0] = _mm_loadu_si128((__m128i const *)src); qparam[0] = - _mm_set_epi32(round_ptr[1], round_ptr[1], round_ptr[1], round_ptr[0]); - qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[0]); - qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[0]); + _mm_set_epi32(round_ptr[1] >> log_scale, round_ptr[1] >> log_scale, + round_ptr[1] >> log_scale, round_ptr[0] >> log_scale); + qparam[1] = _mm_set_epi32(0, quant_ptr[1], 0, quant_ptr[0]); + qparam[2] = _mm_set_epi32(0, dequant_ptr[1], 0, dequant_ptr[0]); // DC and first 3 AC quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant, @@ -143,8 +144,8 @@ void av1_highbd_quantize_fp_sse4_1( // update round/quan/dquan for AC qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]); - qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[1]); - qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[1]); + qparam[1] = _mm_set_epi32(0, quant_ptr[1], 0, quant_ptr[1]); + qparam[2] = _mm_set_epi32(0, dequant_ptr[1], 0, dequant_ptr[1]); quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale, quanAddr, dquanAddr); diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c new file mode 100644 index 000000000..1c0a120ca --- /dev/null +++ b/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <immintrin.h> + +#include "./av1_rtcd.h" +#include "aom/aom_integer.h" +#include "aom_dsp/aom_dsp_common.h" + +static INLINE void read_coeff(const tran_low_t *coeff, __m256i *c) { +#if CONFIG_HIGHBITDEPTH + const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff); + const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1); + *c = _mm256_packs_epi32(x0, x1); + *c = _mm256_permute4x64_epi64(*c, 0xD8); +#else + *c = _mm256_loadu_si256((const __m256i *)coeff); +#endif +} + +static INLINE void write_zero(tran_low_t *qcoeff) { + const __m256i zero = _mm256_setzero_si256(); +#if CONFIG_HIGHBITDEPTH + _mm256_storeu_si256((__m256i *)qcoeff, zero); + _mm256_storeu_si256((__m256i *)qcoeff + 1, zero); +#else + _mm256_storeu_si256((__m256i *)qcoeff, zero); +#endif +} + +static INLINE void init_one_qp(const __m128i *p, __m256i *qp) { + const __m128i ac = _mm_unpackhi_epi64(*p, *p); + *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(*p), ac, 1); +} + +static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *dequant_ptr, int log_scale, + __m256i *thr, __m256i *qp) { + __m128i round = _mm_loadu_si128((const __m128i *)round_ptr); + const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr); + const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr); + + if (log_scale > 0) { + const __m128i rnd = _mm_set1_epi16((int16_t)1 << (log_scale - 1)); + round = _mm_add_epi16(round, rnd); + round = _mm_srai_epi16(round, log_scale); + } + + init_one_qp(&round, &qp[0]); + init_one_qp(&quant, &qp[1]); + + if (log_scale > 0) { + qp[1] = _mm256_slli_epi16(qp[1], log_scale); + } + + init_one_qp(&dequant, &qp[2]); + *thr = _mm256_srai_epi16(qp[2], 1 + log_scale); +} + +static INLINE void update_qp(int log_scale, __m256i *thr, __m256i *qp) { + qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); + qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); + qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11); + *thr = _mm256_srai_epi16(qp[2], 1 + log_scale); +} + +#define store_quan(q, addr) \ + do { \ + __m256i sign_bits = _mm256_srai_epi16(q, 15); \ + __m256i y0 = _mm256_unpacklo_epi16(q, sign_bits); \ + __m256i y1 = _mm256_unpackhi_epi16(q, sign_bits); \ + __m256i x0 = _mm256_permute2x128_si256(y0, y1, 0x20); \ + __m256i x1 = _mm256_permute2x128_si256(y0, y1, 0x31); \ + _mm256_storeu_si256((__m256i *)addr, x0); \ + _mm256_storeu_si256((__m256i *)addr + 1, x1); \ + } while (0) + +#if CONFIG_HIGHBITDEPTH +#define store_two_quan(q, addr1, dq, addr2) \ + do { \ + store_quan(q, addr1); \ + store_quan(dq, addr2); \ + } while (0) +#else +#define store_two_quan(q, addr1, dq, addr2) \ + do { \ + _mm256_storeu_si256((__m256i *)addr1, q); \ + _mm256_storeu_si256((__m256i *)addr2, dq); \ + } while (0) +#endif + +static INLINE void quantize(const __m256i *thr, const __m256i *qp, __m256i *c, + const int16_t *iscan_ptr, tran_low_t *qcoeff, + tran_low_t *dqcoeff, __m256i *eob) { + const __m256i abs = _mm256_abs_epi16(*c); + __m256i mask = _mm256_cmpgt_epi16(abs, *thr); + mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs, *thr)); + const int nzflag = _mm256_movemask_epi8(mask); + + if (nzflag) { + __m256i q = _mm256_adds_epi16(abs, qp[0]); + q = _mm256_mulhi_epi16(q, qp[1]); + q = _mm256_sign_epi16(q, *c); + const __m256i dq = _mm256_mullo_epi16(q, qp[2]); + + store_two_quan(q, qcoeff, dq, dqcoeff); + const __m256i zero = _mm256_setzero_si256(); + const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr); + const __m256i zero_coeff = 
_mm256_cmpeq_epi16(dq, zero); + const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero); + __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff); + cur_eob = _mm256_and_si256(cur_eob, nzero_coeff); + *eob = _mm256_max_epi16(*eob, cur_eob); + } else { + write_zero(qcoeff); + write_zero(dqcoeff); + } +} + +void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan_ptr, const int16_t *iscan_ptr) { + (void)scan_ptr; + (void)zbin_ptr; + (void)quant_shift_ptr; + const unsigned int step = 16; + + if (LIKELY(!skip_block)) { + __m256i qp[3]; + __m256i coeff, thr; + const int log_scale = 0; + + init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp); + read_coeff(coeff_ptr, &coeff); + + __m256i eob = _mm256_setzero_si256(); + quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob); + + coeff_ptr += step; + qcoeff_ptr += step; + dqcoeff_ptr += step; + iscan_ptr += step; + n_coeffs -= step; + + update_qp(log_scale, &thr, qp); + + while (n_coeffs > 0) { + read_coeff(coeff_ptr, &coeff); + quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob); + + coeff_ptr += step; + qcoeff_ptr += step; + dqcoeff_ptr += step; + iscan_ptr += step; + n_coeffs -= step; + } + { + __m256i eob_s; + eob_s = _mm256_shuffle_epi32(eob, 0xe); + eob = _mm256_max_epi16(eob, eob_s); + eob_s = _mm256_shufflelo_epi16(eob, 0xe); + eob = _mm256_max_epi16(eob, eob_s); + eob_s = _mm256_shufflelo_epi16(eob, 1); + eob = _mm256_max_epi16(eob, eob_s); + const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob), + _mm256_extractf128_si256(eob, 1)); + *eob_ptr = _mm_extract_epi16(final_eob, 0); + } + } else { + do { + write_zero(qcoeff_ptr); + write_zero(dqcoeff_ptr); + qcoeff_ptr += step; + dqcoeff_ptr += step; + n_coeffs -= step; + } while (n_coeffs > 0); + *eob_ptr = 0; + } +} + +static INLINE void quantize_32x32(const __m256i *thr, const __m256i *qp, + __m256i *c, const int16_t *iscan_ptr, + tran_low_t *qcoeff, tran_low_t *dqcoeff, + __m256i *eob) { + const __m256i abs = _mm256_abs_epi16(*c); + __m256i mask = _mm256_cmpgt_epi16(abs, *thr); + mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs, *thr)); + const int nzflag = _mm256_movemask_epi8(mask); + + if (nzflag) { + __m256i q = _mm256_adds_epi16(abs, qp[0]); + q = _mm256_mulhi_epu16(q, qp[1]); + + __m256i dq = _mm256_mullo_epi16(q, qp[2]); + dq = _mm256_srli_epi16(dq, 1); + + q = _mm256_sign_epi16(q, *c); + dq = _mm256_sign_epi16(dq, *c); + + store_two_quan(q, qcoeff, dq, dqcoeff); + const __m256i zero = _mm256_setzero_si256(); + const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr); + const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero); + const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero); + __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff); + cur_eob = _mm256_and_si256(cur_eob, nzero_coeff); + *eob = _mm256_max_epi16(*eob, cur_eob); + } else { + write_zero(qcoeff); + write_zero(dqcoeff); + } +} + +void av1_quantize_fp_32x32_avx2( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan_ptr, 
const int16_t *iscan_ptr) { + (void)scan_ptr; + (void)zbin_ptr; + (void)quant_shift_ptr; + const unsigned int step = 16; + + if (LIKELY(!skip_block)) { + __m256i qp[3]; + __m256i coeff, thr; + const int log_scale = 1; + + init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp); + read_coeff(coeff_ptr, &coeff); + + __m256i eob = _mm256_setzero_si256(); + quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob); + + coeff_ptr += step; + qcoeff_ptr += step; + dqcoeff_ptr += step; + iscan_ptr += step; + n_coeffs -= step; + + update_qp(log_scale, &thr, qp); + + while (n_coeffs > 0) { + read_coeff(coeff_ptr, &coeff); + quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, + &eob); + + coeff_ptr += step; + qcoeff_ptr += step; + dqcoeff_ptr += step; + iscan_ptr += step; + n_coeffs -= step; + } + { + __m256i eob_s; + eob_s = _mm256_shuffle_epi32(eob, 0xe); + eob = _mm256_max_epi16(eob, eob_s); + eob_s = _mm256_shufflelo_epi16(eob, 0xe); + eob = _mm256_max_epi16(eob, eob_s); + eob_s = _mm256_shufflelo_epi16(eob, 1); + eob = _mm256_max_epi16(eob, eob_s); + const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob), + _mm256_extractf128_si256(eob, 1)); + *eob_ptr = _mm_extract_epi16(final_eob, 0); + } + } else { + do { + write_zero(qcoeff_ptr); + write_zero(dqcoeff_ptr); + qcoeff_ptr += step; + dqcoeff_ptr += step; + n_coeffs -= step; + } while (n_coeffs > 0); + *eob_ptr = 0; + } +} diff --git a/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c b/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c index 37c4b0d88..496c33395 100644 --- a/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c +++ b/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c @@ -203,8 +203,12 @@ static void fidtx4_sse2(__m128i *in) { #endif // CONFIG_EXT_TX void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m128i in[4]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif switch (tx_type) { case DCT_DCT: aom_fdct4x4_sse2(input, output, stride); break; @@ -1301,8 +1305,12 @@ static void fidtx8_sse2(__m128i *in) { #endif // CONFIG_EXT_TX void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m128i in[8]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif switch (tx_type) { case DCT_DCT: aom_fdct8x8_sse2(input, output, stride); break; @@ -2334,8 +2342,12 @@ static void fidtx16_sse2(__m128i *in0, __m128i *in1) { #endif // CONFIG_EXT_TX void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m128i in0[16], in1[16]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif switch (tx_type) { case DCT_DCT: @@ -2550,8 +2562,12 @@ static INLINE void write_buffer_4x8(tran_low_t *output, __m128i *res) { } void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m128i in[8]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif switch (tx_type) { case DCT_DCT: @@ -2724,8 +2740,12 @@ static INLINE void write_buffer_8x4(tran_low_t *output, __m128i *res) { } void av1_fht8x4_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + 
TxfmParam *txfm_param) { __m128i in[8]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif switch (tx_type) { case DCT_DCT: @@ -2864,8 +2884,12 @@ static void row_8x16_rounding(__m128i *in, int bits) { } void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m128i in[16]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif __m128i *const t = in; // Alias to top 8x8 sub block __m128i *const b = in + 8; // Alias to bottom 8x8 sub block @@ -3045,8 +3069,12 @@ static INLINE void load_buffer_16x8(const int16_t *input, __m128i *in, #define col_16x8_rounding row_8x16_rounding void av1_fht16x8_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m128i in[16]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif __m128i *const l = in; // Alias to left 8x8 sub block __m128i *const r = in + 8; // Alias to right 8x8 sub block, which we store @@ -3355,8 +3383,12 @@ static INLINE void fhalfright32_16col(__m128i *tl, __m128i *tr, __m128i *bl, // For 16x32, this means the input is a 2x2 grid of such blocks. // For 32x16, it means the input is a 4x1 grid. void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m128i intl[16], intr[16], inbl[16], inbr[16]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif switch (tx_type) { case DCT_DCT: @@ -3544,8 +3576,12 @@ static INLINE void write_buffer_32x16(tran_low_t *output, __m128i *res0, } void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m128i in0[16], in1[16], in2[16], in3[16]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0); switch (tx_type) { @@ -3784,8 +3820,12 @@ static INLINE void write_buffer_32x32(__m128i *in0, __m128i *in1, __m128i *in2, } void av1_fht32x32_sse2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m128i in0[32], in1[32], in2[32], in3[32]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "No 32x32 sse2 MRC_DCT implementation"); +#endif load_buffer_32x32(input, in0, in1, in2, in3, stride, 0, 0); switch (tx_type) { diff --git a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c b/third_party/aom/av1/encoder/x86/error_intrin_avx2.c index ae733a1ce..20ba4149c 100644 --- a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c +++ b/third_party/aom/av1/encoder/x86/error_intrin_avx2.c @@ -14,7 +14,20 @@ #include "./av1_rtcd.h" #include "aom/aom_integer.h" -int64_t av1_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff, +static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset, + __m256i *c) { + const tran_low_t *addr = coeff + offset; +#if CONFIG_HIGHBITDEPTH + const __m256i x0 = _mm256_loadu_si256((const __m256i *)addr); + const __m256i x1 = _mm256_loadu_si256((const __m256i *)addr + 1); + const __m256i y = _mm256_packs_epi32(x0, x1); + *c = _mm256_permute4x64_epi64(y, 0xD8); +#else + *c = _mm256_loadu_si256((const __m256i *)addr); +#endif +} + 
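/*
 * Illustrative aside (not part of the patch): read_coeff() above exists
 * because tran_low_t is a 32-bit type when CONFIG_HIGHBITDEPTH is enabled,
 * so two 256-bit loads are packed down to the 16-bit lanes that the
 * _mm256_madd_epi16 accumulation in av1_block_error_avx2() expects.
 * A scalar sketch of the two sums the vector loop computes (the helper
 * name below is hypothetical):
 */
static int64_t block_error_scalar_sketch(const tran_low_t *coeff,
                                         const tran_low_t *dqcoeff,
                                         intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;
  intptr_t i;
  for (i = 0; i < block_size; ++i) {
    const int diff = (int)dqcoeff[i] - (int)coeff[i];
    error += (int64_t)diff * diff;            // sse: squared reconstruction error
    sqcoeff += (int64_t)coeff[i] * coeff[i];  // ssz: energy of the source coeffs
  }
  *ssz = sqcoeff;
  return error;
}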
+int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz) { __m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg; __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi; @@ -22,16 +35,16 @@ int64_t av1_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff, __m128i sse_reg128, ssz_reg128; int64_t sse; int i; - const __m256i zero_reg = _mm256_set1_epi16(0); + const __m256i zero_reg = _mm256_setzero_si256(); // init sse and ssz registerd to zero - sse_reg = _mm256_set1_epi16(0); - ssz_reg = _mm256_set1_epi16(0); + sse_reg = _mm256_setzero_si256(); + ssz_reg = _mm256_setzero_si256(); for (i = 0; i < block_size; i += 16) { // load 32 bytes from coeff and dqcoeff - coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i)); - dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i)); + read_coeff(coeff, i, &coeff_reg); + read_coeff(dqcoeff, i, &dqcoeff_reg); // dqcoeff - coeff dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg); // madd (dqcoeff - coeff) diff --git a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c index b56eed518..cab36f2bd 100644 --- a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c +++ b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c @@ -113,25 +113,13 @@ static void fdct4x4_sse4_1(__m128i *in, int bit) { in[3] = _mm_unpackhi_epi64(v1, v3); } -static INLINE void write_buffer_4x4(__m128i *res, tran_low_t *output) { +static INLINE void write_buffer_4x4(__m128i *res, int32_t *output) { _mm_store_si128((__m128i *)(output + 0 * 4), res[0]); _mm_store_si128((__m128i *)(output + 1 * 4), res[1]); _mm_store_si128((__m128i *)(output + 2 * 4), res[2]); _mm_store_si128((__m128i *)(output + 3 * 4), res[3]); } -// Note: -// We implement av1_fwd_txfm2d_4x4(). 
This function is kept here since -// av1_highbd_fht4x4_c() is not removed yet -void av1_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output, - int stride, int tx_type) { - (void)input; - (void)output; - (void)stride; - (void)tx_type; - assert(0); -} - static void fadst4x4_sse4_1(__m128i *in, int bit) { const int32_t *cospi = cospi_arr(bit); const __m128i cospi8 = _mm_set1_epi32(cospi[8]); @@ -416,7 +404,7 @@ static INLINE void col_txfm_8x8_rounding(__m128i *in, int shift) { in[15] = _mm_srai_epi32(in[15], shift); } -static INLINE void write_buffer_8x8(const __m128i *res, tran_low_t *output) { +static INLINE void write_buffer_8x8(const __m128i *res, int32_t *output) { _mm_store_si128((__m128i *)(output + 0 * 4), res[0]); _mm_store_si128((__m128i *)(output + 1 * 4), res[1]); _mm_store_si128((__m128i *)(output + 2 * 4), res[2]); @@ -1800,7 +1788,7 @@ static void col_txfm_16x16_rounding(__m128i *in, int shift) { col_txfm_8x8_rounding(&in[48], shift); } -static void write_buffer_16x16(const __m128i *in, tran_low_t *output) { +static void write_buffer_16x16(const __m128i *in, int32_t *output) { const int size_8x8 = 16 * 4; write_buffer_8x8(&in[0], output); output += size_8x8; diff --git a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c index 8495ad1aa..af8e9a5f4 100644 --- a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c +++ b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c @@ -18,51 +18,6 @@ #include "aom_dsp/txfm_common.h" #include "aom_dsp/x86/txfm_common_avx2.h" -static int32_t get_16x16_sum(const int16_t *input, int stride) { - __m256i r0, r1, r2, r3, u0, u1; - __m256i zero = _mm256_setzero_si256(); - __m256i sum = _mm256_setzero_si256(); - const int16_t *blockBound = input + (stride << 4); - __m128i v0, v1; - - while (input < blockBound) { - r0 = _mm256_loadu_si256((__m256i const *)input); - r1 = _mm256_loadu_si256((__m256i const *)(input + stride)); - r2 = _mm256_loadu_si256((__m256i const *)(input + 2 * stride)); - r3 = _mm256_loadu_si256((__m256i const *)(input + 3 * stride)); - - u0 = _mm256_add_epi16(r0, r1); - u1 = _mm256_add_epi16(r2, r3); - sum = _mm256_add_epi16(sum, u0); - sum = _mm256_add_epi16(sum, u1); - - input += stride << 2; - } - - // unpack 16 int16_t into 2x8 int32_t - u0 = _mm256_unpacklo_epi16(zero, sum); - u1 = _mm256_unpackhi_epi16(zero, sum); - u0 = _mm256_srai_epi32(u0, 16); - u1 = _mm256_srai_epi32(u1, 16); - sum = _mm256_add_epi32(u0, u1); - - u0 = _mm256_srli_si256(sum, 8); - u1 = _mm256_add_epi32(sum, u0); - - v0 = _mm_add_epi32(_mm256_extracti128_si256(u1, 1), - _mm256_castsi256_si128(u1)); - v1 = _mm_srli_si128(v0, 4); - v0 = _mm_add_epi32(v0, v1); - return (int32_t)_mm_extract_epi32(v0, 0); -} - -void aom_fdct16x16_1_avx2(const int16_t *input, tran_low_t *output, - int stride) { - int32_t dc = get_16x16_sum(input, stride); - output[0] = (tran_low_t)(dc >> 1); - _mm256_zeroupper(); -} - static INLINE void load_buffer_16x16(const int16_t *input, int stride, int flipud, int fliplr, __m256i *in) { if (!flipud) { @@ -959,8 +914,12 @@ static void fidtx16_avx2(__m256i *in) { #endif void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m256i in[16]; + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif switch (tx_type) { case DCT_DCT: @@ -1084,22 +1043,6 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, 
_mm256_zeroupper(); } -void aom_fdct32x32_1_avx2(const int16_t *input, tran_low_t *output, - int stride) { - // left and upper corner - int32_t sum = get_16x16_sum(input, stride); - // right and upper corner - sum += get_16x16_sum(input + 16, stride); - // left and lower corner - sum += get_16x16_sum(input + (stride << 4), stride); - // right and lower corner - sum += get_16x16_sum(input + (stride << 4) + 16, stride); - - sum >>= 3; - output[0] = (tran_low_t)sum; - _mm256_zeroupper(); -} - static void mm256_vectors_swap(__m256i *a0, __m256i *a1, const int size) { int i = 0; __m256i temp; @@ -1570,9 +1513,13 @@ static void fidtx32_avx2(__m256i *in0, __m256i *in1) { #endif void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride, - int tx_type) { + TxfmParam *txfm_param) { __m256i in0[32]; // left 32 columns __m256i in1[32]; // right 32 columns + int tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "No avx2 32x32 implementation of MRC_DCT"); +#endif switch (tx_type) { case DCT_DCT: |
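/*
 * Illustrative aside (not part of the patch), summarizing the AVX2 fast-path
 * quantizers added earlier in this change.  Each iteration handles 16
 * coefficients: the first group keeps the DC round/quant/dequant entries in
 * lane 0 and the AC entries elsewhere, after which update_qp() broadcasts the
 * AC entries for all remaining groups.  A group whose coefficients all fall
 * below thr = dequant >> (1 + log_scale) is written out as zeros, since those
 * coefficients quantize to zero anyway.  For the 32x32 path (log_scale = 1),
 * init_qp() pre-scales round and quant and the dequantized value is halved.
 * Below is a scalar sketch of the per-coefficient arithmetic for the
 * log_scale = 0 case; the vector code reaches the same result with saturating
 * 16-bit adds and _mm256_mulhi_epi16.  The helper name is hypothetical.
 */
static void quantize_fp_scalar_sketch(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr,
    const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
    const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *iscan_ptr) {
  intptr_t i;
  int eob = -1;
  for (i = 0; i < n_coeffs; ++i) {
    const int coeff = coeff_ptr[i];
    const int coeff_sign = (coeff >> 31);
    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
    // q = ((|coeff| + round) * quant) >> 16, with the sign restored after.
    const int abs_q = (int)(
        (((int64_t)abs_coeff + round_ptr[i != 0]) * quant_ptr[i != 0]) >> 16);
    qcoeff_ptr[i] = (tran_low_t)((abs_q ^ coeff_sign) - coeff_sign);
    dqcoeff_ptr[i] = qcoeff_ptr[i] * dequant_ptr[i != 0];
    // The vector code walks in raster order, so it tracks the end-of-block
    // position through iscan rather than through the loop index.
    if (abs_q && iscan_ptr[i] > eob) eob = iscan_ptr[i];
  }
  *eob_ptr = (uint16_t)(eob + 1);
}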